EPT in kvm

650阅读 0评论2014-01-10 embeddedlwp
分类:LINUX

1. With EPT enabled, the CPU walks both the guest page table and the extended page table (EPT). The EPT pointer is written into the VMCS (field EPT_POINTER) by the function vmx_set_cr3.


  1. 1753 static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)  
  2. 1754 {  
  3. 1755         unsigned long guest_cr3;  
  4. 1756         u64 eptp;  
  5. 1757   
  6. 1758         guest_cr3 = cr3;  
  7. 1759         if (enable_ept) {  
  8. 1760                 eptp = construct_eptp(cr3);  
  9. 1761                 vmcs_write64(EPT_POINTER, eptp);  
  10. 1762                 guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 :  
  11. 1763                         VMX_EPT_IDENTITY_PAGETABLE_ADDR;  
  12. 1764         }  
  13. 1765   
  14. 1766         vmx_flush_tlb(vcpu);  
  15. 1767         vmcs_writel(GUEST_CR3, guest_cr3);  
  16. 1768         if (vcpu->arch.cr0 & X86_CR0_PE)  
  17. 1769                 vmx_fpu_deactivate(vcpu);  
  18. 1770 }  
EPT (optionally) activated on VM entry
– When EPT active, EPT base pointer (loaded on VM entry from VMCS) points to extended page tables
– EPT deactivated on VM exit


3947 static struct kvm_x86_ops vmx_x86_ops = {

3975         .set_cr3 = vmx_set_cr3,

          .....

}

在函数 kvm_vcpu_ioctl 中

static long kvm_vcpu_ioctl(struct file *filp,  unsigned int ioctl, unsigned long arg)
case KVM_RUN:
           r = -EINVAL;
           if (arg)
                     goto out;
           r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
1959                 break;

kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -> __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -> vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -> kvm_mmu_reload(struct kvm_vcpu *vcpu) -> kvm_mmu_load(struct kvm_vcpu *vcpu) -> kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa);


2. The EPT is built dynamically and reuses the shadow page table code in KVM

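The EPT is populated on demand: each EPT-violation VM exit ends up in tdp_page_fault, which installs the missing entry. An EPT entry maps a guest physical address (GPA) directly to a host physical address (HPA).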


  1. 2091 static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,  
  2. 2092                                 u32 error_code)  
  3. 2093 {  
  4. 2094         pfn_t pfn;  
  5. 2095         int r;  
  6. 2096         int largepage = 0;  
  7. 2097         gfn_t gfn = gpa >> PAGE_SHIFT;  
  8. 2098         unsigned long mmu_seq;  
  9. 2099   
  10. 2100         ASSERT(vcpu);  
  11. 2101         ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));  
  12. 2102   
  13. 2103         r = mmu_topup_memory_caches(vcpu);  
  14. 2104         if (r)  
  15. 2105                 return r;  
  16. 2106                   
  17. 2107         if (is_largepage_backed(vcpu, gfn &  
  18. 2108                         ~(KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL) - 1))) {  
  19. 2109                 gfn &= ~(KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL) - 1);  
  20. 2110                 largepage = 1;  
  21. 2111         }  
  22. 2112         mmu_seq = vcpu->kvm->mmu_notifier_seq;  
  23. 2113         smp_rmb();  
  24. 2114         pfn = gfn_to_pfn(vcpu->kvm, gfn);  
  25. 2115         if (is_error_pfn(pfn)) {  
  26. 2116                 kvm_release_pfn_clean(pfn);  
  27. 2117                 return 1;  
  28. 2118         }  
  29. 2119         spin_lock(&vcpu->kvm->mmu_lock);  
  30. 2120         if (mmu_notifier_retry(vcpu, mmu_seq))  
  31. 2121                 goto out_unlock;  
  32. 2122         kvm_mmu_free_some_pages(vcpu);  
  33. 2123         r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,  
  34. 2124                          largepage, gfn, pfn);  
  35. 2125         spin_unlock(&vcpu->kvm->mmu_lock);  
  36. 2126   
  37. 2127         return r;  
  38. 2128   
  39. 2129 out_unlock:  
  40. 2130         spin_unlock(&vcpu->kvm->mmu_lock);  
  41. 2131         kvm_release_pfn_clean(pfn);  
  42. 2132         return 0;  
  43. 2133 }  
static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
{


         context->page_fault = tdp_page_fault;

}

static struct kvm_x86_ops   vmx_x86_ops = {

              .handle_exit = vmx_handle_exit,

}

static long kvm_vcpu_ioctl(struct file *filp,unsigned int ioctl, unsigned long arg)

{

1954         case KVM_RUN:
1955                 r = -EINVAL;
1956                 if (arg)
1957                         goto out;
1958                 r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
1959                 break;
}

kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)   ->  __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)->

vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)->   kvm_x86_ops->handle_exit(kvm_run, vcpu)                ->

vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)  ->  kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run)   ->

[EXIT_REASON_EPT_VIOLATION]           = handle_ept_violation,      ->  handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
[EXIT_REASON_EXCEPTION_NMI]         = handle_exception,            ->  handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)   

    ->kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)->vcpu->arch.mmu.page_fault(vcpu, cr2, error_code);


3. All guest physical addresses go through the extended page table, including the address in CR3 and the addresses stored in PDEs and PTEs.

4. TLB management by invept

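INVEPT is used to invalidate cached GPA-to-HPA translations. The code below wraps three INVEPT extents: global, context, and individual address. Each narrower variant falls back to the next wider one when the CPU does not support it.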

  1. 476 static inline void ept_sync_global(void)  
  2. 477 {  
  3. 478         if (cpu_has_vmx_invept_global())  
  4. 479                 __invept(VMX_EPT_EXTENT_GLOBAL, 0, 0);  
  5. 480 }  
  1. 482 static inline void ept_sync_context(u64 eptp)  
  2. 483 {  
  3. 484         if (enable_ept) {  
  4. 485                 if (cpu_has_vmx_invept_context())  
  5. 486                         __invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0);  
  6. 487                 else  
  7. 488                         ept_sync_global();  
  8. 489         }  
  9. 490 }  
  1. 492 static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa)  
  2. 493 {  
  3. 494         if (enable_ept) {  
  4. 495                 if (cpu_has_vmx_invept_individual_addr())  
  5. 496                         __invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR,  
  6. 497                                         eptp, gpa);  
  7. 498                 else  
  8. 499                         ept_sync_context(eptp);  
  9. 500         }  
  10. 501 }  

5. Software-controlled TLB management with INVVPID
VPID is activated if the new “enable VPID” control bit is set in the VMCS

New 16-bit virtual-processor-ID (VPID) field in the VMCS
– VMM allocates a unique value for each guest OS
– VMM itself uses VPID 0x0000; no guest can have this VPID

Cached linear translations are tagged with the VPID value.

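How does KVM allocate a VPID? It uses the bitmap vmx_vpid_bitmap: allocate_vpid takes the first zero bit under vmx_vpid_lock and marks it as used; if VPID is disabled or no bit is free, the vCPU keeps VPID 0.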

  1. 2248 static void allocate_vpid(struct vcpu_vmx *vmx)  
  2. 2249 {  
  3. 2250         int vpid;  
  4. 2251   
  5. 2252         vmx->vpid = 0;  
  6. 2253         if (!enable_vpid)  
  7. 2254                 return;   
  8. 2255         spin_lock(&vmx_vpid_lock);  
  9. 2256         vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);  
  10. 2257         if (vpid < VMX_NR_VPIDS) {  
  11. 2258                 vmx->vpid = vpid;  
  12. 2259                 __set_bit(vpid, vmx_vpid_bitmap);  
  13. 2260         }  
  14. 2261         spin_unlock(&vmx_vpid_lock);  
  15. 2262 }  

 

上一篇:EPT学习总结及KVM的处理
下一篇:qemu-kvm 内存虚拟化