1. The CPU uses both the guest page tables and the extended page tables (EPT). The EPT base address is set by the function vmx_set_cr3:
static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
        unsigned long guest_cr3;
        u64 eptp;

        guest_cr3 = cr3;
        if (enable_ept) {
                /* load the EPT base pointer into the VMCS */
                eptp = construct_eptp(cr3);
                vmcs_write64(EPT_POINTER, eptp);
                /* the guest keeps its own CR3; until the guest enables
                 * paging, GUEST_CR3 points at the identity-map page table */
                guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 :
                        VMX_EPT_IDENTITY_PAGETABLE_ADDR;
        }

        vmx_flush_tlb(vcpu);
        vmcs_writel(GUEST_CR3, guest_cr3);
        if (vcpu->arch.cr0 & X86_CR0_PE)
                vmx_fpu_deactivate(vcpu);
}
– When EPT is active, the EPT base pointer (loaded from the VMCS on VM entry) points to the extended page tables.
– EPT is deactivated on VM exit.
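The construct_eptp call above packs the EPT memory type and the page-walk length into the low bits of the root table's host-physical address before the value is written to EPT_POINTER. A minimal, self-contained sketch of that packing (the constant names below are illustrative, not the kernel's VMX_EPT_* macros):

#include <stdint.h>
#include <stdio.h>

/* Illustrative constants; the kernel defines its own VMX_EPT_* macros. */
#define EPT_MEMTYPE_WB      6ULL          /* bits 2:0  - write-back memory type   */
#define EPT_WALK_LENGTH_4   (3ULL << 3)   /* bits 5:3  - page-walk length minus 1 */
#define EPT_PAGE_MASK       (~0xfffULL)   /* bits 63:12 - PML4 physical address   */

static uint64_t make_eptp(uint64_t root_hpa)
{
        return EPT_MEMTYPE_WB | EPT_WALK_LENGTH_4 | (root_hpa & EPT_PAGE_MASK);
}

int main(void)
{
        /* example: EPT PML4 table at host physical address 0x12345000 */
        printf("EPTP = 0x%llx\n", (unsigned long long)make_eptp(0x12345000ULL));
        return 0;
}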
static struct kvm_x86_ops vmx_x86_ops = {
        .....
        .set_cr3 = vmx_set_cr3,
        .....
};
In the function kvm_vcpu_ioctl:
static long kvm_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
{
        case KVM_RUN:
                r = -EINVAL;
                if (arg)
                        goto out;
                r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
                break;
}
kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-> __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-> vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-> kvm_mmu_reload(struct kvm_vcpu *vcpu)
-> kvm_mmu_load(struct kvm_vcpu *vcpu)
-> kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
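The last step of this chain is where the EPT root reaches the hardware: kvm_mmu_load allocates the root table and hands its host-physical address to the set_cr3 hook, i.e. vmx_set_cr3 above. A simplified paraphrase of kvm_mmu_load (based on the same KVM version as the excerpts; details such as root syncing are omitted):

int kvm_mmu_load(struct kvm_vcpu *vcpu)
{
        int r;

        r = mmu_topup_memory_caches(vcpu);
        if (r)
                return r;
        spin_lock(&vcpu->kvm->mmu_lock);
        kvm_mmu_free_some_pages(vcpu);
        /* allocates the EPT root and records it in vcpu->arch.mmu.root_hpa */
        r = mmu_alloc_roots(vcpu);
        spin_unlock(&vcpu->kvm->mmu_lock);
        if (r)
                return r;
        /* with EPT enabled this calls vmx_set_cr3() shown above */
        kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
        return r;
}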
2. EPT is built dynamically and reuses the shadow-page code in KVM
The EPT tables are populated from the EPT-violation VM exit; each entry maps a guest physical address (GPA) directly to the corresponding host physical address (HPA). The page-fault handler used in this mode is tdp_page_fault (TDP = two-dimensional paging):
static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
                          u32 error_code)
{
        pfn_t pfn;
        int r;
        int largepage = 0;
        gfn_t gfn = gpa >> PAGE_SHIFT;          /* faulting guest frame number */
        unsigned long mmu_seq;

        ASSERT(vcpu);
        ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));

        r = mmu_topup_memory_caches(vcpu);
        if (r)
                return r;

        /* use a large-page mapping if the whole large page is backed */
        if (is_largepage_backed(vcpu, gfn &
                        ~(KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL) - 1))) {
                gfn &= ~(KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL) - 1);
                largepage = 1;
        }
        mmu_seq = vcpu->kvm->mmu_notifier_seq;
        smp_rmb();
        /* gfn -> host pfn: this is where the GPA gets its backing host page */
        pfn = gfn_to_pfn(vcpu->kvm, gfn);
        if (is_error_pfn(pfn)) {
                kvm_release_pfn_clean(pfn);
                return 1;
        }
        spin_lock(&vcpu->kvm->mmu_lock);
        if (mmu_notifier_retry(vcpu, mmu_seq))
                goto out_unlock;
        kvm_mmu_free_some_pages(vcpu);
        /* install the gpa -> pfn translation in the EPT */
        r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
                         largepage, gfn, pfn);
        spin_unlock(&vcpu->kvm->mmu_lock);

        return r;

out_unlock:
        spin_unlock(&vcpu->kvm->mmu_lock);
        kvm_release_pfn_clean(pfn);
        return 0;
}
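__direct_map, called near the end of tdp_page_fault, walks the EPT levels for the faulting GPA, allocating intermediate tables as needed, and writes a leaf entry that points at the host page frame. The self-contained toy below is not kernel code; it just models that walk-and-install step over a software 4-level table with made-up names:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define ENTRIES      512          /* 9 bits of index per level */
#define PAGE_SHIFT   12
#define PRESENT      1ULL
#define ADDR_MASK    0x000ffffffffff000ULL

static uint64_t *alloc_table(void)
{
        uint64_t *t = aligned_alloc(4096, ENTRIES * sizeof(uint64_t));
        memset(t, 0, ENTRIES * sizeof(uint64_t));
        return t;
}

/* index of 'gpa' at the given level (4 = top, 1 = leaf) */
static unsigned idx(uint64_t gpa, int level)
{
        return (gpa >> (PAGE_SHIFT + 9 * (level - 1))) & (ENTRIES - 1);
}

/* install a 4 KB mapping gpa -> hpa, creating intermediate tables on demand */
static void map_gpa(uint64_t *root, uint64_t gpa, uint64_t hpa)
{
        uint64_t *table = root;

        for (int level = 4; level > 1; level--) {
                unsigned i = idx(gpa, level);
                if (!(table[i] & PRESENT))
                        table[i] = (uint64_t)(uintptr_t)alloc_table() | PRESENT;
                table = (uint64_t *)(uintptr_t)(table[i] & ADDR_MASK);
        }
        table[idx(gpa, 1)] = (hpa & ADDR_MASK) | PRESENT;
}

/* walk the table the way the CPU would and return the hpa (or 0 if unmapped) */
static uint64_t translate(uint64_t *root, uint64_t gpa)
{
        uint64_t *table = root;

        for (int level = 4; level > 1; level--) {
                uint64_t e = table[idx(gpa, level)];
                if (!(e & PRESENT))
                        return 0;
                table = (uint64_t *)(uintptr_t)(e & ADDR_MASK);
        }
        uint64_t leaf = table[idx(gpa, 1)];
        return (leaf & PRESENT) ? (leaf & ADDR_MASK) | (gpa & 0xfff) : 0;
}

int main(void)
{
        uint64_t *root = alloc_table();

        map_gpa(root, 0x7654321000ULL, 0xabcde000ULL);   /* the "EPT violation" is fixed */
        printf("0x7654321abc -> 0x%llx\n",
               (unsigned long long)translate(root, 0x7654321abcULL));
        return 0;
}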
When EPT is enabled, KVM installs tdp_page_fault as the MMU's page-fault callback:

        context->page_fault = tdp_page_fault;
EPT violations reach this handler through the VM-exit path, which is dispatched via the handle_exit hook:

static struct kvm_x86_ops vmx_x86_ops = {
        .....
        .handle_exit = vmx_handle_exit,
        .....
};
static long kvm_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
{
        case KVM_RUN:
                r = -EINVAL;
                if (arg)
                        goto out;
                r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
                break;
}
kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-> __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-> vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-> kvm_x86_ops->handle_exit(kvm_run, vcpu)
-> vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
-> kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run)
   [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation -> handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
   [EXIT_REASON_EXCEPTION_NMI] = handle_exception -> handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-> kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
-> vcpu->arch.mmu.page_fault(vcpu, cr2, error_code);
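handle_ept_violation itself is short: it reads the faulting guest-physical address out of the VMCS and feeds it to the MMU, which dispatches to tdp_page_fault through the page_fault callback. A simplified paraphrase (based on the same KVM version; the real function also validates the exit qualification and error bits):

static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
        gpa_t gpa;

        /* the real handler first checks the exit qualification read from
         * the VMCS (access type, guest-linear-address validity, ...) */

        /* the guest physical address that missed in the EPT */
        gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);

        /* hands the fault to the MMU; with EPT this ends up in
         * tdp_page_fault via vcpu->arch.mmu.page_fault */
        return kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
}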
3. All guest physical addresses go through the extended page tables, including the address in CR3 and the addresses stored in guest PDEs and PTEs; every guest paging-structure entry is itself translated through EPT before it is used.
4. TLB management with invept
The invept instruction invalidates cached GPA-to-HPA translations derived from the EPT. KVM uses it at three granularities, wrapped in the helpers below:
static inline void ept_sync_global(void)
{
        if (cpu_has_vmx_invept_global())
                __invept(VMX_EPT_EXTENT_GLOBAL, 0, 0);
}

static inline void ept_sync_context(u64 eptp)
{
        if (enable_ept) {
                if (cpu_has_vmx_invept_context())
                        __invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0);
                else
                        ept_sync_global();
        }
}

static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa)
{
        if (enable_ept) {
                if (cpu_has_vmx_invept_individual_addr())
                        __invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR,
                                        eptp, gpa);
                else
                        ept_sync_context(eptp);
        }
}
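These helpers are reached from vmx_flush_tlb, which is also what vmx_set_cr3 calls before loading GUEST_CR3 (see point 1). A simplified paraphrase of it (same KVM version assumed; details may differ) ties the EPT and VPID flushes together:

static inline void vmx_flush_tlb(struct kvm_vcpu *vcpu)
{
        /* drop linear translations tagged with this guest's VPID (see point 5) */
        vpid_sync_vcpu_all(to_vmx(vcpu));
        /* drop GPA->HPA translations cached for this EPT context */
        if (enable_ept)
                ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa));
}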
5. Support for software-controlled TLB invalidation with invvpid
– VPID is activated when the "enable VPID" control bit is set in the VMCS.
– A 16-bit virtual-processor-ID (VPID) field was added to the VMCS.
– The VMM allocates a unique VPID for each guest OS.
– The VMM itself uses VPID 0x0000; no guest can have this VPID.
– Cached linear translations are tagged with the VPID value.
How does KVM allocate a VPID? It uses the vmx_vpid_bitmap:
static void allocate_vpid(struct vcpu_vmx *vmx)
{
        int vpid;

        vmx->vpid = 0;
        if (!enable_vpid)
                return;
        spin_lock(&vmx_vpid_lock);
        /* pick the lowest unused VPID from the global bitmap */
        vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
        if (vpid < VMX_NR_VPIDS) {
                vmx->vpid = vpid;
                __set_bit(vpid, vmx_vpid_bitmap);
        }
        spin_unlock(&vmx_vpid_lock);
}
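The invvpid side is not shown above: when KVM needs to drop a guest's tagged linear translations (for example from vmx_flush_tlb in point 4), it issues invvpid for that VPID. A simplified paraphrase of the helper it uses (same KVM version assumed; the descriptor layout with the VPID in bits 15:0 and the linear address in bits 127:64 comes from the Intel SDM):

/* Invalidate all TLB entries tagged with this vCPU's VPID.
 * __invvpid() executes the INVVPID instruction; its 128-bit descriptor
 * carries the VPID in bits 15:0 and, for the single-address extent,
 * a linear address in bits 127:64.
 */
static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx)
{
        if (vmx->vpid == 0)             /* VPID 0 is reserved for the host */
                return;

        __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0);
}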