1. The CPU uses both the guest page tables and the extended page tables (EPT). The EPT base address is set by the function vmx_set_cr3:
static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
        unsigned long guest_cr3;
        u64 eptp;

        guest_cr3 = cr3;
        if (enable_ept) {
                /* load the EPT base pointer into the VMCS */
                eptp = construct_eptp(cr3);
                vmcs_write64(EPT_POINTER, eptp);
                /* the guest keeps its own CR3; until the guest enables
                 * paging, GUEST_CR3 points at the identity-map page table */
                guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 :
                        VMX_EPT_IDENTITY_PAGETABLE_ADDR;
        }

        vmx_flush_tlb(vcpu);
        vmcs_writel(GUEST_CR3, guest_cr3);
        if (vcpu->arch.cr0 & X86_CR0_PE)
                vmx_fpu_deactivate(vcpu);
}
– When EPT is active, the EPT base pointer (loaded from the VMCS on VM entry) points to the extended page tables.
– EPT is deactivated on VM exit.
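The construct_eptp call above packs the EPT memory type and the page-walk length into the low bits of the root table's host-physical address before the value is written to EPT_POINTER. A minimal, self-contained sketch of that packing (the constant names below are illustrative, not the kernel's VMX_EPT_* macros):

#include <stdint.h>
#include <stdio.h>

/* Illustrative constants; the kernel defines its own VMX_EPT_* macros. */
#define EPT_MEMTYPE_WB      6ULL          /* bits 2:0  - write-back memory type   */
#define EPT_WALK_LENGTH_4   (3ULL << 3)   /* bits 5:3  - page-walk length minus 1 */
#define EPT_PAGE_MASK       (~0xfffULL)   /* bits 63:12 - PML4 physical address   */

static uint64_t make_eptp(uint64_t root_hpa)
{
        return EPT_MEMTYPE_WB | EPT_WALK_LENGTH_4 | (root_hpa & EPT_PAGE_MASK);
}

int main(void)
{
        /* example: EPT PML4 table at host physical address 0x12345000 */
        printf("EPTP = 0x%llx\n", (unsigned long long)make_eptp(0x12345000ULL));
        return 0;
}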
static struct kvm_x86_ops vmx_x86_ops = {
        .....
        .set_cr3 = vmx_set_cr3,
        .....
};
In the function kvm_vcpu_ioctl:
static long kvm_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
{
        case KVM_RUN:
                r = -EINVAL;
                if (arg)
                        goto out;
                r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
                break;
}
kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-> __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-> vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-> kvm_mmu_reload(struct kvm_vcpu *vcpu)
-> kvm_mmu_load(struct kvm_vcpu *vcpu)
-> kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
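The last step of this chain is where the EPT root reaches the hardware: kvm_mmu_load allocates the root table and hands its host-physical address to the set_cr3 hook, i.e. vmx_set_cr3 above. A simplified paraphrase of kvm_mmu_load (based on the same KVM version as the excerpts; details such as root syncing are omitted):

int kvm_mmu_load(struct kvm_vcpu *vcpu)
{
        int r;

        r = mmu_topup_memory_caches(vcpu);
        if (r)
                return r;
        spin_lock(&vcpu->kvm->mmu_lock);
        kvm_mmu_free_some_pages(vcpu);
        /* allocates the EPT root and records it in vcpu->arch.mmu.root_hpa */
        r = mmu_alloc_roots(vcpu);
        spin_unlock(&vcpu->kvm->mmu_lock);
        if (r)
                return r;
        /* with EPT enabled this calls vmx_set_cr3() shown above */
        kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa);
        return r;
}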
2. EPT is built dynamically and reuses the shadow-page code in KVM
The EPT tables are populated from the EPT-violation VM exit; each entry maps a guest physical address (GPA) directly to the corresponding host physical address (HPA). The page-fault handler used in this mode is tdp_page_fault (TDP = two-dimensional paging):
static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
                          u32 error_code)
{
        pfn_t pfn;
        int r;
        int largepage = 0;
        gfn_t gfn = gpa >> PAGE_SHIFT;          /* faulting guest frame number */
        unsigned long mmu_seq;

        ASSERT(vcpu);
        ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));

        r = mmu_topup_memory_caches(vcpu);
        if (r)
                return r;

        /* use a large-page mapping if the whole large page is backed */
        if (is_largepage_backed(vcpu, gfn &
                        ~(KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL) - 1))) {
                gfn &= ~(KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL) - 1);
                largepage = 1;
        }
        mmu_seq = vcpu->kvm->mmu_notifier_seq;
        smp_rmb();
        /* gfn -> host pfn: this is where the GPA gets its backing host page */
        pfn = gfn_to_pfn(vcpu->kvm, gfn);
        if (is_error_pfn(pfn)) {
                kvm_release_pfn_clean(pfn);
                return 1;
        }
        spin_lock(&vcpu->kvm->mmu_lock);
        if (mmu_notifier_retry(vcpu, mmu_seq))
                goto out_unlock;
        kvm_mmu_free_some_pages(vcpu);
        /* install the gpa -> pfn translation in the EPT */
        r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
                         largepage, gfn, pfn);
        spin_unlock(&vcpu->kvm->mmu_lock);

        return r;

out_unlock:
        spin_unlock(&vcpu->kvm->mmu_lock);
        kvm_release_pfn_clean(pfn);
        return 0;
}
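__direct_map, called near the end of tdp_page_fault, walks the EPT levels for the faulting GPA, allocating intermediate tables as needed, and writes a leaf entry that points at the host page frame. The self-contained toy below is not kernel code; it just models that walk-and-install step over a software 4-level table with made-up names:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define ENTRIES      512          /* 9 bits of index per level */
#define PAGE_SHIFT   12
#define PRESENT      1ULL
#define ADDR_MASK    0x000ffffffffff000ULL

static uint64_t *alloc_table(void)
{
        uint64_t *t = aligned_alloc(4096, ENTRIES * sizeof(uint64_t));
        memset(t, 0, ENTRIES * sizeof(uint64_t));
        return t;
}

/* index of 'gpa' at the given level (4 = top, 1 = leaf) */
static unsigned idx(uint64_t gpa, int level)
{
        return (gpa >> (PAGE_SHIFT + 9 * (level - 1))) & (ENTRIES - 1);
}

/* install a 4 KB mapping gpa -> hpa, creating intermediate tables on demand */
static void map_gpa(uint64_t *root, uint64_t gpa, uint64_t hpa)
{
        uint64_t *table = root;

        for (int level = 4; level > 1; level--) {
                unsigned i = idx(gpa, level);
                if (!(table[i] & PRESENT))
                        table[i] = (uint64_t)(uintptr_t)alloc_table() | PRESENT;
                table = (uint64_t *)(uintptr_t)(table[i] & ADDR_MASK);
        }
        table[idx(gpa, 1)] = (hpa & ADDR_MASK) | PRESENT;
}

/* walk the table the way the CPU would and return the hpa (or 0 if unmapped) */
static uint64_t translate(uint64_t *root, uint64_t gpa)
{
        uint64_t *table = root;

        for (int level = 4; level > 1; level--) {
                uint64_t e = table[idx(gpa, level)];
                if (!(e & PRESENT))
                        return 0;
                table = (uint64_t *)(uintptr_t)(e & ADDR_MASK);
        }
        uint64_t leaf = table[idx(gpa, 1)];
        return (leaf & PRESENT) ? (leaf & ADDR_MASK) | (gpa & 0xfff) : 0;
}

int main(void)
{
        uint64_t *root = alloc_table();

        map_gpa(root, 0x7654321000ULL, 0xabcde000ULL);   /* the "EPT violation" is fixed */
        printf("0x7654321abc -> 0x%llx\n",
               (unsigned long long)translate(root, 0x7654321abcULL));
        return 0;
}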
When EPT is enabled, KVM installs tdp_page_fault as the MMU's page-fault callback:

        context->page_fault = tdp_page_fault;
EPT violations reach this handler through the VM-exit path, which is dispatched via the handle_exit hook:

static struct kvm_x86_ops vmx_x86_ops = {
        .....
        .handle_exit = vmx_handle_exit,
        .....
};
static long kvm_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
{
        case KVM_RUN:
                r = -EINVAL;
                if (arg)
                        goto out;
                r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
                break;
}
kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-> __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-> vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-> kvm_x86_ops->handle_exit(kvm_run, vcpu)
-> vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
-> kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run)
   [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation -> handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
   [EXIT_REASON_EXCEPTION_NMI] = handle_exception -> handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-> kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
-> vcpu->arch.mmu.page_fault(vcpu, cr2, error_code);
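handle_ept_violation itself is short: it reads the faulting guest-physical address out of the VMCS and feeds it to the MMU, which dispatches to tdp_page_fault through the page_fault callback. A simplified paraphrase (based on the same KVM version; the real function also validates the exit qualification and error bits):

static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
        gpa_t gpa;

        /* the real handler first checks the exit qualification read from
         * the VMCS (access type, guest-linear-address validity, ...) */

        /* the guest physical address that missed in the EPT */
        gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);

        /* hands the fault to the MMU; with EPT this ends up in
         * tdp_page_fault via vcpu->arch.mmu.page_fault */
        return kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
}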
3. All guest physical addresses go through the extended page tables, including the address in CR3 and the addresses stored in guest PDEs and PTEs; every guest paging-structure entry is itself translated through EPT before it is used.
4. TLB management with invept
The invept instruction invalidates cached GPA-to-HPA translations derived from the EPT. KVM uses it at three granularities, wrapped in the helpers below:
static inline void ept_sync_global(void)
{
        if (cpu_has_vmx_invept_global())
                __invept(VMX_EPT_EXTENT_GLOBAL, 0, 0);
}

static inline void ept_sync_context(u64 eptp)
{
        if (enable_ept) {
                if (cpu_has_vmx_invept_context())
                        __invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0);
                else
                        ept_sync_global();
        }
}

static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa)
{
        if (enable_ept) {
                if (cpu_has_vmx_invept_individual_addr())
                        __invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR,
                                        eptp, gpa);
                else
                        ept_sync_context(eptp);
        }
}
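These helpers are reached from vmx_flush_tlb, which is also what vmx_set_cr3 calls before loading GUEST_CR3 (see point 1). A simplified paraphrase of it (same KVM version assumed; details may differ) ties the EPT and VPID flushes together:

static inline void vmx_flush_tlb(struct kvm_vcpu *vcpu)
{
        /* drop linear translations tagged with this guest's VPID (see point 5) */
        vpid_sync_vcpu_all(to_vmx(vcpu));
        /* drop GPA->HPA translations cached for this EPT context */
        if (enable_ept)
                ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa));
}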
5. Support for software-controlled TLB invalidation with invvpid
– VPID is activated when the "enable VPID" control bit is set in the VMCS.
– A 16-bit virtual-processor-ID (VPID) field was added to the VMCS.
– The VMM allocates a unique VPID for each guest OS.
– The VMM itself uses VPID 0x0000; no guest can have this VPID.
– Cached linear translations are tagged with the VPID value.
How does KVM allocate a VPID? It uses the vmx_vpid_bitmap:
static void allocate_vpid(struct vcpu_vmx *vmx)
{
        int vpid;

        vmx->vpid = 0;
        if (!enable_vpid)
                return;
        spin_lock(&vmx_vpid_lock);
        /* pick the lowest unused VPID from the global bitmap */
        vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
        if (vpid < VMX_NR_VPIDS) {
                vmx->vpid = vpid;
                __set_bit(vpid, vmx_vpid_bitmap);
        }
        spin_unlock(&vmx_vpid_lock);
}
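The invvpid side is not shown above: when KVM needs to drop a guest's tagged linear translations (for example from vmx_flush_tlb in point 4), it issues invvpid for that VPID. A simplified paraphrase of the helper it uses (same KVM version assumed; the descriptor layout with the VPID in bits 15:0 and the linear address in bits 127:64 comes from the Intel SDM):

/* Invalidate all TLB entries tagged with this vCPU's VPID.
 * __invvpid() executes the INVVPID instruction; its 128-bit descriptor
 * carries the VPID in bits 15:0 and, for the single-address extent,
 * a linear address in bits 127:64.
 */
static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx)
{
        if (vmx->vpid == 0)             /* VPID 0 is reserved for the host */
                return;

        __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0);
}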