author     Avi Kivity <avi@redhat.com>  2010-03-15 07:59:57 -0400
committer  Avi Kivity <avi@redhat.com>  2010-05-17 05:15:43 -0400
commit     08e850c6536db302050c0287649e68e3bbdfe2c7
tree       9eb7e554b53ea9eb1cb408f81234f404a43a54ab /arch/x86/kvm
parent     fbc5d139bb92e6822e4c000f97631a072d8babf9
KVM: MMU: Reinstate pte prefetch on invlpg
Commit fb341f57 removed the pte prefetch on guest invlpg, citing guest races.
However, the SDM is adamant that prefetch is allowed:
"The processor may create entries in paging-structure caches for
translations required for prefetches and for accesses that are a
result of speculative execution that would never actually occur
in the executed code path."
And, in fact, there was a race in the prefetch code: we picked up the pte
without the mmu lock held, so a prefetch running on behalf of an older
invlpg could install its stale pte over the result of a newer invlpg.
Reinstate the prefetch logic, but this time use a counter to note whether
another invlpg has executed meanwhile. If a race occurred, do not install
the pte.
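The counter check follows the usual sequence-count pattern: snapshot the
counter before the unlocked read, re-check it under the lock, and throw
the value away on a mismatch. Below is a minimal userspace sketch of that
pattern (hypothetical names; C11 atomics and a pthread mutex stand in for
the kernel's atomic_t and mmu_lock):

#include <inttypes.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static atomic_int invlpg_counter;	/* models kvm->arch.invlpg_counter */
static pthread_mutex_t mmu_lock = PTHREAD_MUTEX_INITIALIZER;
static uint64_t guest_pte = 0x1234;	/* pretend guest page table entry */
static uint64_t shadow_pte;		/* pretend shadow pte */

/* Models the prefetch side (kvm_mmu_pte_write). */
static void prefetch_pte(void)
{
	/* Snapshot the counter, then read the gpte with no lock held. */
	int snapshot = atomic_load(&invlpg_counter);
	uint64_t gentry = guest_pte;	/* kvm_read_guest() in the kernel */

	pthread_mutex_lock(&mmu_lock);
	/* A newer invlpg may have run since the snapshot; the value read
	 * above may then be stale, so refuse to install it. */
	if (atomic_load(&invlpg_counter) != snapshot)
		gentry = 0;
	if (gentry)
		shadow_pte = gentry;
	pthread_mutex_unlock(&mmu_lock);
}

/* Models the invlpg side: zap under the lock and bump the counter so
 * any in-flight prefetch notices it is out of date. */
static void invlpg(void)
{
	pthread_mutex_lock(&mmu_lock);
	shadow_pte = 0;
	atomic_fetch_add(&invlpg_counter, 1);
	pthread_mutex_unlock(&mmu_lock);
}

int main(void)
{
	prefetch_pte();		/* no race: pte installed */
	invlpg();		/* pte zapped, counter bumped */
	printf("shadow_pte = %#" PRIx64 "\n", shadow_pte);
	return 0;
}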
Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Diffstat (limited to 'arch/x86/kvm')
 arch/x86/kvm/mmu.c         | 37 +++++++++++++++++++++++--------------
 arch/x86/kvm/paging_tmpl.h | 15 +++++++++++++++
 2 files changed, 38 insertions(+), 14 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 91f8b171c825..064c3efb49dc 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2613,20 +2613,11 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	int flooded = 0;
 	int npte;
 	int r;
+	int invlpg_counter;
 
 	pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
 
-	switch (bytes) {
-	case 4:
-		gentry = *(const u32 *)new;
-		break;
-	case 8:
-		gentry = *(const u64 *)new;
-		break;
-	default:
-		gentry = 0;
-		break;
-	}
+	invlpg_counter = atomic_read(&vcpu->kvm->arch.invlpg_counter);
 
 	/*
 	 * Assume that the pte write on a page table of the same type
@@ -2634,16 +2625,34 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	 * (might be false while changing modes). Note it is verified later
 	 * by update_pte().
 	 */
-	if (is_pae(vcpu) && bytes == 4) {
+	if ((is_pae(vcpu) && bytes == 4) || !new) {
 		/* Handle a 32-bit guest writing two halves of a 64-bit gpte */
-		gpa &= ~(gpa_t)7;
-		r = kvm_read_guest(vcpu->kvm, gpa, &gentry, 8);
+		if (is_pae(vcpu)) {
+			gpa &= ~(gpa_t)7;
+			bytes = 8;
+		}
+		r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8));
 		if (r)
 			gentry = 0;
+		new = (const u8 *)&gentry;
+	}
+
+	switch (bytes) {
+	case 4:
+		gentry = *(const u32 *)new;
+		break;
+	case 8:
+		gentry = *(const u64 *)new;
+		break;
+	default:
+		gentry = 0;
+		break;
 	}
 
 	mmu_guess_page_from_pte_write(vcpu, gpa, gentry);
 	spin_lock(&vcpu->kvm->mmu_lock);
+	if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter)
+		gentry = 0;
 	kvm_mmu_access_page(vcpu, gfn);
 	kvm_mmu_free_some_pages(vcpu);
 	++vcpu->kvm->stat.mmu_pte_write;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 4b37e1acd375..067797a72768 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -463,6 +463,7 @@ out_unlock:
 static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 {
 	struct kvm_shadow_walk_iterator iterator;
+	gpa_t pte_gpa = -1;
 	int level;
 	u64 *sptep;
 	int need_flush = 0;
@@ -476,6 +477,10 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 		if (level == PT_PAGE_TABLE_LEVEL ||
 		    ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) ||
 		    ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) {
+			struct kvm_mmu_page *sp = page_header(__pa(sptep));
+
+			pte_gpa = (sp->gfn << PAGE_SHIFT);
+			pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
 
 			if (is_shadow_present_pte(*sptep)) {
 				rmap_remove(vcpu->kvm, sptep);
@@ -493,7 +498,17 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 
 	if (need_flush)
 		kvm_flush_remote_tlbs(vcpu->kvm);
+
+	atomic_inc(&vcpu->kvm->arch.invlpg_counter);
+
 	spin_unlock(&vcpu->kvm->mmu_lock);
+
+	if (pte_gpa == -1)
+		return;
+
+	if (mmu_topup_memory_caches(vcpu))
+		return;
+	kvm_mmu_pte_write(vcpu, pte_gpa, NULL, sizeof(pt_element_t), 0);
 }
 
 static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
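For reference, the pte_gpa computed in FNAME(invlpg) above is the
guest-physical address of the gpte that backs the zapped spte: sp->gfn is
the guest frame holding the guest page table, and the spte's index within
sp->spt doubles as the gpte's index. A tiny standalone check of that
arithmetic, with hypothetical values and PAGE_SHIFT = 12 plus an 8-byte
pt_element_t as in 64-bit paging:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define PAGE_SHIFT 12

int main(void)
{
	uint64_t gfn = 0x1000;	/* sp->gfn: frame of the guest page table */
	ptrdiff_t index = 3;	/* sptep - sp->spt */
	uint64_t pte_gpa = (gfn << PAGE_SHIFT) + index * sizeof(uint64_t);

	/* 0x1000 << 12 = 0x1000000, plus entry 3 * 8 bytes = 0x18 */
	assert(pte_gpa == 0x1000018);
	return 0;
}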