author	Avi Kivity <avi@redhat.com>	2010-03-15 07:59:57 -0400
committer	Avi Kivity <avi@redhat.com>	2010-05-17 05:15:43 -0400
commit	08e850c6536db302050c0287649e68e3bbdfe2c7 (patch)
tree	9eb7e554b53ea9eb1cb408f81234f404a43a54ab /arch/x86/kvm
parent	fbc5d139bb92e6822e4c000f97631a072d8babf9 (diff)
KVM: MMU: Reinstate pte prefetch on invlpg
Commit fb341f57 removed the pte prefetch on guest invlpg, citing guest races.
However, the SDM is adamant that prefetch is allowed:

  "The processor may create entries in paging-structure caches for
   translations required for prefetches and for accesses that are a
   result of speculative execution that would never actually occur in
   the executed code path."

And, in fact, there was a race in the prefetch code: we picked up the pte
without the mmu lock held, so an older invlpg could install the pte over
a newer invlpg.

Reinstate the prefetch logic, but this time note whether another invlpg has
executed using a counter.  If a race occurred, do not install the pte.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
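To make the locking rule concrete, here is a minimal, standalone user-space sketch (not kernel code; the names invalidation_counter, shadow_pte, try_prefetch_pte and invalidate_pte are illustrative only) of the same pattern: sample a generation counter before the lockless work, re-check it under the lock, and discard the result if an invalidation ran in between.

/*
 * Simplified illustration of the counter-based race check, assuming a
 * plain mutex and C11 atomics instead of mmu_lock and atomic_t.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static atomic_uint invalidation_counter;   /* bumped by every invalidation */
static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static uint64_t shadow_pte;                /* stands in for the cached translation */

/* Prefetch path: read the guest pte without the lock, then install it
 * only if no invalidation raced with us. */
static void try_prefetch_pte(uint64_t guest_pte)
{
	unsigned int snapshot = atomic_load(&invalidation_counter);

	/* ... potentially slow, lockless work (e.g. reading guest memory) ... */

	pthread_mutex_lock(&table_lock);
	if (atomic_load(&invalidation_counter) != snapshot) {
		/* An invalidation ran after we sampled the pte: drop it
		 * rather than install a possibly stale translation. */
		pthread_mutex_unlock(&table_lock);
		return;
	}
	shadow_pte = guest_pte;
	pthread_mutex_unlock(&table_lock);
}

/* Invalidation path: clear the cached translation and bump the counter
 * so in-flight prefetches notice they lost the race. */
static void invalidate_pte(void)
{
	pthread_mutex_lock(&table_lock);
	shadow_pte = 0;
	atomic_fetch_add(&invalidation_counter, 1);
	pthread_mutex_unlock(&table_lock);
}

int main(void)
{
	try_prefetch_pte(0x1234);
	invalidate_pte();
	try_prefetch_pte(0x5678);
	printf("shadow_pte = %#llx\n", (unsigned long long)shadow_pte);
	return 0;
}

In the patch itself the counter is kvm->arch.invlpg_counter: FNAME(invlpg) increments it under mmu_lock, and kvm_mmu_pte_write re-reads it after taking mmu_lock, zeroing gentry if the value changed.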
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--	arch/x86/kvm/mmu.c	37
-rw-r--r--	arch/x86/kvm/paging_tmpl.h	15
2 files changed, 38 insertions(+), 14 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 91f8b171c825..064c3efb49dc 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2613,20 +2613,11 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	int flooded = 0;
 	int npte;
 	int r;
+	int invlpg_counter;
 
 	pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
 
-	switch (bytes) {
-	case 4:
-		gentry = *(const u32 *)new;
-		break;
-	case 8:
-		gentry = *(const u64 *)new;
-		break;
-	default:
-		gentry = 0;
-		break;
-	}
+	invlpg_counter = atomic_read(&vcpu->kvm->arch.invlpg_counter);
 
 	/*
 	 * Assume that the pte write on a page table of the same type
@@ -2634,16 +2625,34 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	 * (might be false while changing modes). Note it is verified later
 	 * by update_pte().
 	 */
-	if (is_pae(vcpu) && bytes == 4) {
+	if ((is_pae(vcpu) && bytes == 4) || !new) {
 		/* Handle a 32-bit guest writing two halves of a 64-bit gpte */
-		gpa &= ~(gpa_t)7;
-		r = kvm_read_guest(vcpu->kvm, gpa, &gentry, 8);
+		if (is_pae(vcpu)) {
+			gpa &= ~(gpa_t)7;
+			bytes = 8;
+		}
+		r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8));
 		if (r)
 			gentry = 0;
+		new = (const u8 *)&gentry;
+	}
+
+	switch (bytes) {
+	case 4:
+		gentry = *(const u32 *)new;
+		break;
+	case 8:
+		gentry = *(const u64 *)new;
+		break;
+	default:
+		gentry = 0;
+		break;
 	}
 
 	mmu_guess_page_from_pte_write(vcpu, gpa, gentry);
 	spin_lock(&vcpu->kvm->mmu_lock);
+	if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter)
+		gentry = 0;
 	kvm_mmu_access_page(vcpu, gfn);
 	kvm_mmu_free_some_pages(vcpu);
 	++vcpu->kvm->stat.mmu_pte_write;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 4b37e1acd375..067797a72768 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -463,6 +463,7 @@ out_unlock:
 static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 {
 	struct kvm_shadow_walk_iterator iterator;
+	gpa_t pte_gpa = -1;
 	int level;
 	u64 *sptep;
 	int need_flush = 0;
@@ -476,6 +477,10 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 		if (level == PT_PAGE_TABLE_LEVEL ||
 		    ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) ||
 		    ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) {
+			struct kvm_mmu_page *sp = page_header(__pa(sptep));
+
+			pte_gpa = (sp->gfn << PAGE_SHIFT);
+			pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
 
 			if (is_shadow_present_pte(*sptep)) {
 				rmap_remove(vcpu->kvm, sptep);
@@ -493,7 +498,17 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 
 	if (need_flush)
 		kvm_flush_remote_tlbs(vcpu->kvm);
+
+	atomic_inc(&vcpu->kvm->arch.invlpg_counter);
+
 	spin_unlock(&vcpu->kvm->mmu_lock);
+
+	if (pte_gpa == -1)
+		return;
+
+	if (mmu_topup_memory_caches(vcpu))
+		return;
+	kvm_mmu_pte_write(vcpu, pte_gpa, NULL, sizeof(pt_element_t), 0);
 }
 
 static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,