author     Avi Kivity <avi@redhat.com>  2010-03-15 07:59:57 -0400
committer  Avi Kivity <avi@redhat.com>  2010-05-17 05:15:43 -0400
commit     08e850c6536db302050c0287649e68e3bbdfe2c7
tree       9eb7e554b53ea9eb1cb408f81234f404a43a54ab /arch/x86/kvm
parent     fbc5d139bb92e6822e4c000f97631a072d8babf9
KVM: MMU: Reinstate pte prefetch on invlpg
Commit fb341f57 removed the pte prefetch on guest invlpg, citing guest races.
However, the SDM is adamant that prefetch is allowed:
"The processor may create entries in paging-structure caches for
translations required for prefetches and for accesses that are a
result of speculative execution that would never actually occur
in the executed code path."
And, in fact, there was a race in the prefetch code: we picked up the pte
without the mmu lock held, so a prefetch running on behalf of an older
invlpg could install its stale pte over the result of a newer invlpg.
Reinstate the prefetch logic, but this time use a counter to note whether
another invlpg has executed meanwhile. If a race occurred, do not install
the pte.
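The counter check follows the usual sequence-count pattern: snapshot the
counter before the unlocked read, re-check it under the lock, and throw
the value away on a mismatch. Below is a minimal userspace sketch of that
pattern (hypothetical names; C11 atomics and a pthread mutex stand in for
the kernel's atomic_t and mmu_lock):

#include <inttypes.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static atomic_int invlpg_counter;	/* models kvm->arch.invlpg_counter */
static pthread_mutex_t mmu_lock = PTHREAD_MUTEX_INITIALIZER;
static uint64_t guest_pte = 0x1234;	/* pretend guest page table entry */
static uint64_t shadow_pte;		/* pretend shadow pte */

/* Models the prefetch side (kvm_mmu_pte_write). */
static void prefetch_pte(void)
{
	/* Snapshot the counter, then read the gpte with no lock held. */
	int snapshot = atomic_load(&invlpg_counter);
	uint64_t gentry = guest_pte;	/* kvm_read_guest() in the kernel */

	pthread_mutex_lock(&mmu_lock);
	/* A newer invlpg may have run since the snapshot; the value read
	 * above may then be stale, so refuse to install it. */
	if (atomic_load(&invlpg_counter) != snapshot)
		gentry = 0;
	if (gentry)
		shadow_pte = gentry;
	pthread_mutex_unlock(&mmu_lock);
}

/* Models the invlpg side: zap under the lock and bump the counter so
 * any in-flight prefetch notices it is out of date. */
static void invlpg(void)
{
	pthread_mutex_lock(&mmu_lock);
	shadow_pte = 0;
	atomic_fetch_add(&invlpg_counter, 1);
	pthread_mutex_unlock(&mmu_lock);
}

int main(void)
{
	prefetch_pte();		/* no race: pte installed */
	invlpg();		/* pte zapped, counter bumped */
	printf("shadow_pte = %#" PRIx64 "\n", shadow_pte);
	return 0;
}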
Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Diffstat (limited to 'arch/x86/kvm')
 arch/x86/kvm/mmu.c         | 37 +++++++++++++++++++++++--------------
 arch/x86/kvm/paging_tmpl.h | 15 +++++++++++++++
 2 files changed, 38 insertions(+), 14 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 91f8b171c825..064c3efb49dc 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2613,20 +2613,11 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	int flooded = 0;
 	int npte;
 	int r;
+	int invlpg_counter;
 
 	pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
 
-	switch (bytes) {
-	case 4:
-		gentry = *(const u32 *)new;
-		break;
-	case 8:
-		gentry = *(const u64 *)new;
-		break;
-	default:
-		gentry = 0;
-		break;
-	}
+	invlpg_counter = atomic_read(&vcpu->kvm->arch.invlpg_counter);
 
 	/*
 	 * Assume that the pte write on a page table of the same type
@@ -2634,16 +2625,34 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	 * (might be false while changing modes). Note it is verified later
 	 * by update_pte().
 	 */
-	if (is_pae(vcpu) && bytes == 4) {
+	if ((is_pae(vcpu) && bytes == 4) || !new) {
 		/* Handle a 32-bit guest writing two halves of a 64-bit gpte */
-		gpa &= ~(gpa_t)7;
-		r = kvm_read_guest(vcpu->kvm, gpa, &gentry, 8);
+		if (is_pae(vcpu)) {
+			gpa &= ~(gpa_t)7;
+			bytes = 8;
+		}
+		r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8));
 		if (r)
 			gentry = 0;
+		new = (const u8 *)&gentry;
+	}
+
+	switch (bytes) {
+	case 4:
+		gentry = *(const u32 *)new;
+		break;
+	case 8:
+		gentry = *(const u64 *)new;
+		break;
+	default:
+		gentry = 0;
+		break;
 	}
 
 	mmu_guess_page_from_pte_write(vcpu, gpa, gentry);
 	spin_lock(&vcpu->kvm->mmu_lock);
+	if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter)
+		gentry = 0;
 	kvm_mmu_access_page(vcpu, gfn);
 	kvm_mmu_free_some_pages(vcpu);
 	++vcpu->kvm->stat.mmu_pte_write;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 4b37e1acd375..067797a72768 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -463,6 +463,7 @@ out_unlock:
 static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 {
 	struct kvm_shadow_walk_iterator iterator;
+	gpa_t pte_gpa = -1;
 	int level;
 	u64 *sptep;
 	int need_flush = 0;
@@ -476,6 +477,10 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 		if (level == PT_PAGE_TABLE_LEVEL ||
 		    ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) ||
 		    ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) {
+			struct kvm_mmu_page *sp = page_header(__pa(sptep));
+
+			pte_gpa = (sp->gfn << PAGE_SHIFT);
+			pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
 
 			if (is_shadow_present_pte(*sptep)) {
 				rmap_remove(vcpu->kvm, sptep);
@@ -493,7 +498,17 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 
 	if (need_flush)
 		kvm_flush_remote_tlbs(vcpu->kvm);
+
+	atomic_inc(&vcpu->kvm->arch.invlpg_counter);
+
 	spin_unlock(&vcpu->kvm->mmu_lock);
+
+	if (pte_gpa == -1)
+		return;
+
+	if (mmu_topup_memory_caches(vcpu))
+		return;
+	kvm_mmu_pte_write(vcpu, pte_gpa, NULL, sizeof(pt_element_t), 0);
 }
 
 static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
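For reference, the pte_gpa computed in FNAME(invlpg) above is the
guest-physical address of the gpte that backs the zapped spte: sp->gfn is
the guest frame holding the guest page table, and the spte's index within
sp->spt doubles as the gpte's index. A tiny standalone check of that
arithmetic, with hypothetical values and PAGE_SHIFT = 12 plus an 8-byte
pt_element_t as in 64-bit paging:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define PAGE_SHIFT 12

int main(void)
{
	uint64_t gfn = 0x1000;	/* sp->gfn: frame of the guest page table */
	ptrdiff_t index = 3;	/* sptep - sp->spt */
	uint64_t pte_gpa = (gfn << PAGE_SHIFT) + index * sizeof(uint64_t);

	/* 0x1000 << 12 = 0x1000000, plus entry 3 * 8 bytes = 0x18 */
	assert(pte_gpa == 0x1000018);
	return 0;
}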