diff options
author | Avi Kivity <avi@qumranet.com> | 2008-05-15 06:51:35 -0400 |
---|---|---|
committer | Avi Kivity <avi@qumranet.com> | 2008-07-20 05:40:50 -0400 |
commit | 1b7fcd3263e5f12dba43d27b64e1578bec070c28 (patch) | |
tree | 7f8d9679404f0b46803c6b1cf926235d170606ee /arch/x86/kvm/mmu.c | |
parent | 7682f2d0dd3ff5bd2756eac018a5b4e7e30ef16c (diff) |
KVM: MMU: Fix false flooding when a pte points to page table
The KVM MMU tries to detect when a speculative pte update is not actually
used by a demand fault, by checking the accessed bit of the shadow pte. If
the shadow pte has not been accessed, we deem that page table flooded and
remove the shadow page table, allowing further pte updates to proceed
without emulation.
However, if the pte itself points at a page table and is only used for write
operations, the accessed bit will never be set, since all accesses will happen
through the emulator.
This is exactly what happens with kscand on old (2.4.x) HIGHMEM kernels.
The kernel points a kmap_atomic() pte at a page table, and then
proceeds with read-modify-write operations to look at the dirty and accessed
bits. We get a false flood trigger on the kmap ptes, which results in the
mmu spending all its time setting up and tearing down shadows.
Fix by setting the shadow accessed bit on emulated accesses.
Signed-off-by: Avi Kivity <avi@qumranet.com>
Diffstat (limited to 'arch/x86/kvm/mmu.c')
-rw-r--r-- | arch/x86/kvm/mmu.c | 17 |
1 files changed, 16 insertions, 1 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 8e449dbcc596..53f1ed852ca2 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -1122,8 +1122,10 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | |||
1122 | else | 1122 | else |
1123 | kvm_release_pfn_clean(pfn); | 1123 | kvm_release_pfn_clean(pfn); |
1124 | } | 1124 | } |
1125 | if (!ptwrite || !*ptwrite) | 1125 | if (speculative) { |
1126 | vcpu->arch.last_pte_updated = shadow_pte; | 1126 | vcpu->arch.last_pte_updated = shadow_pte; |
1127 | vcpu->arch.last_pte_gfn = gfn; | ||
1128 | } | ||
1127 | } | 1129 | } |
1128 | 1130 | ||
1129 | static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) | 1131 | static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) |
@@ -1671,6 +1673,18 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
1671 | vcpu->arch.update_pte.pfn = pfn; | 1673 | vcpu->arch.update_pte.pfn = pfn; |
1672 | } | 1674 | } |
1673 | 1675 | ||
1676 | static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
1677 | { | ||
1678 | u64 *spte = vcpu->arch.last_pte_updated; | ||
1679 | |||
1680 | if (spte | ||
1681 | && vcpu->arch.last_pte_gfn == gfn | ||
1682 | && shadow_accessed_mask | ||
1683 | && !(*spte & shadow_accessed_mask) | ||
1684 | && is_shadow_present_pte(*spte)) | ||
1685 | set_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte); | ||
1686 | } | ||
1687 | |||
1674 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 1688 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
1675 | const u8 *new, int bytes) | 1689 | const u8 *new, int bytes) |
1676 | { | 1690 | { |
@@ -1694,6 +1708,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
1694 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); | 1708 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); |
1695 | mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes); | 1709 | mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes); |
1696 | spin_lock(&vcpu->kvm->mmu_lock); | 1710 | spin_lock(&vcpu->kvm->mmu_lock); |
1711 | kvm_mmu_access_page(vcpu, gfn); | ||
1697 | kvm_mmu_free_some_pages(vcpu); | 1712 | kvm_mmu_free_some_pages(vcpu); |
1698 | ++vcpu->kvm->stat.mmu_pte_write; | 1713 | ++vcpu->kvm->stat.mmu_pte_write; |
1699 | kvm_mmu_audit(vcpu, "pre pte write"); | 1714 | kvm_mmu_audit(vcpu, "pre pte write"); |