author		Andres Lagar-Cavilla <andreslc@google.com>	2014-09-22 17:54:42 -0400
committer	Paolo Bonzini <pbonzini@redhat.com>	2014-09-24 08:07:58 -0400
commit		57128468080a8b6ea452223036d3e417f748af55
tree		e89cfc349a9c39710cfab4e387119365a0d64958 /arch/x86/kvm
parent		8a9522d2fe6a1b643d3aef5ab7f097f73c601e7a
kvm: Fix page ageing bugs
1. We were calling clear_flush_young_notify in unmap_one, but we are
within an mmu notifier invalidate range scope there. The spte no longer
exists (it was zapped at range_start) and the accessed bit information
has already been propagated (via kvm_set_pfn_accessed). Simply call
clear_flush_young.
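
Concretely, the call site in try_to_unmap_one() (mm/rmap.c, outside the
arch/x86/kvm diffstat shown below) changes roughly as follows; the
surrounding context here is sketched from memory and may differ in
detail:

	/*
	 * We sit between mmu_notifier_invalidate_range_start() and
	 * _end(), so the secondary MMU has already dropped its sptes
	 * and reported their accessed bits; only the primary PTE's
	 * young bit still needs testing and clearing here.
	 */
-	if (ptep_clear_flush_young_notify(vma, address, pte)) {
+	if (ptep_clear_flush_young(vma, address, pte)) {
		ret = SWAP_FAIL;
		goto out_unmap;
	}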
2. We clear_flush_young on a primary MMU PMD, but this may be mapped
as a collection of PTEs by the secondary MMU (e.g. during log-dirty).
This required expanding the interface of the clear_flush_young mmu
notifier, so a lot of code has been trivially touched.
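
After widening, the hook in include/linux/mmu_notifier.h ends up looking
roughly like this (a sketch, with unrelated members and the original
comment elided):

	struct mmu_notifier_ops {
		...
		/*
		 * clear_flush_young is invoked for a range of addresses;
		 * the secondary MMU clears its accessed bits for the whole
		 * range and returns whether any mapping in [start, end)
		 * was young.
		 */
		int (*clear_flush_young)(struct mmu_notifier *mn,
					 struct mm_struct *mm,
					 unsigned long start,
					 unsigned long end);
		...
	};

Callers such as KVM's kvm_mmu_notifier_clear_flush_young() simply pass
the extra bound through.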
3. In the absence of shadow_accessed_mask (e.g. EPT without A/D bit
support), we emulate the accessed bit by zapping the spte. This requires
proper synchronization with MMU notifier consumers, like every other
removal of sptes does.
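
The consumer side of that synchronization is KVM's usual sequence-count
dance: the page fault path samples kvm->mmu_notifier_seq before
resolving the pfn and re-checks it under mmu_lock before installing a
spte. A simplified sketch of that pattern (as used by the fault handlers
in arch/x86/kvm/mmu.c):

	mmu_seq = kvm->mmu_notifier_seq;
	smp_rmb();
	pfn = gfn_to_pfn(...);			/* may sleep, drop locks */

	spin_lock(&kvm->mmu_lock);
	if (mmu_notifier_retry(kvm, mmu_seq))	/* seq bumped meanwhile */
		goto out_unlock;		/* drop pfn, retry the fault */
	/* ...install the spte... */
	spin_unlock(&kvm->mmu_lock);

Bumping mmu_notifier_seq while zapping in kvm_age_hva() (see the hunk
below) therefore forces any racing fault to retry, exactly as the other
spte-removal paths do.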
Signed-off-by: Andres Lagar-Cavilla <andreslc@google.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'arch/x86/kvm')
 arch/x86/kvm/mmu.c | 38 +++++++++++++++++++++++---------------
 1 file changed, 23 insertions(+), 15 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 47d534066325..3201e93ebd07 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1417,18 +1417,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 	struct rmap_iterator uninitialized_var(iter);
 	int young = 0;
 
-	/*
-	 * In case of absence of EPT Access and Dirty Bits supports,
-	 * emulate the accessed bit for EPT, by checking if this page has
-	 * an EPT mapping, and clearing it if it does. On the next access,
-	 * a new EPT mapping will be established.
-	 * This has some overhead, but not as much as the cost of swapping
-	 * out actively used pages or breaking up actively used hugepages.
-	 */
-	if (!shadow_accessed_mask) {
-		young = kvm_unmap_rmapp(kvm, rmapp, slot, gfn, level, data);
-		goto out;
-	}
+	BUG_ON(!shadow_accessed_mask);
 
 	for (sptep = rmap_get_first(*rmapp, &iter); sptep;
 	     sptep = rmap_get_next(&iter)) {
@@ -1440,7 +1429,6 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 			(unsigned long *)sptep);
 		}
 	}
-out:
 	trace_kvm_age_page(gfn, level, slot, young);
 	return young;
 }
@@ -1489,9 +1477,29 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 	kvm_flush_remote_tlbs(vcpu->kvm);
 }
 
-int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
 {
-	return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp);
+	/*
+	 * In case of absence of EPT Access and Dirty Bits supports,
+	 * emulate the accessed bit for EPT, by checking if this page has
+	 * an EPT mapping, and clearing it if it does. On the next access,
+	 * a new EPT mapping will be established.
+	 * This has some overhead, but not as much as the cost of swapping
+	 * out actively used pages or breaking up actively used hugepages.
+	 */
+	if (!shadow_accessed_mask) {
+		/*
+		 * We are holding the kvm->mmu_lock, and we are blowing up
+		 * shadow PTEs. MMU notifier consumers need to be kept at bay.
+		 * This is correct as long as we don't decouple the mmu_lock
+		 * protected regions (like invalidate_range_start|end does).
+		 */
+		kvm->mmu_notifier_seq++;
+		return kvm_handle_hva_range(kvm, start, end, 0,
+					    kvm_unmap_rmapp);
+	}
+
+	return kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp);
 }
 
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
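
For context, the generic side (virt/kvm/kvm_main.c, not covered by the
arch/x86/kvm diffstat above) reaches the new two-argument kvm_age_hva()
from the clear_flush_young notifier with mmu_lock already held; a sketch
from memory, so details may differ slightly:

	static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
						      struct mm_struct *mm,
						      unsigned long start,
						      unsigned long end)
	{
		struct kvm *kvm = mmu_notifier_to_kvm(mn);
		int young, idx;

		idx = srcu_read_lock(&kvm->srcu);
		spin_lock(&kvm->mmu_lock);

		young = kvm_age_hva(kvm, start, end);
		if (young)
			kvm_flush_remote_tlbs(kvm);

		spin_unlock(&kvm->mmu_lock);
		srcu_read_unlock(&kvm->srcu, idx);

		return young;
	}

This is what the in-code comment above relies on when it says the
kvm->mmu_lock is held while the sptes are being blown away.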