author	Wanpeng Li <wanpeng.li@linux.intel.com>	2015-04-03 03:40:25 -0400
committer	Paolo Bonzini <pbonzini@redhat.com>	2015-04-08 04:47:04 -0400
commit	3ea3b7fa9af067982f34b6745584558821eea79d (patch)
tree	64029d66d8a1179310bd61b1dadc9ae7dca2d93c /arch/x86/kvm
parent	1119022c71fb11826041787cf0ebffc1a1b0ba5b (diff)
kvm: mmu: lazy collapse small sptes into large sptes
Dirty logging tracks sptes in 4k granularity, meaning that large sptes have to be split. If live migration is successful, the guest in the source machine will be destroyed and large sptes will be created in the destination. However, if the guest continues to run in the source machine (for example if live migration fails), small sptes will remain around and cause bad performance.

This patch introduces lazy collapsing of small sptes into large sptes. The rmap will be scanned in ioctl context when dirty logging is stopped, dropping those sptes which can be collapsed into a single large-page spte. Later page faults will create the large-page sptes.

Reviewed-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Signed-off-by: Wanpeng Li <wanpeng.li@linux.intel.com>
Message-Id: <1428046825-6905-1-git-send-email-wanpeng.li@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
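[Editor's note, not part of the commit] The "ioctl context" above is the KVM_SET_USER_MEMORY_REGION call with which userspace re-registers a memory slot without KVM_MEM_LOG_DIRTY_PAGES once migration has been aborted; that flag transition is what the x86.c hunk below checks for before calling kvm_mmu_zap_collapsible_sptes(). A minimal userspace sketch, assuming an already created VM file descriptor vm_fd and an existing slot whose geometry is passed in, might look like:

/* Sketch only: clear dirty logging on a slot so the arch commit hook
 * can collapse the small sptes again. vm_fd, slot, gpa, size and hva
 * are illustrative parameters, not values from the patch. */
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int stop_dirty_logging(int vm_fd, __u32 slot, __u64 gpa,
			      __u64 size, __u64 hva)
{
	struct kvm_userspace_memory_region region = {
		.slot            = slot,
		.flags           = 0,	/* KVM_MEM_LOG_DIRTY_PAGES cleared */
		.guest_phys_addr = gpa,
		.memory_size     = size,
		.userspace_addr  = hva,
	};

	/* Re-registering the slot makes old->flags have the dirty-logging
	 * bit while new->flags does not, which is the condition the commit
	 * adds to kvm_arch_commit_memory_region(). */
	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}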
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--	arch/x86/kvm/mmu.c	73
-rw-r--r--	arch/x86/kvm/x86.c	17
2 files changed, 90 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index cee759299a35..146f295ee322 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4465,6 +4465,79 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
 	kvm_flush_remote_tlbs(kvm);
 }
 
+static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
+					 unsigned long *rmapp)
+{
+	u64 *sptep;
+	struct rmap_iterator iter;
+	int need_tlb_flush = 0;
+	pfn_t pfn;
+	struct kvm_mmu_page *sp;
+
+	for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
+		BUG_ON(!(*sptep & PT_PRESENT_MASK));
+
+		sp = page_header(__pa(sptep));
+		pfn = spte_to_pfn(*sptep);
+
+		/*
+		 * Only EPT supported for now; otherwise, one would need to
+		 * find out efficiently whether the guest page tables are
+		 * also using huge pages.
+		 */
+		if (sp->role.direct &&
+			!kvm_is_reserved_pfn(pfn) &&
+			PageTransCompound(pfn_to_page(pfn))) {
+			drop_spte(kvm, sptep);
+			sptep = rmap_get_first(*rmapp, &iter);
+			need_tlb_flush = 1;
+		} else
+			sptep = rmap_get_next(&iter);
+	}
+
+	return need_tlb_flush;
+}
+
+void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
+			struct kvm_memory_slot *memslot)
+{
+	bool flush = false;
+	unsigned long *rmapp;
+	unsigned long last_index, index;
+	gfn_t gfn_start, gfn_end;
+
+	spin_lock(&kvm->mmu_lock);
+
+	gfn_start = memslot->base_gfn;
+	gfn_end = memslot->base_gfn + memslot->npages - 1;
+
+	if (gfn_start >= gfn_end)
+		goto out;
+
+	rmapp = memslot->arch.rmap[0];
+	last_index = gfn_to_index(gfn_end, memslot->base_gfn,
+				PT_PAGE_TABLE_LEVEL);
+
+	for (index = 0; index <= last_index; ++index, ++rmapp) {
+		if (*rmapp)
+			flush |= kvm_mmu_zap_collapsible_spte(kvm, rmapp);
+
+		if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
+			if (flush) {
+				kvm_flush_remote_tlbs(kvm);
+				flush = false;
+			}
+			cond_resched_lock(&kvm->mmu_lock);
+		}
+	}
+
+	if (flush)
+		kvm_flush_remote_tlbs(kvm);
+
+out:
+	spin_unlock(&kvm->mmu_lock);
+}
+
 void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
 				   struct kvm_memory_slot *memslot)
 {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index faf044dba60c..b8cb1d091697 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7665,6 +7665,23 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 	new = id_to_memslot(kvm->memslots, mem->slot);
 
 	/*
+	 * Dirty logging tracks sptes in 4k granularity, meaning that large
+	 * sptes have to be split. If live migration is successful, the guest
+	 * in the source machine will be destroyed and large sptes will be
+	 * created in the destination. However, if the guest continues to run
+	 * in the source machine (for example if live migration fails), small
+	 * sptes will remain around and cause bad performance.
+	 *
+	 * Scan sptes if dirty logging has been stopped, dropping those
+	 * which can be collapsed into a single large-page spte. Later
+	 * page faults will create the large-page sptes.
+	 */
+	if ((change != KVM_MR_DELETE) &&
+		(old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
+		!(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
+		kvm_mmu_zap_collapsible_sptes(kvm, new);
+
+	/*
 	 * Set up write protection and/or dirty logging for the new slot.
 	 *
 	 * For KVM_MR_DELETE and KVM_MR_MOVE, the shadow pages of old slot have