path: root/arch/x86/kvm
author	Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>	2011-09-22 04:58:36 -0400
committer	Avi Kivity <avi@redhat.com>	2011-12-27 04:17:02 -0500
commit	a30f47cb150dd8d109923eeb65fe73e8b3e09046 (patch)
tree	09a723b407b0fefb1b05a30b490b8372ec2cf5cf /arch/x86/kvm
parent	5d9ca30e96f567b67a36727aa4ebb34911a2b84a (diff)
KVM: MMU: improve write-flooding detection
Write-flooding detection does not work well. When we handle a page write, if the last speculative spte has not been accessed, we treat the page as write-flooded. However, speculative sptes are created on many paths (pte prefetch, page sync, and so on), so the last speculative spte may not point to the written page, and the written page may still be accessed through other sptes; depending on the Accessed bit of the last speculative spte alone is therefore not enough.

Instead of detecting whether the page was accessed, detect whether the spte is accessed after it is written: if the spte is written frequently but not accessed, treat the page as no longer being a page table, or as unused for a long time.

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
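To make the new scheme concrete, here is a minimal, self-contained sketch of the idea (a userspace model, not the kernel code itself): every shadow page carries its own write_flooding_count, the counter is cleared whenever the page is actually looked up and used as a page table, and it is bumped on every emulated guest write; three writes with no intervening use mark the page as flooded. The reduced struct layout and the main() driver below are illustrative only; the level-1 exception and the >= 3 threshold mirror the patch.

#include <stdbool.h>
#include <stdio.h>

/* Reduced stand-in for struct kvm_mmu_page: only what this example needs. */
struct kvm_mmu_page {
	int level;                      /* shadow page level; 1 == last level       */
	unsigned write_flooding_count;  /* writes seen since the page was last used */
};

/* Called whenever the shadow page is reused or walked as a page table. */
static void clear_sp_write_flooding_count(struct kvm_mmu_page *sp)
{
	sp->write_flooding_count = 0;
}

/* Called on every emulated guest write that hits this shadow page. */
static bool detect_write_flooding(struct kvm_mmu_page *sp)
{
	/* Last-level pages can go unsync instead of staying write-protected. */
	if (sp->level == 1)
		return false;

	return ++sp->write_flooding_count >= 3;
}

int main(void)
{
	struct kvm_mmu_page sp = { .level = 2, .write_flooding_count = 0 };
	int i;

	/* Three writes with no use of the page in between -> flooded (zap it). */
	for (i = 0; i < 3; i++)
		printf("write %d: flooded=%d\n", i + 1, detect_write_flooding(&sp));

	/* Reusing the page as a page table resets the counter. */
	clear_sp_write_flooding_count(&sp);
	printf("after reuse, next write: flooded=%d\n", detect_write_flooding(&sp));
	return 0;
}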
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--	arch/x86/kvm/mmu.c	62
-rw-r--r--	arch/x86/kvm/paging_tmpl.h	12
2 files changed, 30 insertions, 44 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ca6f72ab4c3b..e9534cec003f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1653,6 +1653,18 @@ static void init_shadow_page_table(struct kvm_mmu_page *sp)
 		sp->spt[i] = 0ull;
 }
 
+static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp)
+{
+	sp->write_flooding_count = 0;
+}
+
+static void clear_sp_write_flooding_count(u64 *spte)
+{
+	struct kvm_mmu_page *sp = page_header(__pa(spte));
+
+	__clear_sp_write_flooding_count(sp);
+}
+
 static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 					     gfn_t gfn,
 					     gva_t gaddr,
@@ -1696,6 +1708,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 		} else if (sp->unsync)
 			kvm_mmu_mark_parents_unsync(sp);
 
+		__clear_sp_write_flooding_count(sp);
 		trace_kvm_mmu_get_page(sp, false);
 		return sp;
 	}
@@ -1848,15 +1861,6 @@ static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte)
 	mmu_page_remove_parent_pte(sp, parent_pte);
 }
 
-static void kvm_mmu_reset_last_pte_updated(struct kvm *kvm)
-{
-	int i;
-	struct kvm_vcpu *vcpu;
-
-	kvm_for_each_vcpu(i, vcpu, kvm)
-		vcpu->arch.last_pte_updated = NULL;
-}
-
 static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	u64 *parent_pte;
@@ -1916,7 +1920,6 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 	}
 
 	sp->role.invalid = 1;
-	kvm_mmu_reset_last_pte_updated(kvm);
 	return ret;
 }
 
@@ -2361,8 +2364,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		}
 	}
 	kvm_release_pfn_clean(pfn);
-	if (speculative)
-		vcpu->arch.last_pte_updated = sptep;
 }
 
 static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
@@ -3523,13 +3524,6 @@ static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, bool zap_page,
 		kvm_mmu_flush_tlb(vcpu);
 }
 
-static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu)
-{
-	u64 *spte = vcpu->arch.last_pte_updated;
-
-	return !!(spte && (*spte & shadow_accessed_mask));
-}
-
 static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
 				    const u8 *new, int *bytes)
 {
@@ -3570,22 +3564,16 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
  * If we're seeing too many writes to a page, it may no longer be a page table,
  * or we may be forking, in which case it is better to unmap the page.
  */
-static bool detect_write_flooding(struct kvm_vcpu *vcpu, gfn_t gfn)
+static bool detect_write_flooding(struct kvm_mmu_page *sp, u64 *spte)
 {
-	bool flooded = false;
-
-	if (gfn == vcpu->arch.last_pt_write_gfn
-	    && !last_updated_pte_accessed(vcpu)) {
-		++vcpu->arch.last_pt_write_count;
-		if (vcpu->arch.last_pt_write_count >= 3)
-			flooded = true;
-	} else {
-		vcpu->arch.last_pt_write_gfn = gfn;
-		vcpu->arch.last_pt_write_count = 1;
-		vcpu->arch.last_pte_updated = NULL;
-	}
+	/*
+	 * Skip write-flooding detected for the sp whose level is 1, because
+	 * it can become unsync, then the guest page is not write-protected.
+	 */
+	if (sp->role.level == 1)
+		return false;
 
-	return flooded;
+	return ++sp->write_flooding_count >= 3;
 }
 
 /*
@@ -3657,7 +3645,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	LIST_HEAD(invalid_list);
 	u64 entry, gentry, *spte;
 	int npte;
-	bool remote_flush, local_flush, zap_page, flooded, misaligned;
+	bool remote_flush, local_flush, zap_page;
 
 	/*
 	 * If we don't have indirect shadow pages, it means no page is
@@ -3683,12 +3671,12 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	++vcpu->kvm->stat.mmu_pte_write;
 	trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
 
-	flooded = detect_write_flooding(vcpu, gfn);
 	mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
 	for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) {
-		misaligned = detect_write_misaligned(sp, gpa, bytes);
+		spte = get_written_sptes(sp, gpa, &npte);
 
-		if (misaligned || flooded) {
+		if (detect_write_misaligned(sp, gpa, bytes) ||
+		      detect_write_flooding(sp, spte)) {
 			zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
 						     &invalid_list);
 			++vcpu->kvm->stat.mmu_flooded;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 9efb86035774..52e9d58cec2b 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -497,6 +497,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 	     shadow_walk_next(&it)) {
 		gfn_t table_gfn;
 
+		clear_sp_write_flooding_count(it.sptep);
 		drop_large_spte(vcpu, it.sptep);
 
 		sp = NULL;
@@ -522,6 +523,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 	     shadow_walk_next(&it)) {
 		gfn_t direct_gfn;
 
+		clear_sp_write_flooding_count(it.sptep);
 		validate_direct_spte(vcpu, it.sptep, direct_access);
 
 		drop_large_spte(vcpu, it.sptep);
@@ -536,6 +538,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 		link_shadow_page(it.sptep, sp);
 	}
 
+	clear_sp_write_flooding_count(it.sptep);
 	mmu_set_spte(vcpu, it.sptep, access, gw->pte_access,
 		     user_fault, write_fault, emulate, it.level,
 		     gw->gfn, pfn, prefault, map_writable);
@@ -599,11 +602,9 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
 	 */
 	if (!r) {
 		pgprintk("%s: guest page fault\n", __func__);
-		if (!prefault) {
+		if (!prefault)
 			inject_page_fault(vcpu, &walker.fault);
-			/* reset fork detector */
-			vcpu->arch.last_pt_write_count = 0;
-		}
+
 		return 0;
 	}
 
@@ -641,9 +642,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
 	pgprintk("%s: shadow pte %p %llx emulate %d\n", __func__,
 		 sptep, *sptep, emulate);
 
-	if (!emulate)
-		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
-
 	++vcpu->stat.pf_fixed;
 	trace_kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
 	spin_unlock(&vcpu->kvm->mmu_lock);