path: root/arch/x86/kvm
author	Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>	2011-09-22 04:58:36 -0400
committer	Avi Kivity <avi@redhat.com>	2011-12-27 04:17:02 -0500
commit	a30f47cb150dd8d109923eeb65fe73e8b3e09046 (patch)
tree	09a723b407b0fefb1b05a30b490b8372ec2cf5cf /arch/x86/kvm
parent	5d9ca30e96f567b67a36727aa4ebb34911a2b84a (diff)
KVM: MMU: improve write-flooding detection
Write-flooding detection does not work well. When we handle a page write, if the last speculative spte has not been accessed, we treat the page as write-flooded. However, speculative sptes are created on many paths (pte prefetch, page sync, and so on), so the last speculative spte may not point to the written page, and the written page may still be accessed through other sptes; depending on the Accessed bit of the last speculative spte alone is therefore not enough.

Instead of detecting whether the page was accessed, detect whether the spte is accessed after it is written: if the spte is written frequently but not accessed, treat the page as no longer being a page table, or as unused for a long time.

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
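To make the new scheme concrete, here is a minimal, self-contained sketch of the idea (a userspace model, not the kernel code itself): every shadow page carries its own write_flooding_count, the counter is cleared whenever the page is actually looked up and used as a page table, and it is bumped on every emulated guest write; three writes with no intervening use mark the page as flooded. The reduced struct layout and the main() driver below are illustrative only; the level-1 exception and the >= 3 threshold mirror the patch.

#include <stdbool.h>
#include <stdio.h>

/* Reduced stand-in for struct kvm_mmu_page: only what this example needs. */
struct kvm_mmu_page {
	int level;                      /* shadow page level; 1 == last level       */
	unsigned write_flooding_count;  /* writes seen since the page was last used */
};

/* Called whenever the shadow page is reused or walked as a page table. */
static void clear_sp_write_flooding_count(struct kvm_mmu_page *sp)
{
	sp->write_flooding_count = 0;
}

/* Called on every emulated guest write that hits this shadow page. */
static bool detect_write_flooding(struct kvm_mmu_page *sp)
{
	/* Last-level pages can go unsync instead of staying write-protected. */
	if (sp->level == 1)
		return false;

	return ++sp->write_flooding_count >= 3;
}

int main(void)
{
	struct kvm_mmu_page sp = { .level = 2, .write_flooding_count = 0 };
	int i;

	/* Three writes with no use of the page in between -> flooded (zap it). */
	for (i = 0; i < 3; i++)
		printf("write %d: flooded=%d\n", i + 1, detect_write_flooding(&sp));

	/* Reusing the page as a page table resets the counter. */
	clear_sp_write_flooding_count(&sp);
	printf("after reuse, next write: flooded=%d\n", detect_write_flooding(&sp));
	return 0;
}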
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--	arch/x86/kvm/mmu.c	62
-rw-r--r--	arch/x86/kvm/paging_tmpl.h	12
2 files changed, 30 insertions, 44 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ca6f72ab4c3b..e9534cec003f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1653,6 +1653,18 @@ static void init_shadow_page_table(struct kvm_mmu_page *sp)
 		sp->spt[i] = 0ull;
 }
 
+static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp)
+{
+	sp->write_flooding_count = 0;
+}
+
+static void clear_sp_write_flooding_count(u64 *spte)
+{
+	struct kvm_mmu_page *sp = page_header(__pa(spte));
+
+	__clear_sp_write_flooding_count(sp);
+}
+
 static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 					     gfn_t gfn,
 					     gva_t gaddr,
@@ -1696,6 +1708,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 		} else if (sp->unsync)
 			kvm_mmu_mark_parents_unsync(sp);
 
+		__clear_sp_write_flooding_count(sp);
 		trace_kvm_mmu_get_page(sp, false);
 		return sp;
 	}
@@ -1848,15 +1861,6 @@ static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte)
 	mmu_page_remove_parent_pte(sp, parent_pte);
 }
 
-static void kvm_mmu_reset_last_pte_updated(struct kvm *kvm)
-{
-	int i;
-	struct kvm_vcpu *vcpu;
-
-	kvm_for_each_vcpu(i, vcpu, kvm)
-		vcpu->arch.last_pte_updated = NULL;
-}
-
 static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	u64 *parent_pte;
@@ -1916,7 +1920,6 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 	}
 
 	sp->role.invalid = 1;
-	kvm_mmu_reset_last_pte_updated(kvm);
 	return ret;
 }
 
@@ -2361,8 +2364,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		}
 	}
 	kvm_release_pfn_clean(pfn);
-	if (speculative)
-		vcpu->arch.last_pte_updated = sptep;
 }
 
 static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
@@ -3523,13 +3524,6 @@ static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, bool zap_page,
 		kvm_mmu_flush_tlb(vcpu);
 }
 
-static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu)
-{
-	u64 *spte = vcpu->arch.last_pte_updated;
-
-	return !!(spte && (*spte & shadow_accessed_mask));
-}
-
 static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
 				    const u8 *new, int *bytes)
 {
@@ -3570,22 +3564,16 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
  * If we're seeing too many writes to a page, it may no longer be a page table,
  * or we may be forking, in which case it is better to unmap the page.
  */
-static bool detect_write_flooding(struct kvm_vcpu *vcpu, gfn_t gfn)
+static bool detect_write_flooding(struct kvm_mmu_page *sp, u64 *spte)
 {
-	bool flooded = false;
-
-	if (gfn == vcpu->arch.last_pt_write_gfn
-	    && !last_updated_pte_accessed(vcpu)) {
-		++vcpu->arch.last_pt_write_count;
-		if (vcpu->arch.last_pt_write_count >= 3)
-			flooded = true;
-	} else {
-		vcpu->arch.last_pt_write_gfn = gfn;
-		vcpu->arch.last_pt_write_count = 1;
-		vcpu->arch.last_pte_updated = NULL;
-	}
+	/*
+	 * Skip write-flooding detected for the sp whose level is 1, because
+	 * it can become unsync, then the guest page is not write-protected.
+	 */
+	if (sp->role.level == 1)
+		return false;
 
-	return flooded;
+	return ++sp->write_flooding_count >= 3;
 }
 
 /*
@@ -3657,7 +3645,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	LIST_HEAD(invalid_list);
 	u64 entry, gentry, *spte;
 	int npte;
-	bool remote_flush, local_flush, zap_page, flooded, misaligned;
+	bool remote_flush, local_flush, zap_page;
 
 	/*
 	 * If we don't have indirect shadow pages, it means no page is
@@ -3683,12 +3671,12 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	++vcpu->kvm->stat.mmu_pte_write;
 	trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
 
-	flooded = detect_write_flooding(vcpu, gfn);
 	mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
 	for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) {
-		misaligned = detect_write_misaligned(sp, gpa, bytes);
+		spte = get_written_sptes(sp, gpa, &npte);
 
-		if (misaligned || flooded) {
+		if (detect_write_misaligned(sp, gpa, bytes) ||
+		      detect_write_flooding(sp, spte)) {
 			zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
 						     &invalid_list);
 			++vcpu->kvm->stat.mmu_flooded;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 9efb86035774..52e9d58cec2b 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -497,6 +497,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 	     shadow_walk_next(&it)) {
 		gfn_t table_gfn;
 
+		clear_sp_write_flooding_count(it.sptep);
 		drop_large_spte(vcpu, it.sptep);
 
 		sp = NULL;
@@ -522,6 +523,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 	     shadow_walk_next(&it)) {
 		gfn_t direct_gfn;
 
+		clear_sp_write_flooding_count(it.sptep);
 		validate_direct_spte(vcpu, it.sptep, direct_access);
 
 		drop_large_spte(vcpu, it.sptep);
@@ -536,6 +538,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 		link_shadow_page(it.sptep, sp);
 	}
 
+	clear_sp_write_flooding_count(it.sptep);
 	mmu_set_spte(vcpu, it.sptep, access, gw->pte_access,
 		     user_fault, write_fault, emulate, it.level,
 		     gw->gfn, pfn, prefault, map_writable);
@@ -599,11 +602,9 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
 	 */
 	if (!r) {
 		pgprintk("%s: guest page fault\n", __func__);
-		if (!prefault) {
+		if (!prefault)
 			inject_page_fault(vcpu, &walker.fault);
-			/* reset fork detector */
-			vcpu->arch.last_pt_write_count = 0;
-		}
+
 		return 0;
 	}
 
@@ -641,9 +642,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
 	pgprintk("%s: shadow pte %p %llx emulate %d\n", __func__,
 		 sptep, *sptep, emulate);
 
-	if (!emulate)
-		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
-
 	++vcpu->stat.pf_fixed;
 	trace_kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
 	spin_unlock(&vcpu->kvm->mmu_lock);