author		Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>	2011-09-22 04:56:39 -0400
committer	Avi Kivity <avi@redhat.com>			2011-12-27 04:16:56 -0500
commit		f57f2ef58f6703e6df70ed52a198920cb3e8edba (patch)
tree		831564ca3314ef897fdcfcbd70ef91a52d369a13 /arch/x86
parent		505aef8f30a95f7e4abf2c07e54ded1521587ba0 (diff)
KVM: MMU: fast prefetch spte on invlpg path
Fast prefetch spte for the unsync shadow page on the invlpg path: read the
guest pte and update the spte directly in FNAME(invlpg), while mmu_lock is
held, instead of bumping invlpg_counter and deferring the prefetch to a
kvm_mmu_pte_write(..., NULL, ...) call after the lock is dropped. This lets
us remove kvm->arch.invlpg_counter and the guest_initiated parameter of
kvm_mmu_pte_write().
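Condensed from the paging_tmpl.h hunk below, the new flow looks like this
(a sketch, not the verbatim patch: the loop-exit breaks are folded into one
condition, and pte_gpa is computed from sp->gfn by unchanged code the diff
context elides; every name here is a KVM-internal helper the patch itself
uses):

	/* FNAME(invlpg) after this patch -- condensed sketch */
	mmu_topup_memory_caches(vcpu);	/* failure is caught by rmap_can_add() */

	spin_lock(&vcpu->kvm->mmu_lock);
	for_each_shadow_entry(vcpu, gva, iterator) {
		sp = page_header(__pa(iterator.sptep));
		if (is_last_spte(*iterator.sptep, iterator.level) && sp->unsync) {
			/* zap the stale spte ... */
			if (mmu_page_zap_pte(vcpu->kvm, sp, iterator.sptep))
				kvm_flush_remote_tlbs(vcpu->kvm);
			/* ... and prefetch its replacement right here, still
			 * under mmu_lock, so no concurrent invlpg can race */
			if (rmap_can_add(vcpu) &&
			    !kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
						   sizeof(pt_element_t)))
				FNAME(update_pte)(vcpu, sp, iterator.sptep, &gpte);
			break;
		}
		if (!is_shadow_present_pte(*iterator.sptep) || !sp->unsync_children)
			break;
	}
	spin_unlock(&vcpu->kvm->mmu_lock);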
Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
Diffstat (limited to 'arch/x86')
-rw-r--r--	arch/x86/include/asm/kvm_host.h	|  4
-rw-r--r--	arch/x86/kvm/mmu.c		| 38
-rw-r--r--	arch/x86/kvm/paging_tmpl.h	| 30
-rw-r--r--	arch/x86/kvm/x86.c		|  4
4 files changed, 36 insertions(+), 40 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f8ab0d760231..3c9ea26c7aea 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -461,7 +461,6 @@ struct kvm_arch {
 	unsigned int n_requested_mmu_pages;
 	unsigned int n_max_mmu_pages;
 	unsigned int indirect_shadow_pages;
-	atomic_t invlpg_counter;
 	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
 	/*
 	 * Hash table of struct kvm_mmu_page.
@@ -757,8 +756,7 @@ int fx_init(struct kvm_vcpu *vcpu);
 
 void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu);
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
-		       const u8 *new, int bytes,
-		       bool guest_initiated);
+		       const u8 *new, int bytes);
 int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
 int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
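With guest_initiated gone, the exported interface shrinks to a plain

	kvm_mmu_pte_write(vcpu, gpa, new, bytes);

which is safe because, as the x86.c hunks at the end show, both remaining
callers are guest-initiated emulated writes (they passed 1 before); the only
caller that passed 0 was the invlpg path, which no longer calls this
function at all.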
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d15f908649e7..c01137f10c6b 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3531,8 +3531,7 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu)
 }
 
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
-		       const u8 *new, int bytes,
-		       bool guest_initiated)
+		       const u8 *new, int bytes)
 {
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	union kvm_mmu_page_role mask = { .word = 0 };
@@ -3541,7 +3540,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	LIST_HEAD(invalid_list);
 	u64 entry, gentry, *spte;
 	unsigned pte_size, page_offset, misaligned, quadrant, offset;
-	int level, npte, invlpg_counter, r, flooded = 0;
+	int level, npte, r, flooded = 0;
 	bool remote_flush, local_flush, zap_page;
 
 	/*
@@ -3556,19 +3555,16 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 
 	pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
 
-	invlpg_counter = atomic_read(&vcpu->kvm->arch.invlpg_counter);
-
 	/*
 	 * Assume that the pte write on a page table of the same type
 	 * as the current vcpu paging mode since we update the sptes only
 	 * when they have the same mode.
 	 */
-	if ((is_pae(vcpu) && bytes == 4) || !new) {
+	if (is_pae(vcpu) && bytes == 4) {
 		/* Handle a 32-bit guest writing two halves of a 64-bit gpte */
-		if (is_pae(vcpu)) {
-			gpa &= ~(gpa_t)7;
-			bytes = 8;
-		}
+		gpa &= ~(gpa_t)7;
+		bytes = 8;
+
 		r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8));
 		if (r)
 			gentry = 0;
@@ -3594,22 +3590,18 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	 */
 	mmu_topup_memory_caches(vcpu);
 	spin_lock(&vcpu->kvm->mmu_lock);
-	if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter)
-		gentry = 0;
 	kvm_mmu_free_some_pages(vcpu);
 	++vcpu->kvm->stat.mmu_pte_write;
 	trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
-	if (guest_initiated) {
-		if (gfn == vcpu->arch.last_pt_write_gfn
-		    && !last_updated_pte_accessed(vcpu)) {
-			++vcpu->arch.last_pt_write_count;
-			if (vcpu->arch.last_pt_write_count >= 3)
-				flooded = 1;
-		} else {
-			vcpu->arch.last_pt_write_gfn = gfn;
-			vcpu->arch.last_pt_write_count = 1;
-			vcpu->arch.last_pte_updated = NULL;
-		}
+	if (gfn == vcpu->arch.last_pt_write_gfn
+	    && !last_updated_pte_accessed(vcpu)) {
+		++vcpu->arch.last_pt_write_count;
+		if (vcpu->arch.last_pt_write_count >= 3)
+			flooded = 1;
+	} else {
+		vcpu->arch.last_pt_write_gfn = gfn;
+		vcpu->arch.last_pt_write_count = 1;
+		vcpu->arch.last_pte_updated = NULL;
 	}
 
 	mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
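Aside: the simplified branch in the third mmu.c hunk handles a 32-bit PAE
guest, which updates a 64-bit gpte with two 4-byte writes. A worked example
of the widening (addresses invented for illustration):

	gpa_t gpa = 0x1004;	/* guest writes 4 bytes into the high half */
	int bytes = 4;

	gpa &= ~(gpa_t)7;	/* -> 0x1000, the aligned gpte base */
	bytes = 8;		/* fetch the whole 64-bit gpte instead */

The old `|| !new` arm of that condition existed only for the deferred invlpg
prefetch, which called kvm_mmu_pte_write() with new == NULL; with the
prefetch moved into FNAME(invlpg), that arm is dead and drops out.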
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index d8d3906649da..9efb86035774 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -672,20 +672,27 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 {
 	struct kvm_shadow_walk_iterator iterator;
 	struct kvm_mmu_page *sp;
-	gpa_t pte_gpa = -1;
 	int level;
 	u64 *sptep;
 
 	vcpu_clear_mmio_info(vcpu, gva);
 
-	spin_lock(&vcpu->kvm->mmu_lock);
+	/*
+	 * No need to check return value here, rmap_can_add() can
+	 * help us to skip pte prefetch later.
+	 */
+	mmu_topup_memory_caches(vcpu);
 
+	spin_lock(&vcpu->kvm->mmu_lock);
 	for_each_shadow_entry(vcpu, gva, iterator) {
 		level = iterator.level;
 		sptep = iterator.sptep;
 
 		sp = page_header(__pa(sptep));
 		if (is_last_spte(*sptep, level)) {
+			pt_element_t gpte;
+			gpa_t pte_gpa;
+
 			if (!sp->unsync)
 				break;
 
@@ -694,22 +701,21 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 
 			if (mmu_page_zap_pte(vcpu->kvm, sp, sptep))
 				kvm_flush_remote_tlbs(vcpu->kvm);
+
+			if (!rmap_can_add(vcpu))
+				break;
+
+			if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
+						  sizeof(pt_element_t)))
+				break;
+
+			FNAME(update_pte)(vcpu, sp, sptep, &gpte);
 		}
 
 		if (!is_shadow_present_pte(*sptep) || !sp->unsync_children)
 			break;
 	}
-
-	atomic_inc(&vcpu->kvm->arch.invlpg_counter);
-
 	spin_unlock(&vcpu->kvm->mmu_lock);
-
-	if (pte_gpa == -1)
-		return;
-
-	if (mmu_topup_memory_caches(vcpu))
-		return;
-	kvm_mmu_pte_write(vcpu, pte_gpa, NULL, sizeof(pt_element_t), 0);
 }
 
 static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
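Why invlpg_counter can go away entirely: the old FNAME(invlpg) dropped
mmu_lock before prefetching, so kvm_mmu_pte_write() had to detect a
concurrent invlpg by re-reading the counter and discarding a stale gentry.
Reconstructed from the removed lines above, the old deferred flow was
roughly:

	/* old FNAME(invlpg): remember pte_gpa, prefetch after unlock */
	atomic_inc(&vcpu->kvm->arch.invlpg_counter);
	spin_unlock(&vcpu->kvm->mmu_lock);
	...
	kvm_mmu_pte_write(vcpu, pte_gpa, NULL, sizeof(pt_element_t), 0);

	/* ...which, back under mmu_lock in kvm_mmu_pte_write(): */
	if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter)
		gentry = 0;	/* raced with another invlpg: drop the prefetch */

With the gpte read and the spte update both done inside the mmu_lock
critical section, there is no window for that race, so the counter and the
!new/guest_initiated plumbing disappear together.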
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a2154487917d..9c980ce26e61 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4087,7 +4087,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 	ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
 	if (ret < 0)
 		return 0;
-	kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1);
+	kvm_mmu_pte_write(vcpu, gpa, val, bytes);
 	return 1;
 }
 
@@ -4324,7 +4324,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
 	if (!exchanged)
 		return X86EMUL_CMPXCHG_FAILED;
 
-	kvm_mmu_pte_write(vcpu, gpa, new, bytes, 1);
+	kvm_mmu_pte_write(vcpu, gpa, new, bytes);
 
 	return X86EMUL_CONTINUE;
 