author     Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>    2011-09-22 04:57:23 -0400
committer  Avi Kivity <avi@redhat.com>                      2011-12-27 04:16:59 -0500
commit     889e5cbced6c191bb7e25c1b30b43e59a12561f9 (patch)
tree       4eddee4776696bd93fbff6af68acf7e1146c392f /arch/x86
parent     f8734352c6f9c4f3d85f0c97b7731b7f925c62fd (diff)
KVM: MMU: split kvm_mmu_pte_write function
kvm_mmu_pte_write is too long; split it up for better readability.

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
Diffstat (limited to 'arch/x86')
-rw-r--r--   arch/x86/kvm/mmu.c   194
1 file changed, 119 insertions(+), 75 deletions(-)
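A note on the alignment test that the patch moves into detect_write_misaligned(): the expression (offset ^ (offset + bytes - 1)) & ~(pte_size - 1) is non-zero exactly when the first and last written bytes fall into different pte_size-aligned slots, i.e. when the write straddles a gpte boundary. The standalone sketch below is only an illustration and is not part of the patch; the helper name write_is_misaligned() and the main() harness are made up here, while the two checks inside it are taken verbatim from the diff that follows.

/*
 * Standalone sketch, not part of the patch: only the bit expression and
 * the "bytes < 4" check come from detect_write_misaligned(); the rest of
 * this harness is hypothetical.
 */
#include <stdio.h>
#include <stdbool.h>

static bool write_is_misaligned(unsigned offset, int bytes, unsigned pte_size)
{
        unsigned misaligned;

        /*
         * Non-zero iff the first and last written byte land in different
         * pte_size-aligned slots, i.e. the write crosses a gpte boundary.
         */
        misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
        /* Writes shorter than 4 bytes can never update a whole gpte. */
        misaligned |= bytes < 4;

        return misaligned;
}

int main(void)
{
        /* 8-byte PAE gptes: an aligned 8-byte write is not misaligned ... */
        printf("%d\n", write_is_misaligned(0x10, 8, 8));   /* prints 0 */
        /* ... writing one 4-byte half of a gpte is fine too ... */
        printf("%d\n", write_is_misaligned(0x14, 4, 8));   /* prints 0 */
        /* ... but a 4-byte write crossing two gptes is misaligned. */
        printf("%d\n", write_is_misaligned(0x16, 4, 8));   /* prints 1 */
        return 0;
}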
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 7e57938bb86..986aea55366 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3530,48 +3530,28 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu)
 	return !!(spte && (*spte & shadow_accessed_mask));
 }
 
-void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
-		       const u8 *new, int bytes)
+static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
+				    const u8 *new, int *bytes)
 {
-	gfn_t gfn = gpa >> PAGE_SHIFT;
-	union kvm_mmu_page_role mask = { .word = 0 };
-	struct kvm_mmu_page *sp;
-	struct hlist_node *node;
-	LIST_HEAD(invalid_list);
-	u64 entry, gentry, *spte;
-	unsigned pte_size, page_offset, misaligned, quadrant, offset;
-	int level, npte, r, flooded = 0;
-	bool remote_flush, local_flush, zap_page;
-
-	/*
-	 * If we don't have indirect shadow pages, it means no page is
-	 * write-protected, so we can exit simply.
-	 */
-	if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages))
-		return;
-
-	zap_page = remote_flush = local_flush = false;
-	offset = offset_in_page(gpa);
-
-	pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
+	u64 gentry;
+	int r;
 
 	/*
 	 * Assume that the pte write on a page table of the same type
 	 * as the current vcpu paging mode since we update the sptes only
 	 * when they have the same mode.
 	 */
-	if (is_pae(vcpu) && bytes == 4) {
+	if (is_pae(vcpu) && *bytes == 4) {
 		/* Handle a 32-bit guest writing two halves of a 64-bit gpte */
-		gpa &= ~(gpa_t)7;
-		bytes = 8;
-
-		r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8));
+		*gpa &= ~(gpa_t)7;
+		*bytes = 8;
+		r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, min(*bytes, 8));
 		if (r)
 			gentry = 0;
 		new = (const u8 *)&gentry;
 	}
 
-	switch (bytes) {
+	switch (*bytes) {
 	case 4:
 		gentry = *(const u32 *)new;
 		break;
@@ -3583,71 +3563,135 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 		break;
 	}
 
-	/*
-	 * No need to care whether allocation memory is successful
-	 * or not since pte prefetch is skiped if it does not have
-	 * enough objects in the cache.
-	 */
-	mmu_topup_memory_caches(vcpu);
-	spin_lock(&vcpu->kvm->mmu_lock);
-	++vcpu->kvm->stat.mmu_pte_write;
-	trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
+	return gentry;
+}
+
+/*
+ * If we're seeing too many writes to a page, it may no longer be a page table,
+ * or we may be forking, in which case it is better to unmap the page.
+ */
+static bool detect_write_flooding(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+	bool flooded = false;
+
 	if (gfn == vcpu->arch.last_pt_write_gfn
 	    && !last_updated_pte_accessed(vcpu)) {
 		++vcpu->arch.last_pt_write_count;
 		if (vcpu->arch.last_pt_write_count >= 3)
-			flooded = 1;
+			flooded = true;
 	} else {
 		vcpu->arch.last_pt_write_gfn = gfn;
 		vcpu->arch.last_pt_write_count = 1;
 		vcpu->arch.last_pte_updated = NULL;
 	}
 
+	return flooded;
+}
+
+/*
+ * Misaligned accesses are too much trouble to fix up; also, they usually
+ * indicate a page is not used as a page table.
+ */
+static bool detect_write_misaligned(struct kvm_mmu_page *sp, gpa_t gpa,
+				    int bytes)
+{
+	unsigned offset, pte_size, misaligned;
+
+	pgprintk("misaligned: gpa %llx bytes %d role %x\n",
+		 gpa, bytes, sp->role.word);
+
+	offset = offset_in_page(gpa);
+	pte_size = sp->role.cr4_pae ? 8 : 4;
+	misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
+	misaligned |= bytes < 4;
+
+	return misaligned;
+}
+
+static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte)
+{
+	unsigned page_offset, quadrant;
+	u64 *spte;
+	int level;
+
+	page_offset = offset_in_page(gpa);
+	level = sp->role.level;
+	*nspte = 1;
+	if (!sp->role.cr4_pae) {
+		page_offset <<= 1;	/* 32->64 */
+		/*
+		 * A 32-bit pde maps 4MB while the shadow pdes map
+		 * only 2MB. So we need to double the offset again
+		 * and zap two pdes instead of one.
+		 */
+		if (level == PT32_ROOT_LEVEL) {
+			page_offset &= ~7;	/* kill rounding error */
+			page_offset <<= 1;
+			*nspte = 2;
+		}
+		quadrant = page_offset >> PAGE_SHIFT;
+		page_offset &= ~PAGE_MASK;
+		if (quadrant != sp->role.quadrant)
+			return NULL;
+	}
+
+	spte = &sp->spt[page_offset / sizeof(*spte)];
+	return spte;
+}
+
+void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
+		       const u8 *new, int bytes)
+{
+	gfn_t gfn = gpa >> PAGE_SHIFT;
+	union kvm_mmu_page_role mask = { .word = 0 };
+	struct kvm_mmu_page *sp;
+	struct hlist_node *node;
+	LIST_HEAD(invalid_list);
+	u64 entry, gentry, *spte;
+	int npte;
+	bool remote_flush, local_flush, zap_page, flooded, misaligned;
+
+	/*
+	 * If we don't have indirect shadow pages, it means no page is
+	 * write-protected, so we can exit simply.
+	 */
+	if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages))
+		return;
+
+	zap_page = remote_flush = local_flush = false;
+
+	pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
+
+	gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, new, &bytes);
+
+	/*
+	 * No need to care whether allocation memory is successful
+	 * or not since pte prefetch is skiped if it does not have
+	 * enough objects in the cache.
+	 */
+	mmu_topup_memory_caches(vcpu);
+
+	spin_lock(&vcpu->kvm->mmu_lock);
+	++vcpu->kvm->stat.mmu_pte_write;
+	trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
+
+	flooded = detect_write_flooding(vcpu, gfn);
 	mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
 	for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) {
-		pte_size = sp->role.cr4_pae ? 8 : 4;
-		misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
-		misaligned |= bytes < 4;
+		misaligned = detect_write_misaligned(sp, gpa, bytes);
+
 		if (misaligned || flooded) {
-			/*
-			 * Misaligned accesses are too much trouble to fix
-			 * up; also, they usually indicate a page is not used
-			 * as a page table.
-			 *
-			 * If we're seeing too many writes to a page,
-			 * it may no longer be a page table, or we may be
-			 * forking, in which case it is better to unmap the
-			 * page.
-			 */
-			pgprintk("misaligned: gpa %llx bytes %d role %x\n",
-				 gpa, bytes, sp->role.word);
 			zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
 						     &invalid_list);
 			++vcpu->kvm->stat.mmu_flooded;
 			continue;
 		}
-		page_offset = offset;
-		level = sp->role.level;
-		npte = 1;
-		if (!sp->role.cr4_pae) {
-			page_offset <<= 1;	/* 32->64 */
-			/*
-			 * A 32-bit pde maps 4MB while the shadow pdes map
-			 * only 2MB. So we need to double the offset again
-			 * and zap two pdes instead of one.
-			 */
-			if (level == PT32_ROOT_LEVEL) {
-				page_offset &= ~7;	/* kill rounding error */
-				page_offset <<= 1;
-				npte = 2;
-			}
-			quadrant = page_offset >> PAGE_SHIFT;
-			page_offset &= ~PAGE_MASK;
-			if (quadrant != sp->role.quadrant)
-				continue;
-		}
+
+		spte = get_written_sptes(sp, gpa, &npte);
+		if (!spte)
+			continue;
+
 		local_flush = true;
-		spte = &sp->spt[page_offset / sizeof(*spte)];
 		while (npte--) {
 			entry = *spte;
 			mmu_page_zap_pte(vcpu->kvm, sp, spte);