author     Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>    2011-09-22 04:57:23 -0400
committer  Avi Kivity <avi@redhat.com>                      2011-12-27 04:16:59 -0500
commit     889e5cbced6c191bb7e25c1b30b43e59a12561f9 (patch)
tree       4eddee4776696bd93fbff6af68acf7e1146c392f /arch/x86
parent     f8734352c6f9c4f3d85f0c97b7731b7f925c62fd (diff)
KVM: MMU: split kvm_mmu_pte_write function
kvm_mmu_pte_write is too long; split it up for better readability.

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
Diffstat (limited to 'arch/x86')
-rw-r--r--   arch/x86/kvm/mmu.c   194
1 file changed, 119 insertions(+), 75 deletions(-)
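A note on the alignment test that the patch moves into detect_write_misaligned(): the expression (offset ^ (offset + bytes - 1)) & ~(pte_size - 1) is non-zero exactly when the first and last written bytes fall into different pte_size-aligned slots, i.e. when the write straddles a gpte boundary. The standalone sketch below is only an illustration and is not part of the patch; the helper name write_is_misaligned() and the main() harness are made up here, while the two checks inside it are taken verbatim from the diff that follows.

/*
 * Standalone sketch, not part of the patch: only the bit expression and
 * the "bytes < 4" check come from detect_write_misaligned(); the rest of
 * this harness is hypothetical.
 */
#include <stdio.h>
#include <stdbool.h>

static bool write_is_misaligned(unsigned offset, int bytes, unsigned pte_size)
{
        unsigned misaligned;

        /*
         * Non-zero iff the first and last written byte land in different
         * pte_size-aligned slots, i.e. the write crosses a gpte boundary.
         */
        misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
        /* Writes shorter than 4 bytes can never update a whole gpte. */
        misaligned |= bytes < 4;

        return misaligned;
}

int main(void)
{
        /* 8-byte PAE gptes: an aligned 8-byte write is not misaligned ... */
        printf("%d\n", write_is_misaligned(0x10, 8, 8));   /* prints 0 */
        /* ... writing one 4-byte half of a gpte is fine too ... */
        printf("%d\n", write_is_misaligned(0x14, 4, 8));   /* prints 0 */
        /* ... but a 4-byte write crossing two gptes is misaligned. */
        printf("%d\n", write_is_misaligned(0x16, 4, 8));   /* prints 1 */
        return 0;
}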
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 7e57938bb86..986aea55366 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3530,48 +3530,28 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu)
 	return !!(spte && (*spte & shadow_accessed_mask));
 }
 
-void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
-		       const u8 *new, int bytes)
+static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
+				    const u8 *new, int *bytes)
 {
-	gfn_t gfn = gpa >> PAGE_SHIFT;
-	union kvm_mmu_page_role mask = { .word = 0 };
-	struct kvm_mmu_page *sp;
-	struct hlist_node *node;
-	LIST_HEAD(invalid_list);
-	u64 entry, gentry, *spte;
-	unsigned pte_size, page_offset, misaligned, quadrant, offset;
-	int level, npte, r, flooded = 0;
-	bool remote_flush, local_flush, zap_page;
-
-	/*
-	 * If we don't have indirect shadow pages, it means no page is
-	 * write-protected, so we can exit simply.
-	 */
-	if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages))
-		return;
-
-	zap_page = remote_flush = local_flush = false;
-	offset = offset_in_page(gpa);
-
-	pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
+	u64 gentry;
+	int r;
 
 	/*
 	 * Assume that the pte write on a page table of the same type
 	 * as the current vcpu paging mode since we update the sptes only
 	 * when they have the same mode.
 	 */
-	if (is_pae(vcpu) && bytes == 4) {
+	if (is_pae(vcpu) && *bytes == 4) {
 		/* Handle a 32-bit guest writing two halves of a 64-bit gpte */
-		gpa &= ~(gpa_t)7;
-		bytes = 8;
-
-		r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8));
+		*gpa &= ~(gpa_t)7;
+		*bytes = 8;
+		r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, min(*bytes, 8));
 		if (r)
 			gentry = 0;
 		new = (const u8 *)&gentry;
 	}
 
-	switch (bytes) {
+	switch (*bytes) {
 	case 4:
 		gentry = *(const u32 *)new;
 		break;
@@ -3583,71 +3563,135 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 		break;
 	}
 
-	/*
-	 * No need to care whether allocation memory is successful
-	 * or not since pte prefetch is skiped if it does not have
-	 * enough objects in the cache.
-	 */
-	mmu_topup_memory_caches(vcpu);
-	spin_lock(&vcpu->kvm->mmu_lock);
-	++vcpu->kvm->stat.mmu_pte_write;
-	trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
+	return gentry;
+}
+
+/*
+ * If we're seeing too many writes to a page, it may no longer be a page table,
+ * or we may be forking, in which case it is better to unmap the page.
+ */
+static bool detect_write_flooding(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+	bool flooded = false;
+
 	if (gfn == vcpu->arch.last_pt_write_gfn
 	    && !last_updated_pte_accessed(vcpu)) {
 		++vcpu->arch.last_pt_write_count;
 		if (vcpu->arch.last_pt_write_count >= 3)
-			flooded = 1;
+			flooded = true;
 	} else {
 		vcpu->arch.last_pt_write_gfn = gfn;
 		vcpu->arch.last_pt_write_count = 1;
 		vcpu->arch.last_pte_updated = NULL;
 	}
 
+	return flooded;
+}
+
+/*
+ * Misaligned accesses are too much trouble to fix up; also, they usually
+ * indicate a page is not used as a page table.
+ */
+static bool detect_write_misaligned(struct kvm_mmu_page *sp, gpa_t gpa,
+				    int bytes)
+{
+	unsigned offset, pte_size, misaligned;
+
+	pgprintk("misaligned: gpa %llx bytes %d role %x\n",
+		 gpa, bytes, sp->role.word);
+
+	offset = offset_in_page(gpa);
+	pte_size = sp->role.cr4_pae ? 8 : 4;
+	misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
+	misaligned |= bytes < 4;
+
+	return misaligned;
+}
+
+static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte)
+{
+	unsigned page_offset, quadrant;
+	u64 *spte;
+	int level;
+
+	page_offset = offset_in_page(gpa);
+	level = sp->role.level;
+	*nspte = 1;
+	if (!sp->role.cr4_pae) {
+		page_offset <<= 1;	/* 32->64 */
+		/*
+		 * A 32-bit pde maps 4MB while the shadow pdes map
+		 * only 2MB. So we need to double the offset again
+		 * and zap two pdes instead of one.
+		 */
+		if (level == PT32_ROOT_LEVEL) {
+			page_offset &= ~7;	/* kill rounding error */
+			page_offset <<= 1;
+			*nspte = 2;
+		}
+		quadrant = page_offset >> PAGE_SHIFT;
+		page_offset &= ~PAGE_MASK;
+		if (quadrant != sp->role.quadrant)
+			return NULL;
+	}
+
+	spte = &sp->spt[page_offset / sizeof(*spte)];
+	return spte;
+}
+
+void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
+		       const u8 *new, int bytes)
+{
+	gfn_t gfn = gpa >> PAGE_SHIFT;
+	union kvm_mmu_page_role mask = { .word = 0 };
+	struct kvm_mmu_page *sp;
+	struct hlist_node *node;
+	LIST_HEAD(invalid_list);
+	u64 entry, gentry, *spte;
+	int npte;
+	bool remote_flush, local_flush, zap_page, flooded, misaligned;
+
+	/*
+	 * If we don't have indirect shadow pages, it means no page is
+	 * write-protected, so we can exit simply.
+	 */
+	if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages))
+		return;
+
+	zap_page = remote_flush = local_flush = false;
+
+	pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
+
+	gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, new, &bytes);
+
+	/*
+	 * No need to care whether allocation memory is successful
+	 * or not since pte prefetch is skiped if it does not have
+	 * enough objects in the cache.
+	 */
+	mmu_topup_memory_caches(vcpu);
+
+	spin_lock(&vcpu->kvm->mmu_lock);
+	++vcpu->kvm->stat.mmu_pte_write;
+	trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
+
+	flooded = detect_write_flooding(vcpu, gfn);
 	mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
 	for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) {
-		pte_size = sp->role.cr4_pae ? 8 : 4;
-		misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
-		misaligned |= bytes < 4;
+		misaligned = detect_write_misaligned(sp, gpa, bytes);
+
 		if (misaligned || flooded) {
-			/*
-			 * Misaligned accesses are too much trouble to fix
-			 * up; also, they usually indicate a page is not used
-			 * as a page table.
-			 *
-			 * If we're seeing too many writes to a page,
-			 * it may no longer be a page table, or we may be
-			 * forking, in which case it is better to unmap the
-			 * page.
-			 */
-			pgprintk("misaligned: gpa %llx bytes %d role %x\n",
-				 gpa, bytes, sp->role.word);
 			zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
 						     &invalid_list);
 			++vcpu->kvm->stat.mmu_flooded;
 			continue;
 		}
-		page_offset = offset;
-		level = sp->role.level;
-		npte = 1;
-		if (!sp->role.cr4_pae) {
-			page_offset <<= 1;	/* 32->64 */
-			/*
-			 * A 32-bit pde maps 4MB while the shadow pdes map
-			 * only 2MB. So we need to double the offset again
-			 * and zap two pdes instead of one.
-			 */
-			if (level == PT32_ROOT_LEVEL) {
-				page_offset &= ~7;	/* kill rounding error */
-				page_offset <<= 1;
-				npte = 2;
-			}
-			quadrant = page_offset >> PAGE_SHIFT;
-			page_offset &= ~PAGE_MASK;
-			if (quadrant != sp->role.quadrant)
-				continue;
-		}
+
+		spte = get_written_sptes(sp, gpa, &npte);
+		if (!spte)
+			continue;
+
 		local_flush = true;
-		spte = &sp->spt[page_offset / sizeof(*spte)];
 		while (npte--) {
 			entry = *spte;
 			mmu_page_zap_pte(vcpu->kvm, sp, spte);