author     Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>   2011-09-22 04:57:23 -0400
committer  Avi Kivity <avi@redhat.com>                     2011-12-27 04:16:59 -0500
commit     889e5cbced6c191bb7e25c1b30b43e59a12561f9 (patch)
tree       4eddee4776696bd93fbff6af68acf7e1146c392f /arch/x86/kvm
parent     f8734352c6f9c4f3d85f0c97b7731b7f925c62fd (diff)
KVM: MMU: split kvm_mmu_pte_write function
kvm_mmu_pte_write is too long; split it into smaller helper functions for better readability.
Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--  arch/x86/kvm/mmu.c  194
1 file changed, 119 insertions(+), 75 deletions(-)
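Taken together, the patch carves the body of kvm_mmu_pte_write into four helpers: mmu_pte_write_fetch_gpte, detect_write_flooding, detect_write_misaligned and get_written_sptes. The outline below is a condensed, illustrative sketch of the resulting flow, reconstructed from the diff that follows; locking, statistics, tracing and the zap/flush bookkeeping are elided, so it is a sketch rather than the literal kernel code:

/* Illustrative outline only -- see the diff below for the real code. */
void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
                       const u8 *new, int bytes)
{
        gfn_t gfn = gpa >> PAGE_SHIFT;
        struct kvm_mmu_page *sp;
        struct hlist_node *node;
        u64 gentry, *spte;
        int npte;
        bool flooded;

        /* Fast path: no indirect shadow pages means nothing is write-protected. */
        if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages))
                return;

        /* May widen a 4-byte PAE write to 8 bytes, adjusting gpa and bytes. */
        gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, new, &bytes);

        flooded = detect_write_flooding(vcpu, gfn);
        for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) {
                if (detect_write_misaligned(sp, gpa, bytes) || flooded) {
                        /* ... zap the whole shadow page ... */
                        continue;
                }

                spte = get_written_sptes(sp, gpa, &npte);  /* NULL on quadrant mismatch */
                if (!spte)
                        continue;

                /* ... zap the npte affected sptes and refill them from gentry ... */
        }
}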
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 7e57938bb86a..986aea55366b 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3530,48 +3530,28 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu)
         return !!(spte && (*spte & shadow_accessed_mask));
 }
 
-void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
-                       const u8 *new, int bytes)
+static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
+                                    const u8 *new, int *bytes)
 {
-        gfn_t gfn = gpa >> PAGE_SHIFT;
-        union kvm_mmu_page_role mask = { .word = 0 };
-        struct kvm_mmu_page *sp;
-        struct hlist_node *node;
-        LIST_HEAD(invalid_list);
-        u64 entry, gentry, *spte;
-        unsigned pte_size, page_offset, misaligned, quadrant, offset;
-        int level, npte, r, flooded = 0;
-        bool remote_flush, local_flush, zap_page;
-
-        /*
-         * If we don't have indirect shadow pages, it means no page is
-         * write-protected, so we can exit simply.
-         */
-        if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages))
-                return;
-
-        zap_page = remote_flush = local_flush = false;
-        offset = offset_in_page(gpa);
-
-        pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
+        u64 gentry;
+        int r;
 
         /*
          * Assume that the pte write on a page table of the same type
          * as the current vcpu paging mode since we update the sptes only
          * when they have the same mode.
          */
-        if (is_pae(vcpu) && bytes == 4) {
+        if (is_pae(vcpu) && *bytes == 4) {
                 /* Handle a 32-bit guest writing two halves of a 64-bit gpte */
-                gpa &= ~(gpa_t)7;
-                bytes = 8;
-
-                r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8));
+                *gpa &= ~(gpa_t)7;
+                *bytes = 8;
+                r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, min(*bytes, 8));
                 if (r)
                         gentry = 0;
                 new = (const u8 *)&gentry;
         }
 
-        switch (bytes) {
+        switch (*bytes) {
         case 4:
                 gentry = *(const u32 *)new;
                 break;
@@ -3583,71 +3563,135 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
                 break;
         }
 
-        /*
-         * No need to care whether allocation memory is successful
-         * or not since pte prefetch is skiped if it does not have
-         * enough objects in the cache.
-         */
-        mmu_topup_memory_caches(vcpu);
-        spin_lock(&vcpu->kvm->mmu_lock);
-        ++vcpu->kvm->stat.mmu_pte_write;
-        trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
+        return gentry;
+}
+
+/*
+ * If we're seeing too many writes to a page, it may no longer be a page table,
+ * or we may be forking, in which case it is better to unmap the page.
+ */
+static bool detect_write_flooding(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+        bool flooded = false;
+
         if (gfn == vcpu->arch.last_pt_write_gfn
             && !last_updated_pte_accessed(vcpu)) {
                 ++vcpu->arch.last_pt_write_count;
                 if (vcpu->arch.last_pt_write_count >= 3)
-                        flooded = 1;
+                        flooded = true;
         } else {
                 vcpu->arch.last_pt_write_gfn = gfn;
                 vcpu->arch.last_pt_write_count = 1;
                 vcpu->arch.last_pte_updated = NULL;
         }
 
+        return flooded;
+}
+
+/*
+ * Misaligned accesses are too much trouble to fix up; also, they usually
+ * indicate a page is not used as a page table.
+ */
+static bool detect_write_misaligned(struct kvm_mmu_page *sp, gpa_t gpa,
+                                    int bytes)
+{
+        unsigned offset, pte_size, misaligned;
+
+        pgprintk("misaligned: gpa %llx bytes %d role %x\n",
+                 gpa, bytes, sp->role.word);
+
+        offset = offset_in_page(gpa);
+        pte_size = sp->role.cr4_pae ? 8 : 4;
+        misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
+        misaligned |= bytes < 4;
+
+        return misaligned;
+}
+
+static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte)
+{
+        unsigned page_offset, quadrant;
+        u64 *spte;
+        int level;
+
+        page_offset = offset_in_page(gpa);
+        level = sp->role.level;
+        *nspte = 1;
+        if (!sp->role.cr4_pae) {
+                page_offset <<= 1;  /* 32->64 */
+                /*
+                 * A 32-bit pde maps 4MB while the shadow pdes map
+                 * only 2MB.  So we need to double the offset again
+                 * and zap two pdes instead of one.
+                 */
+                if (level == PT32_ROOT_LEVEL) {
+                        page_offset &= ~7; /* kill rounding error */
+                        page_offset <<= 1;
+                        *nspte = 2;
+                }
+                quadrant = page_offset >> PAGE_SHIFT;
+                page_offset &= ~PAGE_MASK;
+                if (quadrant != sp->role.quadrant)
+                        return NULL;
+        }
+
+        spte = &sp->spt[page_offset / sizeof(*spte)];
+        return spte;
+}
+
+void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
+                       const u8 *new, int bytes)
+{
+        gfn_t gfn = gpa >> PAGE_SHIFT;
+        union kvm_mmu_page_role mask = { .word = 0 };
+        struct kvm_mmu_page *sp;
+        struct hlist_node *node;
+        LIST_HEAD(invalid_list);
+        u64 entry, gentry, *spte;
+        int npte;
+        bool remote_flush, local_flush, zap_page, flooded, misaligned;
+
+        /*
+         * If we don't have indirect shadow pages, it means no page is
+         * write-protected, so we can exit simply.
+         */
+        if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages))
+                return;
+
+        zap_page = remote_flush = local_flush = false;
+
+        pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
+
+        gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, new, &bytes);
+
+        /*
+         * No need to care whether allocation memory is successful
+         * or not since pte prefetch is skiped if it does not have
+         * enough objects in the cache.
+         */
+        mmu_topup_memory_caches(vcpu);
+
+        spin_lock(&vcpu->kvm->mmu_lock);
+        ++vcpu->kvm->stat.mmu_pte_write;
+        trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
+
+        flooded = detect_write_flooding(vcpu, gfn);
         mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
         for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) {
-                pte_size = sp->role.cr4_pae ? 8 : 4;
-                misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
-                misaligned |= bytes < 4;
+                misaligned = detect_write_misaligned(sp, gpa, bytes);
+
                 if (misaligned || flooded) {
-                        /*
-                         * Misaligned accesses are too much trouble to fix
-                         * up; also, they usually indicate a page is not used
-                         * as a page table.
-                         *
-                         * If we're seeing too many writes to a page,
-                         * it may no longer be a page table, or we may be
-                         * forking, in which case it is better to unmap the
-                         * page.
-                         */
-                        pgprintk("misaligned: gpa %llx bytes %d role %x\n",
-                                 gpa, bytes, sp->role.word);
                         zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
                                                                &invalid_list);
                         ++vcpu->kvm->stat.mmu_flooded;
                         continue;
                 }
-                page_offset = offset;
-                level = sp->role.level;
-                npte = 1;
-                if (!sp->role.cr4_pae) {
-                        page_offset <<= 1;  /* 32->64 */
-                        /*
-                         * A 32-bit pde maps 4MB while the shadow pdes map
-                         * only 2MB.  So we need to double the offset again
-                         * and zap two pdes instead of one.
-                         */
-                        if (level == PT32_ROOT_LEVEL) {
-                                page_offset &= ~7; /* kill rounding error */
-                                page_offset <<= 1;
-                                npte = 2;
-                        }
-                        quadrant = page_offset >> PAGE_SHIFT;
-                        page_offset &= ~PAGE_MASK;
-                        if (quadrant != sp->role.quadrant)
-                                continue;
-                }
+
+                spte = get_written_sptes(sp, gpa, &npte);
+                if (!spte)
+                        continue;
+
                 local_flush = true;
-                spte = &sp->spt[page_offset / sizeof(*spte)];
                 while (npte--) {
                         entry = *spte;
                         mmu_page_zap_pte(vcpu->kvm, sp, spte);