author     Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>   2012-06-20 03:59:18 -0400
committer  Avi Kivity <avi@redhat.com>                         2012-07-11 09:51:20 -0400
commit     c7ba5b48cc8ddc015a9e0463813ca1e60bc42c59 (patch)
tree       bb89a83af218d10f7d6f8ca4bae5c19fbfba7609
parent     49fde3406f3266c5af9430467672c20b63a31e83 (diff)
KVM: MMU: fast path of handling guest page fault
If the present bit of the page fault error code is set, it indicates
that the shadow page is populated on all levels; in that case, all we
need to do is modify the access bits of the spte, which can be done
out of mmu-lock.

Currently, to simplify the code, only page faults caused by
write-protection are fixed on the fast path.

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
-rw-r--r--  arch/x86/kvm/mmu.c | 144
 1 file changed, 127 insertions(+), 17 deletions(-)
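Before the diff itself, here is a minimal userspace sketch of the pattern the patch
implements. This is an assumption-laden illustration, not kernel code: every identifier
and bit position below is a placeholder chosen for the example, and it only mirrors the
shape of the real logic in fast_page_fault() and fast_pf_fix_direct_spte() further down.
The idea is that a fault is eligible for the fast path only when the error code has both
the PRESENT and WRITE bits set, and that a write-protected pte whose host-writable and
mmu-writable soft bits are both set can have its W bit restored with one cmpxchg, so the
fix needs no mmu-lock.

/*
 * Simplified userspace sketch of the lockless fast-path idea (NOT kernel code).
 * Bit positions and names are placeholders.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PTE_WRITABLE      (1ull << 1)   /* hardware W bit (placeholder)      */
#define PTE_HOST_WRITABLE (1ull << 57)  /* soft bit: host allows write       */
#define PTE_MMU_WRITABLE  (1ull << 58)  /* soft bit: mmu allows write        */

#define PF_PRESENT        (1u << 0)     /* page-fault error code: present    */
#define PF_WRITE          (1u << 1)     /* page-fault error code: write      */

/* Fast path is only legal for a write fault on a present mapping. */
static bool fault_can_be_fast(uint32_t error_code)
{
        return (error_code & PF_PRESENT) && (error_code & PF_WRITE);
}

/* Both soft bits set => the W bit may be flipped without holding a lock. */
static bool pte_is_locklessly_modifiable(uint64_t pte)
{
        return !(~pte & (PTE_HOST_WRITABLE | PTE_MMU_WRITABLE));
}

/*
 * Try to re-enable write access: succeed only if the pte still holds the
 * value we sampled; otherwise let a slow (locked) path handle the fault.
 */
static bool fast_fix_write_protect(_Atomic uint64_t *ptep, uint64_t old)
{
        return atomic_compare_exchange_strong(ptep, &old, old | PTE_WRITABLE);
}

int main(void)
{
        /* A write-protected pte: W bit clear, both soft bits set. */
        _Atomic uint64_t pte = PTE_HOST_WRITABLE | PTE_MMU_WRITABLE;
        uint32_t error_code = PF_PRESENT | PF_WRITE;
        uint64_t snap = atomic_load(&pte);

        if (fault_can_be_fast(error_code) &&
            pte_is_locklessly_modifiable(snap) &&
            fast_fix_write_protect(&pte, snap))
                printf("fast path fixed the fault: pte=%#llx\n",
                       (unsigned long long)atomic_load(&pte));
        return 0;
}

Compile with any C11 compiler (e.g. gcc -std=c11) to experiment; the authoritative
version of this logic is in the patch below.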
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index b160652f7eee..8637bffbdb4a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -446,8 +446,22 @@ static bool __check_direct_spte_mmio_pf(u64 spte)
 }
 #endif
 
+static bool spte_is_locklessly_modifiable(u64 spte)
+{
+        return !(~spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE));
+}
+
 static bool spte_has_volatile_bits(u64 spte)
 {
+        /*
+         * Always atomicly update spte if it can be updated
+         * out of mmu-lock, it can ensure dirty bit is not lost,
+         * also, it can help us to get a stable is_writable_pte()
+         * to ensure tlb flush is not missed.
+         */
+        if (spte_is_locklessly_modifiable(spte))
+                return true;
+
         if (!shadow_accessed_mask)
                 return false;
 
@@ -489,7 +503,7 @@ static void mmu_spte_set(u64 *sptep, u64 new_spte)
  */
 static bool mmu_spte_update(u64 *sptep, u64 new_spte)
 {
-        u64 mask, old_spte = *sptep;
+        u64 old_spte = *sptep;
         bool ret = false;
 
         WARN_ON(!is_rmap_spte(new_spte));
@@ -499,17 +513,16 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte)
                 return ret;
         }
 
-        new_spte |= old_spte & shadow_dirty_mask;
-
-        mask = shadow_accessed_mask;
-        if (is_writable_pte(old_spte))
-                mask |= shadow_dirty_mask;
-
-        if (!spte_has_volatile_bits(old_spte) || (new_spte & mask) == mask)
+        if (!spte_has_volatile_bits(old_spte))
                 __update_clear_spte_fast(sptep, new_spte);
         else
                 old_spte = __update_clear_spte_slow(sptep, new_spte);
 
+        /*
+         * For the spte updated out of mmu-lock is safe, since
+         * we always atomicly update it, see the comments in
+         * spte_has_volatile_bits().
+         */
         if (is_writable_pte(old_spte) && !is_writable_pte(new_spte))
                 ret = true;
 
@@ -1085,11 +1098,6 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
                 kvm_flush_remote_tlbs(vcpu->kvm);
 }
 
-static bool spte_is_locklessly_modifiable(u64 spte)
-{
-        return !(~spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE));
-}
-
 /*
  * Write-protect on the specified @sptep, @pt_protect indicates whether
  * spte writ-protection is caused by protecting shadow page table.
@@ -2677,18 +2685,114 @@ exit:
         return ret;
 }
 
+static bool page_fault_can_be_fast(struct kvm_vcpu *vcpu, u32 error_code)
+{
+        /*
+         * #PF can be fast only if the shadow page table is present and it
+         * is caused by write-protect, that means we just need change the
+         * W bit of the spte which can be done out of mmu-lock.
+         */
+        if (!(error_code & PFERR_PRESENT_MASK) ||
+              !(error_code & PFERR_WRITE_MASK))
+                return false;
+
+        return true;
+}
+
+static bool
+fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 spte)
+{
+        struct kvm_mmu_page *sp = page_header(__pa(sptep));
+        gfn_t gfn;
+
+        WARN_ON(!sp->role.direct);
+
+        /*
+         * The gfn of direct spte is stable since it is calculated
+         * by sp->gfn.
+         */
+        gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt);
+
+        if (cmpxchg64(sptep, spte, spte | PT_WRITABLE_MASK) == spte)
+                mark_page_dirty(vcpu->kvm, gfn);
+
+        return true;
+}
+
+/*
+ * Return value:
+ * - true: let the vcpu to access on the same address again.
+ * - false: let the real page fault path to fix it.
+ */
+static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
+                            u32 error_code)
+{
+        struct kvm_shadow_walk_iterator iterator;
+        bool ret = false;
+        u64 spte = 0ull;
+
+        if (!page_fault_can_be_fast(vcpu, error_code))
+                return false;
+
+        walk_shadow_page_lockless_begin(vcpu);
+        for_each_shadow_entry_lockless(vcpu, gva, iterator, spte)
+                if (!is_shadow_present_pte(spte) || iterator.level < level)
+                        break;
+
+        /*
+         * If the mapping has been changed, let the vcpu fault on the
+         * same address again.
+         */
+        if (!is_rmap_spte(spte)) {
+                ret = true;
+                goto exit;
+        }
+
+        if (!is_last_spte(spte, level))
+                goto exit;
+
+        /*
+         * Check if it is a spurious fault caused by TLB lazily flushed.
+         *
+         * Need not check the access of upper level table entries since
+         * they are always ACC_ALL.
+         */
+        if (is_writable_pte(spte)) {
+                ret = true;
+                goto exit;
+        }
+
+        /*
+         * Currently, to simplify the code, only the spte write-protected
+         * by dirty-log can be fast fixed.
+         */
+        if (!spte_is_locklessly_modifiable(spte))
+                goto exit;
+
+        /*
+         * Currently, fast page fault only works for direct mapping since
+         * the gfn is not stable for indirect shadow page.
+         * See Documentation/virtual/kvm/locking.txt to get more detail.
+         */
+        ret = fast_pf_fix_direct_spte(vcpu, iterator.sptep, spte);
+exit:
+        walk_shadow_page_lockless_end(vcpu);
+
+        return ret;
+}
+
 static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
                          gva_t gva, pfn_t *pfn, bool write, bool *writable);
 
-static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn,
-                         bool prefault)
+static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
+                         gfn_t gfn, bool prefault)
 {
         int r;
         int level;
         int force_pt_level;
         pfn_t pfn;
         unsigned long mmu_seq;
-        bool map_writable;
+        bool map_writable, write = error_code & PFERR_WRITE_MASK;
 
         force_pt_level = mapping_level_dirty_bitmap(vcpu, gfn);
         if (likely(!force_pt_level)) {
@@ -2705,6 +2809,9 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn,
         } else
                 level = PT_PAGE_TABLE_LEVEL;
 
+        if (fast_page_fault(vcpu, v, level, error_code))
+                return 0;
+
         mmu_seq = vcpu->kvm->mmu_notifier_seq;
         smp_rmb();
 
@@ -3093,7 +3200,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
         gfn = gva >> PAGE_SHIFT;
 
         return nonpaging_map(vcpu, gva & PAGE_MASK,
-                             error_code & PFERR_WRITE_MASK, gfn, prefault);
+                             error_code, gfn, prefault);
 }
 
 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
@@ -3173,6 +3280,9 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
         } else
                 level = PT_PAGE_TABLE_LEVEL;
 
+        if (fast_page_fault(vcpu, gpa, level, error_code))
+                return 0;
+
         mmu_seq = vcpu->kvm->mmu_notifier_seq;
         smp_rmb();
 