Diffstat (limited to 'arch')
-rw-r--r--  arch/x86/kvm/mmu.c  144
1 file changed, 127 insertions, 17 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index b160652f7eee..8637bffbdb4a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -446,8 +446,22 @@ static bool __check_direct_spte_mmio_pf(u64 spte)
 }
 #endif
 
+static bool spte_is_locklessly_modifiable(u64 spte)
+{
+	return !(~spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE));
+}
+
 static bool spte_has_volatile_bits(u64 spte)
 {
+	/*
+	 * Always atomically update the spte if it can be updated out
+	 * of mmu-lock: this ensures the dirty bit is not lost, and it
+	 * also gives us a stable is_writable_pte() so that a needed
+	 * TLB flush is not missed.
+	 */
+	if (spte_is_locklessly_modifiable(spte))
+		return true;
+
 	if (!shadow_accessed_mask)
 		return false;
 
@@ -489,7 +503,7 @@ static void mmu_spte_set(u64 *sptep, u64 new_spte)
  */
 static bool mmu_spte_update(u64 *sptep, u64 new_spte)
 {
-	u64 mask, old_spte = *sptep;
+	u64 old_spte = *sptep;
 	bool ret = false;
 
 	WARN_ON(!is_rmap_spte(new_spte));
@@ -499,17 +513,16 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte)
 		return ret;
 	}
 
-	new_spte |= old_spte & shadow_dirty_mask;
-
-	mask = shadow_accessed_mask;
-	if (is_writable_pte(old_spte))
-		mask |= shadow_dirty_mask;
-
-	if (!spte_has_volatile_bits(old_spte) || (new_spte & mask) == mask)
+	if (!spte_has_volatile_bits(old_spte))
 		__update_clear_spte_fast(sptep, new_spte);
 	else
 		old_spte = __update_clear_spte_slow(sptep, new_spte);
 
+	/*
+	 * Updating the spte out of mmu-lock is safe, since we always
+	 * update it atomically; see the comment in
+	 * spte_has_volatile_bits().
+	 */
 	if (is_writable_pte(old_spte) && !is_writable_pte(new_spte))
 		ret = true;
 
@@ -1085,11 +1098,6 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
 	kvm_flush_remote_tlbs(vcpu->kvm);
 }
 
-static bool spte_is_locklessly_modifiable(u64 spte)
-{
-	return !(~spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE));
-}
-
 /*
  * Write-protect on the specified @sptep, @pt_protect indicates whether
  * spte writ-protection is caused by protecting shadow page table.
@@ -2677,18 +2685,114 @@ exit:
 	return ret;
 }
 
+static bool page_fault_can_be_fast(struct kvm_vcpu *vcpu, u32 error_code)
+{
+	/*
+	 * A #PF can be fast only if the shadow page table is present and
+	 * the fault is caused by write-protect, i.e. we only need to
+	 * change the W bit of the spte, which can be done out of mmu-lock.
+	 */
+	if (!(error_code & PFERR_PRESENT_MASK) ||
+	    !(error_code & PFERR_WRITE_MASK))
+		return false;
+
+	return true;
+}
+
+static bool
+fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 spte)
+{
+	struct kvm_mmu_page *sp = page_header(__pa(sptep));
+	gfn_t gfn;
+
+	WARN_ON(!sp->role.direct);
+
+	/*
+	 * The gfn of a direct spte is stable since it is calculated
+	 * from sp->gfn.
+	 */
+	gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt);
+
+	if (cmpxchg64(sptep, spte, spte | PT_WRITABLE_MASK) == spte)
+		mark_page_dirty(vcpu->kvm, gfn);
+
+	return true;
+}
+
+/*
+ * Return value:
+ * - true: let the vcpu access the same address again.
+ * - false: let the real page fault path fix it.
+ */
+static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
+			    u32 error_code)
+{
+	struct kvm_shadow_walk_iterator iterator;
+	bool ret = false;
+	u64 spte = 0ull;
+
+	if (!page_fault_can_be_fast(vcpu, error_code))
+		return false;
+
+	walk_shadow_page_lockless_begin(vcpu);
+	for_each_shadow_entry_lockless(vcpu, gva, iterator, spte)
+		if (!is_shadow_present_pte(spte) || iterator.level < level)
+			break;
+
+	/*
+	 * If the mapping has been changed, let the vcpu fault on the
+	 * same address again.
+	 */
+	if (!is_rmap_spte(spte)) {
+		ret = true;
+		goto exit;
+	}
+
+	if (!is_last_spte(spte, level))
+		goto exit;
+
+	/*
+	 * Check if this is a spurious fault caused by a lazily flushed TLB.
+	 *
+	 * There is no need to check the access bits of upper-level table
+	 * entries since they are always ACC_ALL.
+	 */
+	if (is_writable_pte(spte)) {
+		ret = true;
+		goto exit;
+	}
+
+	/*
+	 * Currently, to keep the code simple, only sptes write-protected
+	 * by dirty logging can be fast fixed.
+	 */
+	if (!spte_is_locklessly_modifiable(spte))
+		goto exit;
+
+	/*
+	 * Currently, fast page fault only works for direct mappings
+	 * since the gfn is not stable for indirect shadow pages.
+	 * See Documentation/virtual/kvm/locking.txt for more details.
+	 */
+	ret = fast_pf_fix_direct_spte(vcpu, iterator.sptep, spte);
+exit:
+	walk_shadow_page_lockless_end(vcpu);
+
+	return ret;
+}
+
 static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
 			 gva_t gva, pfn_t *pfn, bool write, bool *writable);
 
-static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn,
-			 bool prefault)
+static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
+			 gfn_t gfn, bool prefault)
 {
 	int r;
 	int level;
 	int force_pt_level;
 	pfn_t pfn;
 	unsigned long mmu_seq;
-	bool map_writable;
+	bool map_writable, write = error_code & PFERR_WRITE_MASK;
 
 	force_pt_level = mapping_level_dirty_bitmap(vcpu, gfn);
 	if (likely(!force_pt_level)) {
@@ -2705,6 +2809,9 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn,
 	} else
 		level = PT_PAGE_TABLE_LEVEL;
 
+	if (fast_page_fault(vcpu, v, level, error_code))
+		return 0;
+
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
 
@@ -3093,7 +3200,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
 	gfn = gva >> PAGE_SHIFT;
 
 	return nonpaging_map(vcpu, gva & PAGE_MASK,
-			     error_code & PFERR_WRITE_MASK, gfn, prefault);
+			     error_code, gfn, prefault);
 }
 
 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
@@ -3173,6 +3280,9 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
 	} else
 		level = PT_PAGE_TABLE_LEVEL;
 
+	if (fast_page_fault(vcpu, gpa, level, error_code))
+		return 0;
+
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
 
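
Illustrative sketch (not part of the patch): the fast page fault path relies on cmpxchg64() flipping the W bit only if the spte still holds exactly the value that was read locklessly, which is also why mmu_spte_update() must always update locklessly-modifiable sptes atomically. The user-space C11 program below mimics that compare-and-swap pattern; the names fake_spte, FAKE_W_BIT, and fake_fast_fix are made up for illustration and do not exist in the kernel.

/* Sketch of the lockless "fast fix" pattern using C11 atomics. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define FAKE_W_BIT (1ull << 1)	/* stand-in for PT_WRITABLE_MASK */

static _Atomic uint64_t fake_spte;

/* Set the W bit only if the spte still holds the value we observed. */
static bool fake_fast_fix(uint64_t observed)
{
	uint64_t expected = observed;

	/*
	 * If another writer (e.g. write-protection under mmu-lock)
	 * changed the spte in the meantime, the CAS fails and the
	 * caller falls back to the slow path; nothing is lost.
	 */
	return atomic_compare_exchange_strong(&fake_spte, &expected,
					      observed | FAKE_W_BIT);
}

int main(void)
{
	atomic_store(&fake_spte, 0x1000);	/* present, not writable */

	uint64_t seen = atomic_load(&fake_spte);
	if (fake_fast_fix(seen))
		printf("fast path: spte is now 0x%llx\n",
		       (unsigned long long)atomic_load(&fake_spte));
	else
		printf("spte changed under us, take the slow path\n");

	return 0;
}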