author	Junaid Shahid <junaids@google.com>	2016-12-06 19:46:12 -0500
committer	Radim Krčmář <rkrcmar@redhat.com>	2017-01-09 08:46:07 -0500
commit	97dceba29a6acbb28d16c8c5757ae9f4e1e482ea (patch)
tree	05b14a1e57c4e75118f498db8c805ca2da2c84b3
parent	ea4114bcd3a8c84f0eb0b52e56d348c27ddede2e (diff)
kvm: x86: mmu: Fast Page Fault path retries
This change adds retries into the Fast Page Fault path. Without the retries, the code still works, but if a retry does end up being needed, then it will result in a second page fault for the same memory access, which will cause much more overhead compared to just retrying within the original fault.

This would be especially useful with the upcoming fast access tracking change, as that would make it more likely for retries to be needed (e.g. due to read and write faults happening on different CPUs at the same time).

Signed-off-by: Junaid Shahid <junaids@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-rw-r--r--	arch/x86/kvm/mmu.c	124
1 file changed, 73 insertions(+), 51 deletions(-)
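The core of the patch is a bounded lockless retry: when the cmpxchg in fast_pf_fix_direct_spte() loses a race with a concurrent SPTE update, the fault handler now re-reads the SPTE and tries again (up to 4 times) instead of returning to the guest and taking a whole second page fault for the same access. The following stand-alone C sketch illustrates that pattern with C11 atomics; it is not KVM code, and names such as fix_entry(), fix_with_retries(), WRITABLE_BIT and MAX_RETRIES are illustrative stand-ins for fast_pf_fix_direct_spte(), the new do/while loop, PT_WRITABLE_MASK and the retry limit.

/*
 * Minimal user-space sketch (not KVM code) of the bounded lockless
 * retry pattern this patch adds: re-read the value and retry the
 * compare-and-exchange a few times instead of giving up after the
 * first concurrent modification.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define WRITABLE_BIT	(1ULL << 1)	/* stand-in for PT_WRITABLE_MASK */
#define MAX_RETRIES	4

/* Analogue of fast_pf_fix_direct_spte(): a single cmpxchg attempt. */
static bool fix_entry(_Atomic uint64_t *entry, uint64_t old)
{
	uint64_t expected = old;

	/* Returns false if someone else changed *entry in the meantime. */
	return atomic_compare_exchange_strong(entry, &expected,
					      old | WRITABLE_BIT);
}

/* Analogue of the new do/while retry loop in fast_page_fault(). */
static bool fix_with_retries(_Atomic uint64_t *entry)
{
	bool handled = false;
	unsigned int retry_count = 0;

	do {
		uint64_t cur = atomic_load(entry);

		if (cur & WRITABLE_BIT) {	/* already fixed by someone else */
			handled = true;
			break;
		}

		handled = fix_entry(entry, cur);
		if (handled)
			break;

		/* Bound the retries, mirroring the "> 4" check in the patch. */
	} while (++retry_count <= MAX_RETRIES);

	return handled;
}

int main(void)
{
	_Atomic uint64_t entry = 0x1000;	/* arbitrary test value */

	printf("fixed: %d, entry: %#llx\n", fix_with_retries(&entry),
	       (unsigned long long)atomic_load(&entry));
	return 0;
}

The design point mirrors the patch: a failed compare-and-exchange is cheap to retry in place, whereas bouncing the fault back to the guest costs a second page fault (a full VM exit and re-entry) for the same access.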
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e923f393ac26..f6d3505c8d18 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2891,6 +2891,10 @@ static bool page_fault_can_be_fast(u32 error_code)
 	return true;
 }
 
+/*
+ * Returns true if the SPTE was fixed successfully. Otherwise,
+ * someone else modified the SPTE from its original value.
+ */
 static bool
 fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 			u64 *sptep, u64 spte)
@@ -2917,8 +2921,10 @@ fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	 *
 	 * Compare with set_spte where instead shadow_dirty_mask is set.
 	 */
-	if (cmpxchg64(sptep, spte, spte | PT_WRITABLE_MASK) == spte)
-		kvm_vcpu_mark_page_dirty(vcpu, gfn);
+	if (cmpxchg64(sptep, spte, spte | PT_WRITABLE_MASK) != spte)
+		return false;
+
+	kvm_vcpu_mark_page_dirty(vcpu, gfn);
 
 	return true;
 }
@@ -2933,8 +2939,9 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
 {
 	struct kvm_shadow_walk_iterator iterator;
 	struct kvm_mmu_page *sp;
-	bool ret = false;
+	bool fault_handled = false;
 	u64 spte = 0ull;
+	uint retry_count = 0;
 
 	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
 		return false;
@@ -2947,62 +2954,77 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
 		if (!is_shadow_present_pte(spte) || iterator.level < level)
 			break;
 
-	/*
-	 * If the mapping has been changed, let the vcpu fault on the
-	 * same address again.
-	 */
-	if (!is_shadow_present_pte(spte)) {
-		ret = true;
-		goto exit;
-	}
+	do {
+		/*
+		 * If the mapping has been changed, let the vcpu fault on the
+		 * same address again.
+		 */
+		if (!is_shadow_present_pte(spte)) {
+			fault_handled = true;
+			break;
+		}
 
-	sp = page_header(__pa(iterator.sptep));
-	if (!is_last_spte(spte, sp->role.level))
-		goto exit;
+		sp = page_header(__pa(iterator.sptep));
+		if (!is_last_spte(spte, sp->role.level))
+			break;
 
-	/*
-	 * Check if it is a spurious fault caused by TLB lazily flushed.
-	 *
-	 * Need not check the access of upper level table entries since
-	 * they are always ACC_ALL.
-	 */
-	if (is_writable_pte(spte)) {
-		ret = true;
-		goto exit;
-	}
+		/*
+		 * Check if it is a spurious fault caused by TLB lazily flushed.
+		 *
+		 * Need not check the access of upper level table entries since
+		 * they are always ACC_ALL.
+		 */
+		if (is_writable_pte(spte)) {
+			fault_handled = true;
+			break;
+		}
 
-	/*
-	 * Currently, to simplify the code, only the spte write-protected
-	 * by dirty-log can be fast fixed.
-	 */
-	if (!spte_can_locklessly_be_made_writable(spte))
-		goto exit;
+		/*
+		 * Currently, to simplify the code, only the spte
+		 * write-protected by dirty-log can be fast fixed.
+		 */
+		if (!spte_can_locklessly_be_made_writable(spte))
+			break;
 
-	/*
-	 * Do not fix write-permission on the large spte since we only dirty
-	 * the first page into the dirty-bitmap in fast_pf_fix_direct_spte()
-	 * that means other pages are missed if its slot is dirty-logged.
-	 *
-	 * Instead, we let the slow page fault path create a normal spte to
-	 * fix the access.
-	 *
-	 * See the comments in kvm_arch_commit_memory_region().
-	 */
-	if (sp->role.level > PT_PAGE_TABLE_LEVEL)
-		goto exit;
+		/*
+		 * Do not fix write-permission on the large spte since we only
+		 * dirty the first page into the dirty-bitmap in
+		 * fast_pf_fix_direct_spte() that means other pages are missed
+		 * if its slot is dirty-logged.
+		 *
+		 * Instead, we let the slow page fault path create a normal spte
+		 * to fix the access.
+		 *
+		 * See the comments in kvm_arch_commit_memory_region().
+		 */
+		if (sp->role.level > PT_PAGE_TABLE_LEVEL)
+			break;
+
+		/*
+		 * Currently, fast page fault only works for direct mapping
+		 * since the gfn is not stable for indirect shadow page. See
+		 * Documentation/virtual/kvm/locking.txt to get more detail.
+		 */
+		fault_handled = fast_pf_fix_direct_spte(vcpu, sp,
+							iterator.sptep, spte);
+		if (fault_handled)
+			break;
+
+		if (++retry_count > 4) {
+			printk_once(KERN_WARNING
+				"kvm: Fast #PF retrying more than 4 times.\n");
+			break;
+		}
+
+		spte = mmu_spte_get_lockless(iterator.sptep);
+
+	} while (true);
 
-	/*
-	 * Currently, fast page fault only works for direct mapping since
-	 * the gfn is not stable for indirect shadow page.
-	 * See Documentation/virtual/kvm/locking.txt to get more detail.
-	 */
-	ret = fast_pf_fix_direct_spte(vcpu, sp, iterator.sptep, spte);
-exit:
 	trace_fast_page_fault(vcpu, gva, error_code, iterator.sptep,
-			      spte, ret);
+			      spte, fault_handled);
 	walk_shadow_page_lockless_end(vcpu);
 
-	return ret;
+	return fault_handled;
 }
 
 static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,