Diffstat (limited to 'arch/x86/kvm/mmu.c')
 arch/x86/kvm/mmu.c | 115 ++++++++++++++++++++++-------------------------
 1 file changed, 54 insertions(+), 61 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a119b361b8b7..e5e66e5c6640 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -150,6 +150,20 @@ module_param(dbg, bool, 0644);
 /* make pte_list_desc fit well in cache line */
 #define PTE_LIST_EXT 3
 
+/*
+ * Return values of handle_mmio_page_fault and mmu.page_fault:
+ * RET_PF_RETRY: let CPU fault again on the address.
+ * RET_PF_EMULATE: mmio page fault, emulate the instruction directly.
+ *
+ * For handle_mmio_page_fault only:
+ * RET_PF_INVALID: the spte is invalid, let the real page fault path update it.
+ */
+enum {
+        RET_PF_RETRY = 0,
+        RET_PF_EMULATE = 1,
+        RET_PF_INVALID = 2,
+};
+
 struct pte_list_desc {
         u64 *sptes[PTE_LIST_EXT];
         struct pte_list_desc *more;
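These three codes replace the RET_MMIO_PF_* values deleted further down and are now shared by handle_mmio_page_fault() and the mmu.page_fault handlers. Note that RET_PF_RETRY == 0 and RET_PF_EMULATE == 1 match the old ad-hoc 0/1 returns, which is why the mechanical "return 0" / "return 1" substitutions in the hunks below are behavior-preserving. As a rough sketch of the contract (hypothetical helper name; the real dispatch lives in the kvm_mmu_page_fault hunk near the end of this diff), a top-level caller maps the codes like this:

static int ret_pf_to_exit_code(int r)
{
        if (r == RET_PF_RETRY)
                return 1;       /* re-enter the guest; it will fault again */
        if (r < 0)
                return r;       /* raw -errno such as -EFAULT or -EINVAL */
        /* RET_PF_EMULATE: the real caller runs the x86 emulator instead */
        return 1;
}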
@@ -2424,7 +2438,7 @@ static void __shadow_walk_next(struct kvm_shadow_walk_iterator *iterator,
 
 static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator)
 {
-        return __shadow_walk_next(iterator, *iterator->sptep);
+        __shadow_walk_next(iterator, *iterator->sptep);
 }
 
 static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep,
@@ -2794,13 +2808,13 @@ done:
         return ret;
 }
 
-static bool mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
+static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
                         int write_fault, int level, gfn_t gfn, kvm_pfn_t pfn,
                         bool speculative, bool host_writable)
 {
         int was_rmapped = 0;
         int rmap_count;
-        bool emulate = false;
+        int ret = RET_PF_RETRY;
 
         pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__,
                  *sptep, write_fault, gfn);
@@ -2830,12 +2844,12 @@ static bool mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
         if (set_spte(vcpu, sptep, pte_access, level, gfn, pfn, speculative,
                      true, host_writable)) {
                 if (write_fault)
-                        emulate = true;
+                        ret = RET_PF_EMULATE;
                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
         }
 
         if (unlikely(is_mmio_spte(*sptep)))
-                emulate = true;
+                ret = RET_PF_EMULATE;
 
         pgprintk("%s: setting spte %llx\n", __func__, *sptep);
         pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n",
@@ -2855,7 +2869,7 @@ static bool mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
 
         kvm_release_pfn_clean(pfn);
 
-        return emulate;
+        return ret;
 }
 
 static kvm_pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
@@ -2994,14 +3008,13 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
          * Do not cache the mmio info caused by writing the readonly gfn
          * into the spte otherwise read access on readonly gfn also can
          * caused mmio page fault and treat it as mmio access.
-         * Return 1 to tell kvm to emulate it.
          */
         if (pfn == KVM_PFN_ERR_RO_FAULT)
-                return 1;
+                return RET_PF_EMULATE;
 
         if (pfn == KVM_PFN_ERR_HWPOISON) {
                 kvm_send_hwpoison_signal(kvm_vcpu_gfn_to_hva(vcpu, gfn), current);
-                return 0;
+                return RET_PF_RETRY;
         }
 
         return -EFAULT;
@@ -3286,13 +3299,13 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
         }
 
         if (fast_page_fault(vcpu, v, level, error_code))
-                return 0;
+                return RET_PF_RETRY;
 
         mmu_seq = vcpu->kvm->mmu_notifier_seq;
         smp_rmb();
 
         if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable))
-                return 0;
+                return RET_PF_RETRY;
 
         if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r))
                 return r;
@@ -3312,7 +3325,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
 out_unlock:
         spin_unlock(&vcpu->kvm->mmu_lock);
         kvm_release_pfn_clean(pfn);
-        return 0;
+        return RET_PF_RETRY;
 }
 
 
@@ -3659,54 +3672,38 @@ exit:
         return reserved;
 }
 
-/*
- * Return values of handle_mmio_page_fault:
- * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction
- *                      directly.
- * RET_MMIO_PF_INVALID: invalid spte is detected then let the real page
- *                      fault path update the mmio spte.
- * RET_MMIO_PF_RETRY: let CPU fault again on the address.
- * RET_MMIO_PF_BUG: a bug was detected (and a WARN was printed).
- */
-enum {
-        RET_MMIO_PF_EMULATE = 1,
-        RET_MMIO_PF_INVALID = 2,
-        RET_MMIO_PF_RETRY = 0,
-        RET_MMIO_PF_BUG = -1
-};
-
 static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
 {
         u64 spte;
         bool reserved;
 
         if (mmio_info_in_cache(vcpu, addr, direct))
-                return RET_MMIO_PF_EMULATE;
+                return RET_PF_EMULATE;
 
         reserved = walk_shadow_page_get_mmio_spte(vcpu, addr, &spte);
         if (WARN_ON(reserved))
-                return RET_MMIO_PF_BUG;
+                return -EINVAL;
 
         if (is_mmio_spte(spte)) {
                 gfn_t gfn = get_mmio_spte_gfn(spte);
                 unsigned access = get_mmio_spte_access(spte);
 
                 if (!check_mmio_spte(vcpu, spte))
-                        return RET_MMIO_PF_INVALID;
+                        return RET_PF_INVALID;
 
                 if (direct)
                         addr = 0;
 
                 trace_handle_mmio_page_fault(addr, gfn, access);
                 vcpu_cache_mmio_info(vcpu, addr, gfn, access);
-                return RET_MMIO_PF_EMULATE;
+                return RET_PF_EMULATE;
         }
 
         /*
          * If the page table is zapped by other cpus, let CPU fault again on
          * the address.
          */
-        return RET_MMIO_PF_RETRY;
+        return RET_PF_RETRY;
 }
 EXPORT_SYMBOL_GPL(handle_mmio_page_fault);
 
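Worth noting: the WARN case that used to return the private RET_MMIO_PF_BUG (-1) now returns a plain -EINVAL, so a buggy shadow walk flows through the ordinary negative-errno path instead of needing a dedicated code. A caller-side sketch of what that looks like (illustrative; the actual handling is in the kvm_mmu_page_fault hunk below):

r = handle_mmio_page_fault(vcpu, cr2, direct);
if (r < 0)              /* -EINVAL from the WARN_ON(reserved) case */
        return r;       /* propagated as an ordinary error */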
@@ -3756,7 +3753,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
         pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code);
 
         if (page_fault_handle_page_track(vcpu, error_code, gfn))
-                return 1;
+                return RET_PF_EMULATE;
 
         r = mmu_topup_memory_caches(vcpu);
         if (r)
@@ -3820,8 +3817,7 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
 }
 
 int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
-                        u64 fault_address, char *insn, int insn_len,
-                        bool need_unprotect)
+                        u64 fault_address, char *insn, int insn_len)
 {
         int r = 1;
 
@@ -3829,7 +3825,7 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
         default:
                 trace_kvm_page_fault(fault_address, error_code);
 
-                if (need_unprotect && kvm_event_needs_reinjection(vcpu))
+                if (kvm_event_needs_reinjection(vcpu))
                         kvm_mmu_unprotect_page_virt(vcpu, fault_address);
                 r = kvm_mmu_page_fault(vcpu, fault_address, error_code, insn,
                                 insn_len);
@@ -3876,7 +3872,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
         MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
 
         if (page_fault_handle_page_track(vcpu, error_code, gfn))
-                return 1;
+                return RET_PF_EMULATE;
 
         r = mmu_topup_memory_caches(vcpu);
         if (r)
@@ -3893,13 +3889,13 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
         }
 
         if (fast_page_fault(vcpu, gpa, level, error_code))
-                return 0;
+                return RET_PF_RETRY;
 
         mmu_seq = vcpu->kvm->mmu_notifier_seq;
         smp_rmb();
 
         if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable))
-                return 0;
+                return RET_PF_RETRY;
 
         if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r))
                 return r;
@@ -3919,7 +3915,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
 out_unlock:
         spin_unlock(&vcpu->kvm->mmu_lock);
         kvm_release_pfn_clean(pfn);
-        return 0;
+        return RET_PF_RETRY;
 }
 
 static void nonpaging_init_context(struct kvm_vcpu *vcpu,
@@ -4918,25 +4914,25 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
                 vcpu->arch.gpa_val = cr2;
         }
 
+        r = RET_PF_INVALID;
         if (unlikely(error_code & PFERR_RSVD_MASK)) {
                 r = handle_mmio_page_fault(vcpu, cr2, direct);
-                if (r == RET_MMIO_PF_EMULATE) {
+                if (r == RET_PF_EMULATE) {
                         emulation_type = 0;
                         goto emulate;
                 }
-                if (r == RET_MMIO_PF_RETRY)
-                        return 1;
-                if (r < 0)
-                        return r;
-                /* Must be RET_MMIO_PF_INVALID. */
         }
 
-        r = vcpu->arch.mmu.page_fault(vcpu, cr2, lower_32_bits(error_code),
-                              false);
+        if (r == RET_PF_INVALID) {
+                r = vcpu->arch.mmu.page_fault(vcpu, cr2, lower_32_bits(error_code),
+                                              false);
+                WARN_ON(r == RET_PF_INVALID);
+        }
+
+        if (r == RET_PF_RETRY)
+                return 1;
         if (r < 0)
                 return r;
-        if (!r)
-                return 1;
 
         /*
          * Before emulating the instruction, check if the error code
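The if-chain above is now the single translation layer from RET_PF_* to KVM's external convention. An equivalent switch-style rendering (illustrative only, not what the patch adds):

switch (r) {
case RET_PF_RETRY:
        return 1;               /* back to the guest to refault */
case RET_PF_EMULATE:
        break;                  /* fall through to the emulation code below */
default:
        if (r < 0)
                return r;       /* hard error, e.g. -EINVAL or -EFAULT */
        break;
}

RET_PF_INVALID never escapes this function: it only means "run mmu.page_fault", and the WARN_ON asserts that the handler resolved it.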
@@ -4993,8 +4989,7 @@ EXPORT_SYMBOL_GPL(kvm_disable_tdp);
 static void free_mmu_pages(struct kvm_vcpu *vcpu)
 {
         free_page((unsigned long)vcpu->arch.mmu.pae_root);
-        if (vcpu->arch.mmu.lm_root != NULL)
-                free_page((unsigned long)vcpu->arch.mmu.lm_root);
+        free_page((unsigned long)vcpu->arch.mmu.lm_root);
 }
 
 static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
@@ -5464,10 +5459,8 @@ static struct shrinker mmu_shrinker = {
 
 static void mmu_destroy_caches(void)
 {
-        if (pte_list_desc_cache)
-                kmem_cache_destroy(pte_list_desc_cache);
-        if (mmu_page_header_cache)
-                kmem_cache_destroy(mmu_page_header_cache);
+        kmem_cache_destroy(pte_list_desc_cache);
+        kmem_cache_destroy(mmu_page_header_cache);
 }
 
 int kvm_mmu_module_init(void)
@@ -5476,13 +5469,13 @@ int kvm_mmu_module_init(void)
 
         pte_list_desc_cache = kmem_cache_create("pte_list_desc",
                                             sizeof(struct pte_list_desc),
-                                            0, 0, NULL);
+                                            0, SLAB_ACCOUNT, NULL);
         if (!pte_list_desc_cache)
                 goto nomem;
 
         mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header",
                                             sizeof(struct kvm_mmu_page),
-                                            0, 0, NULL);
+                                            0, SLAB_ACCOUNT, NULL);
         if (!mmu_page_header_cache)
                 goto nomem;
 
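SLAB_ACCOUNT makes every object allocated from these two caches be charged to the memory cgroup of the allocating task, without needing __GFP_ACCOUNT at each call site; that matters here because a guest can drive unbounded growth of rmap descriptors and shadow-page headers. A sketch of the pattern (the allocation site below is illustrative, not part of this patch):

struct pte_list_desc *desc;

/* Charged to current's memcg because the cache was created with
 * SLAB_ACCOUNT; the GFP flags at the call site stay unchanged. */
desc = kmem_cache_zalloc(pte_list_desc_cache, GFP_KERNEL);
if (!desc)
        return -ENOMEM;

kmem_cache_free(pte_list_desc_cache, desc);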