Diffstat (limited to 'arch/x86/kvm/mmu.c')
-rw-r--r--  arch/x86/kvm/mmu.c | 115
 1 file changed, 54 insertions(+), 61 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a119b361b8b7..e5e66e5c6640 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -150,6 +150,20 @@ module_param(dbg, bool, 0644);
 /* make pte_list_desc fit well in cache line */
 #define PTE_LIST_EXT 3
 
+/*
+ * Return values of handle_mmio_page_fault and mmu.page_fault:
+ * RET_PF_RETRY: let CPU fault again on the address.
+ * RET_PF_EMULATE: mmio page fault, emulate the instruction directly.
+ *
+ * For handle_mmio_page_fault only:
+ * RET_PF_INVALID: the spte is invalid, let the real page fault path update it.
+ */
+enum {
+	RET_PF_RETRY = 0,
+	RET_PF_EMULATE = 1,
+	RET_PF_INVALID = 2,
+};
+
 struct pte_list_desc {
 	u64 *sptes[PTE_LIST_EXT];
 	struct pte_list_desc *more;
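These constants replace the function-local RET_MMIO_PF_* values that are removed further down in this patch. Condensed from the kvm_mmu_page_fault() hunk near the end of the diff, the dispatch on the new values looks roughly like this (a sketch of that hunk, not the full function):

	r = vcpu->arch.mmu.page_fault(vcpu, cr2, lower_32_bits(error_code), false);
	if (r == RET_PF_RETRY)
		return 1;	/* re-enter the guest and let it fault again */
	if (r < 0)
		return r;	/* hard error, e.g. -EINVAL or -EFAULT */
	/* otherwise RET_PF_EMULATE: fall through to instruction emulation */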
@@ -2424,7 +2438,7 @@ static void __shadow_walk_next(struct kvm_shadow_walk_iterator *iterator,
 
 static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator)
 {
-	return __shadow_walk_next(iterator, *iterator->sptep);
+	__shadow_walk_next(iterator, *iterator->sptep);
 }
 
 static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep,
@@ -2794,13 +2808,13 @@ done:
 	return ret;
 }
 
-static bool mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
+static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
 			 int write_fault, int level, gfn_t gfn, kvm_pfn_t pfn,
 			 bool speculative, bool host_writable)
 {
 	int was_rmapped = 0;
 	int rmap_count;
-	bool emulate = false;
+	int ret = RET_PF_RETRY;
 
 	pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__,
 		 *sptep, write_fault, gfn);
@@ -2830,12 +2844,12 @@ static bool mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
 	if (set_spte(vcpu, sptep, pte_access, level, gfn, pfn, speculative,
 	      true, host_writable)) {
 		if (write_fault)
-			emulate = true;
+			ret = RET_PF_EMULATE;
 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 	}
 
 	if (unlikely(is_mmio_spte(*sptep)))
-		emulate = true;
+		ret = RET_PF_EMULATE;
 
 	pgprintk("%s: setting spte %llx\n", __func__, *sptep);
 	pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n",
@@ -2855,7 +2869,7 @@ static bool mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
 
 	kvm_release_pfn_clean(pfn);
 
-	return emulate;
+	return ret;
 }
 
 static kvm_pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
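With the return type changed from bool to int, mmu_set_spte() now reports RET_PF_RETRY or RET_PF_EMULATE instead of a bare emulate flag. A hedged sketch of how a caller on a map path might propagate the value (variable and argument names here are illustrative, not taken from this hunk):

	int ret;

	ret = mmu_set_spte(vcpu, it.sptep, ACC_ALL, write, level, gfn, pfn,
			   prefault, map_writable);
	/* ret is RET_PF_RETRY or RET_PF_EMULATE; return it up the fault path */
	return ret;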
@@ -2994,14 +3008,13 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
 	 * Do not cache the mmio info caused by writing the readonly gfn
 	 * into the spte otherwise read access on readonly gfn also can
 	 * caused mmio page fault and treat it as mmio access.
-	 * Return 1 to tell kvm to emulate it.
 	 */
 	if (pfn == KVM_PFN_ERR_RO_FAULT)
-		return 1;
+		return RET_PF_EMULATE;
 
 	if (pfn == KVM_PFN_ERR_HWPOISON) {
 		kvm_send_hwpoison_signal(kvm_vcpu_gfn_to_hva(vcpu, gfn), current);
-		return 0;
+		return RET_PF_RETRY;
 	}
 
 	return -EFAULT;
@@ -3286,13 +3299,13 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
 	}
 
 	if (fast_page_fault(vcpu, v, level, error_code))
-		return 0;
+		return RET_PF_RETRY;
 
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
 
 	if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable))
-		return 0;
+		return RET_PF_RETRY;
 
 	if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r))
 		return r;
@@ -3312,7 +3325,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
 out_unlock:
 	spin_unlock(&vcpu->kvm->mmu_lock);
 	kvm_release_pfn_clean(pfn);
-	return 0;
+	return RET_PF_RETRY;
 }
 
 
@@ -3659,54 +3672,38 @@ exit:
 	return reserved;
 }
 
-/*
- * Return values of handle_mmio_page_fault:
- * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction
- *			directly.
- * RET_MMIO_PF_INVALID: invalid spte is detected then let the real page
- *			fault path update the mmio spte.
- * RET_MMIO_PF_RETRY: let CPU fault again on the address.
- * RET_MMIO_PF_BUG: a bug was detected (and a WARN was printed).
- */
-enum {
-	RET_MMIO_PF_EMULATE = 1,
-	RET_MMIO_PF_INVALID = 2,
-	RET_MMIO_PF_RETRY = 0,
-	RET_MMIO_PF_BUG = -1
-};
-
 static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
 {
 	u64 spte;
 	bool reserved;
 
 	if (mmio_info_in_cache(vcpu, addr, direct))
-		return RET_MMIO_PF_EMULATE;
+		return RET_PF_EMULATE;
 
 	reserved = walk_shadow_page_get_mmio_spte(vcpu, addr, &spte);
 	if (WARN_ON(reserved))
-		return RET_MMIO_PF_BUG;
+		return -EINVAL;
 
 	if (is_mmio_spte(spte)) {
 		gfn_t gfn = get_mmio_spte_gfn(spte);
 		unsigned access = get_mmio_spte_access(spte);
 
 		if (!check_mmio_spte(vcpu, spte))
-			return RET_MMIO_PF_INVALID;
+			return RET_PF_INVALID;
 
 		if (direct)
 			addr = 0;
 
 		trace_handle_mmio_page_fault(addr, gfn, access);
 		vcpu_cache_mmio_info(vcpu, addr, gfn, access);
-		return RET_MMIO_PF_EMULATE;
+		return RET_PF_EMULATE;
 	}
 
 	/*
 	 * If the page table is zapped by other cpus, let CPU fault again on
 	 * the address.
 	 */
-	return RET_MMIO_PF_RETRY;
+	return RET_PF_RETRY;
 }
 EXPORT_SYMBOL_GPL(handle_mmio_page_fault);
 
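For reference, the removed MMIO-specific constants map onto the consolidated values used above and in the kvm_mmu_page_fault() hunk below (derived only from this diff):

	RET_MMIO_PF_RETRY   (0)  ->  RET_PF_RETRY   (0)
	RET_MMIO_PF_EMULATE (1)  ->  RET_PF_EMULATE (1)
	RET_MMIO_PF_INVALID (2)  ->  RET_PF_INVALID (2)
	RET_MMIO_PF_BUG    (-1)  ->  -EINVAL, returned directly under WARN_ON(reserved)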
@@ -3756,7 +3753,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
 	pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code);
 
 	if (page_fault_handle_page_track(vcpu, error_code, gfn))
-		return 1;
+		return RET_PF_EMULATE;
 
 	r = mmu_topup_memory_caches(vcpu);
 	if (r)
@@ -3820,8 +3817,7 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
 }
 
 int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
-				u64 fault_address, char *insn, int insn_len,
-				bool need_unprotect)
+				u64 fault_address, char *insn, int insn_len)
 {
 	int r = 1;
 
@@ -3829,7 +3825,7 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
 	default:
 		trace_kvm_page_fault(fault_address, error_code);
 
-		if (need_unprotect && kvm_event_needs_reinjection(vcpu))
+		if (kvm_event_needs_reinjection(vcpu))
 			kvm_mmu_unprotect_page_virt(vcpu, fault_address);
 		r = kvm_mmu_page_fault(vcpu, fault_address, error_code, insn,
 				insn_len);
@@ -3876,7 +3872,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
 	MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
 
 	if (page_fault_handle_page_track(vcpu, error_code, gfn))
-		return 1;
+		return RET_PF_EMULATE;
 
 	r = mmu_topup_memory_caches(vcpu);
 	if (r)
@@ -3893,13 +3889,13 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
 	}
 
 	if (fast_page_fault(vcpu, gpa, level, error_code))
-		return 0;
+		return RET_PF_RETRY;
 
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
 
 	if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable))
-		return 0;
+		return RET_PF_RETRY;
 
 	if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r))
 		return r;
@@ -3919,7 +3915,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
 out_unlock:
 	spin_unlock(&vcpu->kvm->mmu_lock);
 	kvm_release_pfn_clean(pfn);
-	return 0;
+	return RET_PF_RETRY;
 }
 
 static void nonpaging_init_context(struct kvm_vcpu *vcpu,
@@ -4918,25 +4914,25 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
 		vcpu->arch.gpa_val = cr2;
 	}
 
+	r = RET_PF_INVALID;
 	if (unlikely(error_code & PFERR_RSVD_MASK)) {
 		r = handle_mmio_page_fault(vcpu, cr2, direct);
-		if (r == RET_MMIO_PF_EMULATE) {
+		if (r == RET_PF_EMULATE) {
 			emulation_type = 0;
 			goto emulate;
 		}
-		if (r == RET_MMIO_PF_RETRY)
-			return 1;
-		if (r < 0)
-			return r;
-		/* Must be RET_MMIO_PF_INVALID. */
 	}
 
-	r = vcpu->arch.mmu.page_fault(vcpu, cr2, lower_32_bits(error_code),
-				      false);
+	if (r == RET_PF_INVALID) {
+		r = vcpu->arch.mmu.page_fault(vcpu, cr2, lower_32_bits(error_code),
+					      false);
+		WARN_ON(r == RET_PF_INVALID);
+	}
+
+	if (r == RET_PF_RETRY)
+		return 1;
 	if (r < 0)
 		return r;
-	if (!r)
-		return 1;
 
 	/*
 	 * Before emulating the instruction, check if the error code
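Note the new WARN_ON(r == RET_PF_INVALID): per the comment added at the top of the file, RET_PF_INVALID is produced only by handle_mmio_page_fault(), so a mmu.page_fault implementation is expected to return RET_PF_RETRY, RET_PF_EMULATE, or a negative errno. A hedged skeleton of that contract (the function name is hypothetical; the real implementations are nonpaging_page_fault() and tdp_page_fault(), partly shown above):

static int example_page_fault(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
			      bool prefault)
{
	/* treating addr as a gpa, as the direct paths in this file do */
	if (page_fault_handle_page_track(vcpu, error_code, addr >> PAGE_SHIFT))
		return RET_PF_EMULATE;	/* write to a tracked gfn: emulate */

	/* ... install the missing mapping under mmu_lock ... */

	return RET_PF_RETRY;		/* let the CPU retry the access */
}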
@@ -4993,8 +4989,7 @@ EXPORT_SYMBOL_GPL(kvm_disable_tdp);
 static void free_mmu_pages(struct kvm_vcpu *vcpu)
 {
 	free_page((unsigned long)vcpu->arch.mmu.pae_root);
-	if (vcpu->arch.mmu.lm_root != NULL)
-		free_page((unsigned long)vcpu->arch.mmu.lm_root);
+	free_page((unsigned long)vcpu->arch.mmu.lm_root);
 }
 
 static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
@@ -5464,10 +5459,8 @@ static struct shrinker mmu_shrinker = {
 
 static void mmu_destroy_caches(void)
 {
-	if (pte_list_desc_cache)
-		kmem_cache_destroy(pte_list_desc_cache);
-	if (mmu_page_header_cache)
-		kmem_cache_destroy(mmu_page_header_cache);
+	kmem_cache_destroy(pte_list_desc_cache);
+	kmem_cache_destroy(mmu_page_header_cache);
 }
 
 int kvm_mmu_module_init(void)
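The two cleanup hunks above rely on the callees already tolerating "nothing to free": free_page(0) is a no-op, and kmem_cache_destroy() returns immediately when passed NULL, so the removed checks were redundant. The resulting idiom, shown in isolation:

	free_page((unsigned long)vcpu->arch.mmu.lm_root);	/* safe even if lm_root is NULL */
	kmem_cache_destroy(pte_list_desc_cache);		/* safe even if the cache was never created */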
@@ -5476,13 +5469,13 @@ int kvm_mmu_module_init(void)
 
 	pte_list_desc_cache = kmem_cache_create("pte_list_desc",
 					    sizeof(struct pte_list_desc),
-					    0, 0, NULL);
+					    0, SLAB_ACCOUNT, NULL);
 	if (!pte_list_desc_cache)
 		goto nomem;
 
 	mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header",
 					  sizeof(struct kvm_mmu_page),
-					  0, 0, NULL);
+					  0, SLAB_ACCOUNT, NULL);
 	if (!mmu_page_header_cache)
 		goto nomem;
 
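Passing SLAB_ACCOUNT makes allocations from these two caches charged to the kernel-memory cgroup of the task that triggers them (the VMM process) rather than being unaccounted kernel memory. A minimal sketch of the flag in general use, outside this file (the cache and structure names are hypothetical):

	struct kmem_cache *example_cache;

	example_cache = kmem_cache_create("example_objs",
					  sizeof(struct example_obj),
					  0, SLAB_ACCOUNT, NULL);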