Diffstat (limited to 'arch/x86/kvm/mmu.c')
 arch/x86/kvm/mmu.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+), 29 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 813d31038b93..931467881da7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -22,6 +22,7 @@
 #include "mmu.h"
 #include "x86.h"
 #include "kvm_cache_regs.h"
+#include "cpuid.h"
 
 #include <linux/kvm_host.h>
 #include <linux/types.h>
@@ -595,7 +596,8 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte)
	 * we always atomicly update it, see the comments in
	 * spte_has_volatile_bits().
	 */
-	if (is_writable_pte(old_spte) && !is_writable_pte(new_spte))
+	if (spte_is_locklessly_modifiable(old_spte) &&
+	      !is_writable_pte(new_spte))
		ret = true;
 
	if (!shadow_accessed_mask)
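The check now keys on spte_is_locklessly_modifiable() rather than the current W bit: a read-only spte whose SPTE_HOST_WRITEABLE and SPTE_MMU_WRITEABLE bits are both set can be flipped back to writable by the fast page fault path without mmu_lock, so removing write access from such an spte still demands a TLB flush. A minimal user-space sketch of that decision; the bit positions below are stand-ins, not the kernel's real spte layout.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in bit positions, not the kernel's real spte encoding. */
#define PT_WRITABLE_MASK     (1ull << 1)
#define SPTE_HOST_WRITEABLE  (1ull << 57)
#define SPTE_MMU_WRITEABLE   (1ull << 58)

static bool is_writable_pte(uint64_t spte)
{
        return spte & PT_WRITABLE_MASK;
}

/* The fast page fault path may restore the W bit without mmu_lock
 * whenever both "writeable" tracking bits are still set. */
static bool spte_is_locklessly_modifiable(uint64_t spte)
{
        return (spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)) ==
               (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE);
}

/* Testing only is_writable_pte(old_spte) would skip the flush for a
 * currently read-only spte that a concurrent fast fault can make
 * writable again, leaving a stale writable TLB entry behind. */
static bool update_needs_flush(uint64_t old_spte, uint64_t new_spte)
{
        return spte_is_locklessly_modifiable(old_spte) &&
               !is_writable_pte(new_spte);
}

int main(void)
{
        /* Read-only now, but locklessly modifiable: flush required. */
        uint64_t old = SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE;

        printf("needs flush: %d\n", update_needs_flush(old, 0));
        return 0;
}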
@@ -1176,8 +1178,7 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
 
 /*
  * Write-protect on the specified @sptep, @pt_protect indicates whether
- * spte writ-protection is caused by protecting shadow page table.
- * @flush indicates whether tlb need be flushed.
+ * spte write-protection is caused by protecting shadow page table.
  *
  * Note: write protection is difference between dirty logging and spte
  * protection:
@@ -1186,10 +1187,9 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
  * - for spte protection, the spte can be writable only after unsync-ing
  *   shadow page.
  *
- * Return true if the spte is dropped.
+ * Return true if tlb need be flushed.
  */
-static bool
-spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect)
+static bool spte_write_protect(struct kvm *kvm, u64 *sptep, bool pt_protect)
 {
	u64 spte = *sptep;
 
@@ -1199,17 +1199,11 @@ spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect)
 
	rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, *sptep);
 
-	if (__drop_large_spte(kvm, sptep)) {
-		*flush |= true;
-		return true;
-	}
-
	if (pt_protect)
		spte &= ~SPTE_MMU_WRITEABLE;
	spte = spte & ~PT_WRITABLE_MASK;
 
-	*flush |= mmu_spte_update(sptep, spte);
-	return false;
+	return mmu_spte_update(sptep, spte);
 }
 
 static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
@@ -1221,11 +1215,8 @@ static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
 
	for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
		BUG_ON(!(*sptep & PT_PRESENT_MASK));
-		if (spte_write_protect(kvm, sptep, &flush, pt_protect)) {
-			sptep = rmap_get_first(*rmapp, &iter);
-			continue;
-		}
 
+		flush |= spte_write_protect(kvm, sptep, pt_protect);
		sptep = rmap_get_next(&iter);
	}
 
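With __drop_large_spte() gone from spte_write_protect(), the function only clears permission bits and reports whether a TLB flush is needed, so __rmap_write_protect() no longer has to restart the rmap walk after a dropped spte. A simplified sketch of the new contract, with stand-in types and the rmap chain modeled as a plain array rather than the kernel's pte-list.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define PT_WRITABLE_MASK    (1ull << 1)
#define SPTE_MMU_WRITEABLE  (1ull << 58)   /* stand-in position */

/* Clears write permission on one spte; returns true iff the caller
 * must flush TLBs.  No spte is ever dropped here, so the chain the
 * caller is walking stays intact. */
bool spte_write_protect(uint64_t *sptep, bool pt_protect)
{
        uint64_t spte = *sptep;
        bool flush = (spte & PT_WRITABLE_MASK) != 0;

        if (pt_protect)
                spte &= ~SPTE_MMU_WRITEABLE;
        spte &= ~PT_WRITABLE_MASK;

        *sptep = spte;
        return flush;
}

/* A single linear pass now suffices; the caller just accumulates the
 * flush flag instead of restarting from rmap_get_first(). */
bool rmap_write_protect_all(uint64_t *sptes, size_t n, bool pt_protect)
{
        bool flush = false;
        size_t i;

        for (i = 0; i < n; i++)
                flush |= spte_write_protect(&sptes[i], pt_protect);
        return flush;
}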
@@ -2802,9 +2793,9 @@ static bool page_fault_can_be_fast(u32 error_code)
 }
 
 static bool
-fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 spte)
+fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
+			u64 *sptep, u64 spte)
 {
-	struct kvm_mmu_page *sp = page_header(__pa(sptep));
	gfn_t gfn;
 
	WARN_ON(!sp->role.direct);
@@ -2830,6 +2821,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
			    u32 error_code)
 {
	struct kvm_shadow_walk_iterator iterator;
+	struct kvm_mmu_page *sp;
	bool ret = false;
	u64 spte = 0ull;
 
@@ -2853,7 +2845,8 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
		goto exit;
	}
 
-	if (!is_last_spte(spte, level))
+	sp = page_header(__pa(iterator.sptep));
+	if (!is_last_spte(spte, sp->role.level))
		goto exit;
 
	/*
@@ -2875,11 +2868,24 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
		goto exit;
 
	/*
+	 * Do not fix write-permission on the large spte since we only dirty
+	 * the first page into the dirty-bitmap in fast_pf_fix_direct_spte()
+	 * that means other pages are missed if its slot is dirty-logged.
+	 *
+	 * Instead, we let the slow page fault path create a normal spte to
+	 * fix the access.
+	 *
+	 * See the comments in kvm_arch_commit_memory_region().
+	 */
+	if (sp->role.level > PT_PAGE_TABLE_LEVEL)
+		goto exit;
+
+	/*
	 * Currently, fast page fault only works for direct mapping since
	 * the gfn is not stable for indirect shadow page.
	 * See Documentation/virtual/kvm/locking.txt to get more detail.
	 */
-	ret = fast_pf_fix_direct_spte(vcpu, iterator.sptep, spte);
+	ret = fast_pf_fix_direct_spte(vcpu, sp, iterator.sptep, spte);
 exit:
	trace_fast_page_fault(vcpu, gva, error_code, iterator.sptep,
			      spte, ret);
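The new bail-out exists because fast_pf_fix_direct_spte() marks only the faulting gfn dirty; making a 2M or 1G spte writable on the fast path would leave every other page it maps unlogged. A small arithmetic sketch of the exposure, using stand-in constants (a real x86 paging level fans out 512-fold per level).

#include <stdint.h>
#include <stdio.h>

#define PT_PAGE_TABLE_LEVEL 1
#define PTES_PER_TABLE      512   /* x86 fan-out per paging level */

/* Small pages covered by one spte: 4K -> 1, 2M -> 512, 1G -> 262144. */
static uint64_t pages_covered(int level)
{
        uint64_t n = 1;

        while (level-- > PT_PAGE_TABLE_LEVEL)
                n *= PTES_PER_TABLE;
        return n;
}

int main(void)
{
        int level;

        /* The fast path dirties exactly one gfn, so for any level above
         * PT_PAGE_TABLE_LEVEL most of the write's footprint would be
         * missing from the dirty bitmap. */
        for (level = 1; level <= 3; level++)
                printf("level %d: %llu pages mapped, 1 marked dirty\n",
                       level, (unsigned long long)pages_covered(level));
        return 0;
}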
@@ -3511,11 +3517,14 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 {
	int maxphyaddr = cpuid_maxphyaddr(vcpu);
	u64 exb_bit_rsvd = 0;
+	u64 gbpages_bit_rsvd = 0;
 
	context->bad_mt_xwr = 0;
 
	if (!context->nx)
		exb_bit_rsvd = rsvd_bits(63, 63);
+	if (!guest_cpuid_has_gbpages(vcpu))
+		gbpages_bit_rsvd = rsvd_bits(7, 7);
	switch (context->root_level) {
	case PT32_ROOT_LEVEL:
		/* no rsvd bits for 2 level 4K page table entries */
@@ -3538,7 +3547,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
	case PT32E_ROOT_LEVEL:
		context->rsvd_bits_mask[0][2] =
			rsvd_bits(maxphyaddr, 63) |
-			rsvd_bits(7, 8) | rsvd_bits(1, 2);	/* PDPTE */
+			rsvd_bits(5, 8) | rsvd_bits(1, 2);	/* PDPTE */
		context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
			rsvd_bits(maxphyaddr, 62);	/* PDE */
		context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
@@ -3550,16 +3559,16 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
		break;
	case PT64_ROOT_LEVEL:
		context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
-			rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+			rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 7);
		context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
-			rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+			gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51);
		context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
			rsvd_bits(maxphyaddr, 51);
		context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
			rsvd_bits(maxphyaddr, 51);
		context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
		context->rsvd_bits_mask[1][2] = exb_bit_rsvd |
-			rsvd_bits(maxphyaddr, 51) |
+			gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51) |
			rsvd_bits(13, 29);
		context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
			rsvd_bits(maxphyaddr, 51) |
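The gbpages_bit_rsvd handling mirrors the hardware rule: bit 7 (PS) of a PDPTE is reserved unless the guest's CPUID advertises 1GB pages. A self-contained sketch of the mask construction follows; rsvd_bits() matches the kernel helper, while pdpte_rsvd_mask() and its parameters are hypothetical illustration only.

#include <stdbool.h>
#include <stdint.h>

/* Same helper the kernel uses: a mask of bits s..e inclusive. */
static uint64_t rsvd_bits(int s, int e)
{
        return ((1ull << (e - s + 1)) - 1) << s;
}

/* Hypothetical illustration of the PDPTE mask for a 4-level guest:
 * physical-address bits above maxphyaddr are always reserved, bit 63
 * is reserved when NX is disabled (the exb_bit_rsvd case above), and
 * bit 7 (PS) is reserved unless CPUID reports 1GB-page support. */
uint64_t pdpte_rsvd_mask(int maxphyaddr, bool nx_enabled,
                         bool guest_has_gbpages)
{
        uint64_t mask = rsvd_bits(maxphyaddr, 51);

        if (!nx_enabled)
                mask |= rsvd_bits(63, 63);
        if (!guest_has_gbpages)
                mask |= rsvd_bits(7, 7);
        return mask;
}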
@@ -4304,15 +4313,32 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
			if (*rmapp)
				__rmap_write_protect(kvm, rmapp, false);
 
-			if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
-				kvm_flush_remote_tlbs(kvm);
+			if (need_resched() || spin_needbreak(&kvm->mmu_lock))
				cond_resched_lock(&kvm->mmu_lock);
-			}
		}
	}
 
-	kvm_flush_remote_tlbs(kvm);
	spin_unlock(&kvm->mmu_lock);
+
+	/*
+	 * kvm_mmu_slot_remove_write_access() and kvm_vm_ioctl_get_dirty_log()
+	 * which do tlb flush out of mmu-lock should be serialized by
+	 * kvm->slots_lock otherwise tlb flush would be missed.
+	 */
+	lockdep_assert_held(&kvm->slots_lock);
+
+	/*
+	 * We can flush all the TLBs out of the mmu lock without TLB
+	 * corruption since we just change the spte from writable to
+	 * readonly so that we only need to care the case of changing
+	 * spte from present to present (changing the spte from present
+	 * to nonpresent will flush all the TLBs immediately), in other
+	 * words, the only case we care is mmu_spte_update() where we
+	 * haved checked SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE
+	 * instead of PT_WRITABLE_MASK, that means it does not depend
+	 * on PT_WRITABLE_MASK anymore.
+	 */
+	kvm_flush_remote_tlbs(kvm);
 }
 
 #define BATCH_ZAP_PAGES	10
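The reshuffled flush in kvm_mmu_slot_remove_write_access() relies on two facts spelled out in the added comments: sptes only go from writable to read-only under mmu_lock here, and slots_lock serializes the out-of-lock flushers. A rough user-space model of that locking pattern, with pthread mutexes standing in for the kernel's spinlock and mutex; the function names below are illustrative, not KVM's API.

#include <pthread.h>

struct vm {
        pthread_mutex_t slots_lock;   /* models kvm->slots_lock */
        pthread_mutex_t mmu_lock;     /* models kvm->mmu_lock   */
};

/* Illustrative stubs: flushing is expensive (remote IPIs), which is
 * why it is worth doing outside mmu_lock; write protection only turns
 * present writable sptes into present read-only ones. */
void flush_remote_tlbs(struct vm *vm);
void write_protect_all_sptes(struct vm *vm);

/* Caller must hold slots_lock (what lockdep_assert_held() verifies):
 * it serializes this flush against the dirty-log path, so a racing
 * flusher cannot slip into the window between unlock and flush and
 * cause a required flush to be skipped. */
void slot_remove_write_access(struct vm *vm)
{
        pthread_mutex_lock(&vm->mmu_lock);
        write_protect_all_sptes(vm);          /* present -> present */
        pthread_mutex_unlock(&vm->mmu_lock);

        /* Safe out of mmu_lock: the worst a racing access sees is a
         * stale writable TLB entry for an spte just made read-only,
         * and the flush below removes it before this call returns. */
        flush_remote_tlbs(vm);
}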