Diffstat (limited to 'arch/x86/kvm/mmu.c'):
 arch/x86/kvm/mmu.c | 84
 1 file changed, 55 insertions(+), 29 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 813d31038b93..931467881da7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -22,6 +22,7 @@
 #include "mmu.h"
 #include "x86.h"
 #include "kvm_cache_regs.h"
+#include "cpuid.h"
 
 #include <linux/kvm_host.h>
 #include <linux/types.h>
@@ -595,7 +596,8 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte)
	 * we always atomicly update it, see the comments in
	 * spte_has_volatile_bits().
	 */
-	if (is_writable_pte(old_spte) && !is_writable_pte(new_spte))
+	if (spte_is_locklessly_modifiable(old_spte) &&
+	    !is_writable_pte(new_spte))
 		ret = true;
 
 	if (!shadow_accessed_mask)
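
The tightened check requests a TLB flush whenever a lockless-modifiable spte loses write access, not only when it is currently writable. For reference, spte_is_locklessly_modifiable() tests the two software bits used by the fast page fault path; a minimal sketch of that helper (my reconstruction from the software bits defined elsewhere in mmu.c, not part of this diff):

	/* Sketch: an spte can be made writable locklessly only if both software bits are set. */
	static bool spte_is_locklessly_modifiable(u64 spte)
	{
		return (spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)) ==
			(SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE);
	}
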
@@ -1176,8 +1178,7 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
 
 /*
  * Write-protect on the specified @sptep, @pt_protect indicates whether
- * spte writ-protection is caused by protecting shadow page table.
- * @flush indicates whether tlb need be flushed.
+ * spte write-protection is caused by protecting shadow page table.
  *
  * Note: write protection is difference between drity logging and spte
  * protection:
@@ -1186,10 +1187,9 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
  * - for spte protection, the spte can be writable only after unsync-ing
  *   shadow page.
  *
- * Return true if the spte is dropped.
+ * Return true if tlb need be flushed.
  */
-static bool
-spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect)
+static bool spte_write_protect(struct kvm *kvm, u64 *sptep, bool pt_protect)
 {
 	u64 spte = *sptep;
 
@@ -1199,17 +1199,11 @@ spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect)
 
 	rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, *sptep);
 
-	if (__drop_large_spte(kvm, sptep)) {
-		*flush |= true;
-		return true;
-	}
-
 	if (pt_protect)
 		spte &= ~SPTE_MMU_WRITEABLE;
 	spte = spte & ~PT_WRITABLE_MASK;
 
-	*flush |= mmu_spte_update(sptep, spte);
-	return false;
+	return mmu_spte_update(sptep, spte);
 }
 
 static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
@@ -1221,11 +1215,8 @@ static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
 
 	for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
 		BUG_ON(!(*sptep & PT_PRESENT_MASK));
-		if (spte_write_protect(kvm, sptep, &flush, pt_protect)) {
-			sptep = rmap_get_first(*rmapp, &iter);
-			continue;
-		}
 
+		flush |= spte_write_protect(kvm, sptep, pt_protect);
 		sptep = rmap_get_next(&iter);
 	}
 
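
Since spte_write_protect() no longer drops large sptes, __rmap_write_protect() can walk the rmap chain linearly and simply accumulate the flush result. A hedged usage sketch of how a caller consumes that result (the function name here is illustrative, not from the patch):

	/* Illustrative caller: write-protect one rmap chain, flush only if an spte really changed. */
	static void example_write_protect_rmap(struct kvm *kvm, unsigned long *rmapp)
	{
		if (__rmap_write_protect(kvm, rmapp, true))
			kvm_flush_remote_tlbs(kvm);
	}
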
@@ -2802,9 +2793,9 @@ static bool page_fault_can_be_fast(u32 error_code)
 }
 
 static bool
-fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 spte)
+fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
+			u64 *sptep, u64 spte)
 {
-	struct kvm_mmu_page *sp = page_header(__pa(sptep));
 	gfn_t gfn;
 
 	WARN_ON(!sp->role.direct);
@@ -2830,6 +2821,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
 				   u32 error_code)
 {
 	struct kvm_shadow_walk_iterator iterator;
+	struct kvm_mmu_page *sp;
 	bool ret = false;
 	u64 spte = 0ull;
 
@@ -2853,7 +2845,8 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
 		goto exit;
 	}
 
-	if (!is_last_spte(spte, level))
+	sp = page_header(__pa(iterator.sptep));
+	if (!is_last_spte(spte, sp->role.level))
 		goto exit;
 
 	/*
@@ -2875,11 +2868,24 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
 		goto exit;
 
 	/*
+	 * Do not fix write-permission on the large spte since we only dirty
+	 * the first page into the dirty-bitmap in fast_pf_fix_direct_spte()
+	 * that means other pages are missed if its slot is dirty-logged.
+	 *
+	 * Instead, we let the slow page fault path create a normal spte to
+	 * fix the access.
+	 *
+	 * See the comments in kvm_arch_commit_memory_region().
+	 */
+	if (sp->role.level > PT_PAGE_TABLE_LEVEL)
+		goto exit;
+
+	/*
 	 * Currently, fast page fault only works for direct mapping since
 	 * the gfn is not stable for indirect shadow page.
 	 * See Documentation/virtual/kvm/locking.txt to get more detail.
 	 */
-	ret = fast_pf_fix_direct_spte(vcpu, iterator.sptep, spte);
+	ret = fast_pf_fix_direct_spte(vcpu, sp, iterator.sptep, spte);
 exit:
 	trace_fast_page_fault(vcpu, gva, error_code, iterator.sptep,
 			      spte, ret);
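
The new early exit exists because fast_pf_fix_direct_spte() marks only a single gfn dirty, while a large spte spans many guest pages. Rough numbers to make that concrete (my arithmetic, assuming 4 KiB base pages; not from the patch):

	/*
	 * Illustrative arithmetic:
	 *   2 MiB large spte -> 2 MiB / 4 KiB =    512 gfns, only 1 would be logged
	 *   1 GiB large spte -> 1 GiB / 4 KiB = 262144 gfns, only 1 would be logged
	 */
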
@@ -3511,11 +3517,14 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 {
 	int maxphyaddr = cpuid_maxphyaddr(vcpu);
 	u64 exb_bit_rsvd = 0;
+	u64 gbpages_bit_rsvd = 0;
 
 	context->bad_mt_xwr = 0;
 
 	if (!context->nx)
 		exb_bit_rsvd = rsvd_bits(63, 63);
+	if (!guest_cpuid_has_gbpages(vcpu))
+		gbpages_bit_rsvd = rsvd_bits(7, 7);
 	switch (context->root_level) {
 	case PT32_ROOT_LEVEL:
 		/* no rsvd bits for 2 level 4K page table entries */
@@ -3538,7 +3547,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 	case PT32E_ROOT_LEVEL:
 		context->rsvd_bits_mask[0][2] =
 			rsvd_bits(maxphyaddr, 63) |
-			rsvd_bits(7, 8) | rsvd_bits(1, 2);	/* PDPTE */
+			rsvd_bits(5, 8) | rsvd_bits(1, 2);	/* PDPTE */
 		context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 62);	/* PDE */
 		context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
@@ -3550,16 +3559,16 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 		break;
 	case PT64_ROOT_LEVEL:
 		context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
-			rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+			rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 7);
 		context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
-			rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+			gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51);
 		context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 51);
 		context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 51);
 		context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
 		context->rsvd_bits_mask[1][2] = exb_bit_rsvd |
-			rsvd_bits(maxphyaddr, 51) |
+			gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51) |
 			rsvd_bits(13, 29);
 		context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
 			rsvd_bits(maxphyaddr, 51) |
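
gbpages_bit_rsvd marks bit 7, the PS bit that selects a 1 GiB page in a PDPTE, as reserved whenever the guest's CPUID does not advertise 1 GiB pages, instead of unconditionally reserving bits 7-8 as before. A sketch of the rsvd_bits() helper these masks are built from (as I recall it from arch/x86/kvm/mmu.h; the worked values below are mine):

	/* Sketch of the mask helper: a run of 1s covering bits s..e inclusive. */
	static inline u64 rsvd_bits(int s, int e)
	{
		return ((1ULL << (e - s + 1)) - 1) << s;
	}

	/*
	 * Worked values:
	 *   rsvd_bits(7, 7)   == 0x80        (the 1 GiB page-size bit)
	 *   rsvd_bits(5, 8)   == 0x1e0       (PDPTE bits 5..8)
	 *   rsvd_bits(13, 29) == 0x3fffe000  (low address bits of a 1 GiB frame)
	 */
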
@@ -4304,15 +4313,32 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
 			if (*rmapp)
 				__rmap_write_protect(kvm, rmapp, false);
 
-			if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
-				kvm_flush_remote_tlbs(kvm);
+			if (need_resched() || spin_needbreak(&kvm->mmu_lock))
 				cond_resched_lock(&kvm->mmu_lock);
-			}
 		}
 	}
 
-	kvm_flush_remote_tlbs(kvm);
 	spin_unlock(&kvm->mmu_lock);
+
+	/*
+	 * kvm_mmu_slot_remove_write_access() and kvm_vm_ioctl_get_dirty_log()
+	 * which do tlb flush out of mmu-lock should be serialized by
+	 * kvm->slots_lock otherwise tlb flush would be missed.
+	 */
+	lockdep_assert_held(&kvm->slots_lock);
+
+	/*
+	 * We can flush all the TLBs out of the mmu lock without TLB
+	 * corruption since we just change the spte from writable to
+	 * readonly so that we only need to care the case of changing
+	 * spte from present to present (changing the spte from present
+	 * to nonpresent will flush all the TLBs immediately), in other
+	 * words, the only case we care is mmu_spte_update() where we
+	 * haved checked SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE
+	 * instead of PT_WRITABLE_MASK, that means it does not depend
+	 * on PT_WRITABLE_MASK anymore.
+	 */
+	kvm_flush_remote_tlbs(kvm);
 }
 
 #define BATCH_ZAP_PAGES			10
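
With the per-iteration and final flushes moved out from under mmu_lock, correctness depends on every path that flushes outside mmu_lock holding kvm->slots_lock; here the caller (kvm_arch_commit_memory_region(), per the comment referenced above) already holds it, hence the lockdep_assert_held(). A simplified sketch of the overall locking shape (illustrative only; the function name is made up):

	/* Illustrative pattern: write-protect under mmu_lock, flush once after dropping it. */
	static void example_write_protect_and_flush(struct kvm *kvm)
	{
		mutex_lock(&kvm->slots_lock);	/* serializes the out-of-mmu-lock flushers */
		spin_lock(&kvm->mmu_lock);
		/* ... clear only PT_WRITABLE_MASK on present sptes ... */
		spin_unlock(&kvm->mmu_lock);
		kvm_flush_remote_tlbs(kvm);	/* single flush, safe since the sptes stayed present */
		mutex_unlock(&kvm->slots_lock);
	}
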