author	Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>	2014-04-17 05:06:16 -0400
committer	Marcelo Tosatti <mtosatti@redhat.com>	2014-04-23 16:49:52 -0400
commit	198c74f43f0f5473f99967aead30ddc622804bc1 (patch)
tree	aac35e7d0e127e2a553282a686085b000d786791 /arch/x86/kvm
parent	7f31c9595e3c87f68dc54b3269e900f3017ed405 (diff)
KVM: MMU: flush tlb out of mmu lock when write-protect the sptes
Now we can flush all the TLBs out of the mmu lock without TLB corruption when
write-protecting the sptes, because:

- we have marked large sptes readonly instead of dropping them, which means we
  only change the spte from writable to readonly, so we need only care about
  the case of changing a spte from present to present (changing a spte from
  present to nonpresent flushes all the TLBs immediately); in other words, the
  only case we need to care about is mmu_spte_update()

- in mmu_spte_update(), we check SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE
  instead of PT_WRITABLE_MASK, which means it does not depend on
  PT_WRITABLE_MASK anymore

Acked-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
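For context, the mmu_spte_update() check the message refers to boils down to
the sketch below (simplified from the kernel code of this era, with details
elided; mmu_spte_needs_flush is an illustrative name, not a kernel symbol):

	/*
	 * A spte is "locklessly modifiable" when both software-writable
	 * bits are set, independent of the hardware PT_WRITABLE_MASK bit.
	 */
	static bool spte_is_locklessly_modifiable(u64 spte)
	{
		return (spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)) ==
			(SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE);
	}

	/*
	 * Simplified flush rule inside mmu_spte_update(): if a spte that
	 * may still have a writable tlb entry becomes readonly, the
	 * caller must flush the remote TLBs.
	 */
	static bool mmu_spte_needs_flush(u64 old_spte, u64 new_spte)
	{
		return spte_is_locklessly_modifiable(old_spte) &&
		       !is_writable_pte(new_spte);
	}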
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--	arch/x86/kvm/mmu.c	25
-rw-r--r--	arch/x86/kvm/mmu.h	33
-rw-r--r--	arch/x86/kvm/x86.c	12
3 files changed, 64 insertions, 6 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 388a2ef83911..65f2400b8268 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4309,15 +4309,32 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
 			if (*rmapp)
 				__rmap_write_protect(kvm, rmapp, false);
 
-			if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
-				kvm_flush_remote_tlbs(kvm);
+			if (need_resched() || spin_needbreak(&kvm->mmu_lock))
 				cond_resched_lock(&kvm->mmu_lock);
-			}
 		}
 	}
 
-	kvm_flush_remote_tlbs(kvm);
 	spin_unlock(&kvm->mmu_lock);
+
+	/*
+	 * kvm_mmu_slot_remove_write_access() and kvm_vm_ioctl_get_dirty_log(),
+	 * which do the tlb flush out of mmu-lock, must be serialized by
+	 * kvm->slots_lock, otherwise a tlb flush would be missed.
+	 */
+	lockdep_assert_held(&kvm->slots_lock);
+
+	/*
+	 * We can flush all the TLBs out of the mmu lock without TLB
+	 * corruption since we only change the spte from writable to
+	 * readonly, so we need only care about the case of changing a
+	 * spte from present to present (changing a spte from present
+	 * to nonpresent flushes all the TLBs immediately); in other
+	 * words, the only case we care about is mmu_spte_update(),
+	 * which checks SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE
+	 * instead of PT_WRITABLE_MASK and so does not depend on
+	 * PT_WRITABLE_MASK anymore.
+	 */
+	kvm_flush_remote_tlbs(kvm);
 }
 
 #define BATCH_ZAP_PAGES	10
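Taken together, the pattern both out-of-lock flushers now follow can be
condensed as in the sketch below (a hypothetical condensation under the
commit's assumptions, not actual kernel code; write_protect_and_flush is an
illustrative name):

	/* Hypothetical condensation of the locking pattern; not kernel code. */
	static void write_protect_and_flush(struct kvm *kvm)
	{
		mutex_lock(&kvm->slots_lock);	/* serializes the out-of-lock flushers */

		spin_lock(&kvm->mmu_lock);
		/* ... turn writable sptes readonly (present -> present only) ... */
		spin_unlock(&kvm->mmu_lock);

		/*
		 * Safe outside mmu_lock: present -> nonpresent changes flush
		 * immediately elsewhere, and present -> present changes go
		 * through mmu_spte_update(), which requests its own flush.
		 */
		kvm_flush_remote_tlbs(kvm);

		mutex_unlock(&kvm->slots_lock);
	}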
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 3842e70bdb7c..b982112d2ca5 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -104,6 +104,39 @@ static inline int is_present_gpte(unsigned long pte)
 	return pte & PT_PRESENT_MASK;
 }
 
+/*
+ * Currently, we have two sorts of write-protection: a) the first one
+ * write-protects guest pages to sync the guest modifications, b) the other
+ * is used to sync the dirty bitmap when we do KVM_GET_DIRTY_LOG. The
+ * differences between these two sorts are:
+ * 1) the first case clears the SPTE_MMU_WRITEABLE bit.
+ * 2) the first case requires flushing the tlb immediately to avoid
+ *    corrupting the shadow page table between vcpus, so it must run under
+ *    the protection of mmu-lock; the other case does not need to flush the
+ *    tlb until the dirty bitmap is returned to userspace, since it only
+ *    write-protects pages logged in the bitmap, so no page in the bitmap
+ *    is missed and it can flush the tlb out of mmu-lock.
+ *
+ * So there is a problem: the first case can observe a corrupted tlb caused
+ * by the other case, which write-protects pages without flushing the tlb
+ * immediately. To make the first case aware of this, we let it flush the
+ * tlb whenever it write-protects a spte whose SPTE_MMU_WRITEABLE bit is
+ * set; this works because the other case never touches that bit.
+ *
+ * Anyway, whenever a spte is updated (only permission and status bits are
+ * changed) we need to check whether a spte with SPTE_MMU_WRITEABLE becomes
+ * readonly; if that happens, we need to flush the tlb. Fortunately,
+ * mmu_spte_update() already handles it perfectly.
+ *
+ * The rules for using SPTE_MMU_WRITEABLE and PT_WRITABLE_MASK:
+ * - to see whether the spte has a writable tlb entry or whether the spte
+ *   can be writable on the mmu mapping, check SPTE_MMU_WRITEABLE; this is
+ *   the most common case. Otherwise,
+ * - to fix a page fault on the spte or do write-protection by dirty
+ *   logging, check PT_WRITABLE_MASK.
+ *
+ * TODO: introduce APIs to split these two cases.
+ */
 static inline int is_writable_pte(unsigned long pte)
 {
 	return pte & PT_WRITABLE_MASK;
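A sketch of how the two rules combine in a single write-protect helper
(simplified, with an illustrative name, not verbatim kernel code; pt_protect
selects case (a), which alone may clear SPTE_MMU_WRITEABLE, while case (b)
only drops PT_WRITABLE_MASK):

	/* Simplified write-protect helper obeying the rules above. */
	static bool spte_write_protect_sketch(u64 *sptep, bool pt_protect)
	{
		u64 spte = *sptep;

		/* Skip sptes not writable through any path we care about. */
		if (!is_writable_pte(spte) &&
		    !(pt_protect && spte_is_locklessly_modifiable(spte)))
			return false;

		if (pt_protect)
			spte &= ~SPTE_MMU_WRITEABLE;	/* only case (a) clears this */
		spte &= ~PT_WRITABLE_MASK;

		/* mmu_spte_update() reports whether the caller must flush. */
		return mmu_spte_update(sptep, spte);
	}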
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 63a828d206c5..c5582c385bc0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3632,11 +3632,19 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 		offset = i * BITS_PER_LONG;
 		kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
 	}
-	if (is_dirty)
-		kvm_flush_remote_tlbs(kvm);
 
 	spin_unlock(&kvm->mmu_lock);
 
+	/* See the comments in kvm_mmu_slot_remove_write_access(). */
+	lockdep_assert_held(&kvm->slots_lock);
+
+	/*
+	 * All the TLBs can be flushed out of mmu lock, see the comments in
+	 * kvm_mmu_slot_remove_write_access().
+	 */
+	if (is_dirty)
+		kvm_flush_remote_tlbs(kvm);
+
 	r = -EFAULT;
 	if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
 		goto out;
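The resulting ordering in the dirty-log path is what makes the out-of-lock
flush safe for userspace: protect under mmu_lock, flush after unlocking, and
only then publish the bitmap. A hypothetical condensation of the function's
tail (illustrative helper name, not the full function):

	/* Hypothetical condensation of kvm_vm_ioctl_get_dirty_log()'s tail. */
	static int publish_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log,
				     unsigned long *dirty_bitmap_buffer,
				     unsigned long n, bool is_dirty)
	{
		spin_unlock(&kvm->mmu_lock);	/* write-protection is done */

		/*
		 * Flush before userspace can act on the bitmap;
		 * kvm->slots_lock serializes us against
		 * kvm_mmu_slot_remove_write_access().
		 */
		if (is_dirty)
			kvm_flush_remote_tlbs(kvm);

		/* Only now is the snapshot safe to hand to userspace. */
		if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
			return -EFAULT;

		return 0;
	}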