author     Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>  2014-04-17 05:06:14 -0400
committer  Marcelo Tosatti <mtosatti@redhat.com>              2014-04-23 16:49:50 -0400
commit     c126d94f2c90ed9daee24a94f1c67aff7e9bf387 (patch)
tree       ec5b3af6af89179ff9565c9f0fd32c7914a779ed /arch/x86/kvm
parent     92a476cbfc476c63ee982dd33d15a8c88b4d51b9 (diff)
KVM: MMU: lazily drop large spte
Currently, kvm zaps the large spte if write protection is needed, so a
later read can fault on that spte. Instead, we can make the large spte
read-only rather than non-present, which avoids the page fault caused
by the read access.

The idea is from Avi:
| As I mentioned before, write-protecting a large spte is a good idea,
| since it moves some work from protect-time to fault-time, so it reduces
| jitter. This removes the need for the return value.

This version fixes the issue reported in 6b73a9606: fast_page_fault()
directly set the read-only large spte back to writable but dirtied only
the first page in the dirty-bitmap, so the other pages covered by the
large spte were missed. Fix it by allowing only normal sptes (at the
PT_PAGE_TABLE_LEVEL level) to be fast fixed.

Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
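To make the new contract concrete, here is a minimal userspace sketch of the
reworked helper and its caller, modeled on the diff below. It is not kernel
code: the bit positions and the mmu_spte_update() stand-in are simplified
placeholders, not the kernel's definitions.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative bit positions only, not the kernel's spte layout. */
#define PT_WRITABLE_MASK   (1ull << 1)
#define SPTE_MMU_WRITEABLE (1ull << 60)

/* Stand-in for mmu_spte_update(): report whether hardware write access
 * was just revoked, i.e. whether a TLB flush would be needed. */
static bool mmu_spte_update(uint64_t *sptep, uint64_t new_spte)
{
        bool was_writable = *sptep & PT_WRITABLE_MASK;

        *sptep = new_spte;
        return was_writable && !(new_spte & PT_WRITABLE_MASK);
}

/* Mirrors the post-patch spte_write_protect(): the spte (large or not)
 * stays present; only its writable bits are cleared. */
static bool spte_write_protect(uint64_t *sptep, bool pt_protect)
{
        uint64_t spte = *sptep;

        if (pt_protect)
                spte &= ~SPTE_MMU_WRITEABLE;
        spte &= ~PT_WRITABLE_MASK;

        return mmu_spte_update(sptep, spte);
}

int main(void)
{
        /* One writable "large" spte and one already read-only spte. */
        uint64_t sptes[2] = { 0x1000 | PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE,
                              0x2000 };
        bool flush = false;

        /* The caller just ORs the results, as __rmap_write_protect() now does. */
        for (int i = 0; i < 2; i++)
                flush |= spte_write_protect(&sptes[i], true);

        printf("tlb flush needed: %s\n", flush ? "yes" : "no"); /* yes */
        return 0;
}

The point is only that spte_write_protect() now returns whether a TLB flush
is needed and never drops the spte, so __rmap_write_protect() no longer has
to restart its rmap walk.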
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--  arch/x86/kvm/mmu.c  34
-rw-r--r--  arch/x86/kvm/x86.c   8
2 files changed, 24 insertions(+), 18 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 63107049249d..ddf06963a74c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1176,8 +1176,7 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
 
 /*
  * Write-protect on the specified @sptep, @pt_protect indicates whether
- * spte writ-protection is caused by protecting shadow page table.
- * @flush indicates whether tlb need be flushed.
+ * spte write-protection is caused by protecting shadow page table.
  *
  * Note: write protection is difference between drity logging and spte
  * protection:
@@ -1186,10 +1185,9 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
  * - for spte protection, the spte can be writable only after unsync-ing
  * shadow page.
  *
- * Return true if the spte is dropped.
+ * Return true if tlb need be flushed.
  */
-static bool
-spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect)
+static bool spte_write_protect(struct kvm *kvm, u64 *sptep, bool pt_protect)
 {
         u64 spte = *sptep;
 
@@ -1199,17 +1197,11 @@ spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect)
 
         rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, *sptep);
 
-        if (__drop_large_spte(kvm, sptep)) {
-                *flush |= true;
-                return true;
-        }
-
         if (pt_protect)
                 spte &= ~SPTE_MMU_WRITEABLE;
         spte = spte & ~PT_WRITABLE_MASK;
 
-        *flush |= mmu_spte_update(sptep, spte);
-        return false;
+        return mmu_spte_update(sptep, spte);
 }
 
 static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
@@ -1221,11 +1213,8 @@ static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
 
         for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
                 BUG_ON(!(*sptep & PT_PRESENT_MASK));
-                if (spte_write_protect(kvm, sptep, &flush, pt_protect)) {
-                        sptep = rmap_get_first(*rmapp, &iter);
-                        continue;
-                }
 
+                flush |= spte_write_protect(kvm, sptep, pt_protect);
                 sptep = rmap_get_next(&iter);
         }
 
@@ -2877,6 +2866,19 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
                 goto exit;
 
         /*
+         * Do not fix write-permission on the large spte since we only dirty
+         * the first page into the dirty-bitmap in fast_pf_fix_direct_spte()
+         * that means other pages are missed if its slot is dirty-logged.
+         *
+         * Instead, we let the slow page fault path create a normal spte to
+         * fix the access.
+         *
+         * See the comments in kvm_arch_commit_memory_region().
+         */
+        if (sp->role.level > PT_PAGE_TABLE_LEVEL)
+                goto exit;
+
+        /*
          * Currently, fast page fault only works for direct mapping since
          * the gfn is not stable for indirect shadow page.
          * See Documentation/virtual/kvm/locking.txt to get more detail.
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7cc646626afd..63a828d206c5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7315,8 +7315,12 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
                 kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
         /*
          * Write protect all pages for dirty logging.
-         * Existing largepage mappings are destroyed here and new ones will
-         * not be created until the end of the logging.
+         *
+         * All the sptes including the large sptes which point to this
+         * slot are set to readonly. We can not create any new large
+         * spte on this slot until the end of the logging.
+         *
+         * See the comments in fast_page_fault().
          */
         if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
                 kvm_mmu_slot_remove_write_access(kvm, mem->slot);
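As a rough back-of-the-envelope for the PT_PAGE_TABLE_LEVEL check added in
fast_page_fault() above: fast_pf_fix_direct_spte() dirties a single gfn,
while a large spte spans many base pages, so fast-fixing it would leave most
of those pages out of the dirty bitmap. A tiny illustrative calculation
(assuming a 2MB large mapping over 4KB guest pages; the sizes are only for
illustration):

#include <stdio.h>

int main(void)
{
        const unsigned long large_spte_bytes = 2ul << 20; /* one 2MB large mapping */
        const unsigned long base_page_bytes  = 4ul << 10; /* one 4KB guest page    */
        unsigned long pages = large_spte_bytes / base_page_bytes;

        /* Fast-fixing the large spte would dirty only the first gfn. */
        printf("guest pages covered by the large spte: %lu\n", pages);      /* 512 */
        printf("pages missing from the dirty bitmap:   %lu\n", pages - 1);  /* 511 */
        return 0;
}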