author     Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>	2014-04-17 05:06:14 -0400
committer  Marcelo Tosatti <mtosatti@redhat.com>	2014-04-23 16:49:50 -0400
commit     c126d94f2c90ed9daee24a94f1c67aff7e9bf387 (patch)
tree       ec5b3af6af89179ff9565c9f0fd32c7914a779ed /arch/x86/kvm
parent     92a476cbfc476c63ee982dd33d15a8c88b4d51b9 (diff)
KVM: MMU: lazily drop large spte
Currently, KVM zaps a large spte whenever write protection is needed, so a
later read faults on that spte. Instead, we can make the large spte
read-only rather than non-present, which avoids the page fault caused by
read access.
The idea is from Avi:
| As I mentioned before, write-protecting a large spte is a good idea,
| since it moves some work from protect-time to fault-time, so it reduces
| jitter. This removes the need for the return value.
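To make the before/after concrete, here is a minimal sketch of the two
strategies; the bit mask and helper names below are simplified stand-ins
for illustration, not KVM's actual definitions:

#include <stdint.h>

/* Illustrative spte bit; KVM's real masks live in arch/x86/kvm/mmu.c. */
#define SPTE_WRITABLE	(1ULL << 1)

/* Old behaviour: zap the large spte entirely, so a later read *and* a
 * later write both fault and must rebuild the mapping. */
static void write_protect_by_zapping(uint64_t *sptep)
{
	*sptep = 0;			/* un-present */
}

/* New behaviour: only clear the writable bit. Reads keep hitting the
 * existing large mapping; only writes fault, deferring the work from
 * protect-time to fault-time. */
static void write_protect_lazily(uint64_t *sptep)
{
	*sptep &= ~SPTE_WRITABLE;	/* read-only, still present */
}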
This version fixes the issue reported in 6b73a9606: fast_page_fault()
directly set a read-only large spte to writable, but dirtied only the
first page in the dirty bitmap, which means the other pages covered by
the large spte were missed. Fix it by allowing only normal sptes (at
PT_PAGE_TABLE_LEVEL) to be fast-fixed.
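A rough model of that bug, assuming a hypothetical dirty bitmap and a 2MB
spte covering 512 pages (all names here are illustrative):

#include <stdbool.h>
#include <stdint.h>

#define PAGES_PER_LARGE_SPTE	512	/* 2MB spte / 4KB pages */

static bool dirty_bitmap[1 << 20];	/* one flag per guest frame */

static void mark_page_dirty(uint64_t gfn)
{
	dirty_bitmap[gfn] = true;
}

/* Modeled on the buggy fast path: after making the large spte writable,
 * only the faulting gfn is marked dirty. The guest can then write the
 * remaining 511 pages without faulting again, and those writes never
 * reach the dirty bitmap -- hence the restriction to
 * PT_PAGE_TABLE_LEVEL sptes. */
static void buggy_fast_fix(uint64_t *large_sptep, uint64_t faulting_gfn)
{
	*large_sptep |= 1;		/* illustrative writable bit */
	mark_page_dirty(faulting_gfn);	/* only 1 of 512 pages recorded */
}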
Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Diffstat (limited to 'arch/x86/kvm')
 arch/x86/kvm/mmu.c | 34 ++++++++++++++++++----------------
 arch/x86/kvm/x86.c |  8 ++++++--
 2 files changed, 24 insertions(+), 18 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 63107049249d..ddf06963a74c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1176,8 +1176,7 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
 
 /*
  * Write-protect on the specified @sptep, @pt_protect indicates whether
- * spte writ-protection is caused by protecting shadow page table.
- * @flush indicates whether tlb need be flushed.
+ * spte write-protection is caused by protecting shadow page table.
  *
  * Note: write protection is difference between drity logging and spte
  * protection:
@@ -1186,10 +1185,9 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
  * - for spte protection, the spte can be writable only after unsync-ing
  *   shadow page.
  *
- * Return true if the spte is dropped.
+ * Return true if tlb need be flushed.
  */
-static bool
-spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect)
+static bool spte_write_protect(struct kvm *kvm, u64 *sptep, bool pt_protect)
 {
 	u64 spte = *sptep;
 
@@ -1199,17 +1197,11 @@ spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect)
 
 	rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, *sptep);
 
-	if (__drop_large_spte(kvm, sptep)) {
-		*flush |= true;
-		return true;
-	}
-
 	if (pt_protect)
 		spte &= ~SPTE_MMU_WRITEABLE;
 	spte = spte & ~PT_WRITABLE_MASK;
 
-	*flush |= mmu_spte_update(sptep, spte);
-	return false;
+	return mmu_spte_update(sptep, spte);
 }
 
 static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
@@ -1221,11 +1213,8 @@ static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
 
 	for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
 		BUG_ON(!(*sptep & PT_PRESENT_MASK));
-		if (spte_write_protect(kvm, sptep, &flush, pt_protect)) {
-			sptep = rmap_get_first(*rmapp, &iter);
-			continue;
-		}
 
+		flush |= spte_write_protect(kvm, sptep, pt_protect);
 		sptep = rmap_get_next(&iter);
 	}
 
@@ -2877,6 +2866,19 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
 		goto exit;
 
 	/*
+	 * Do not fix write-permission on the large spte since we only dirty
+	 * the first page into the dirty-bitmap in fast_pf_fix_direct_spte()
+	 * that means other pages are missed if its slot is dirty-logged.
+	 *
+	 * Instead, we let the slow page fault path create a normal spte to
+	 * fix the access.
+	 *
+	 * See the comments in kvm_arch_commit_memory_region().
+	 */
+	if (sp->role.level > PT_PAGE_TABLE_LEVEL)
+		goto exit;
+
+	/*
 	 * Currently, fast page fault only works for direct mapping since
 	 * the gfn is not stable for indirect shadow page.
 	 * See Documentation/virtual/kvm/locking.txt to get more detail.
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7cc646626afd..63a828d206c5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7315,8 +7315,12 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 	kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
 	/*
 	 * Write protect all pages for dirty logging.
-	 * Existing largepage mappings are destroyed here and new ones will
-	 * not be created until the end of the logging.
+	 *
+	 * All the sptes including the large sptes which point to this
+	 * slot are set to readonly. We can not create any new large
+	 * spte on this slot until the end of the logging.
+	 *
+	 * See the comments in fast_page_fault().
 	 */
 	if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
 		kvm_mmu_slot_remove_write_access(kvm, mem->slot);
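Taken together, a toy model of the dirty-logging flow after this patch;
the types and function names here are simplified stand-ins, not KVM's
actual API:

#include <stdbool.h>

enum level { PT_PAGE_TABLE_LEVEL = 1, PT_DIRECTORY_LEVEL = 2 };

struct spte_model {
	bool writable;
	enum level level;
};

/* kvm_arch_commit_memory_region() side: when dirty logging starts, every
 * spte backing the slot, large sptes included, is made read-only instead
 * of being zapped. */
static void start_dirty_logging(struct spte_model *sptes, int n)
{
	for (int i = 0; i < n; i++)
		sptes[i].writable = false;
}

/* fast_page_fault() side: a write fault on a read-only large spte is not
 * fast-fixed; returning false here stands for falling back to the slow
 * path, which installs a normal 4K spte and dirties exactly the written
 * page. */
static bool try_fast_fix(struct spte_model *spte)
{
	if (spte->level > PT_PAGE_TABLE_LEVEL)
		return false;
	spte->writable = true;
	return true;
}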