author     Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>    2010-08-22 07:12:48 -0400
committer  Avi Kivity <avi@redhat.com>                      2010-10-24 04:51:27 -0400
commit     957ed9effd80b04482cbdce8c95bdf803a656b94 (patch)
tree       0d554cf6ca80fbb2bb1d3ef9b06f2a210a6b171f /arch
parent     48987781eb1d1e8ded41f55cd5806615fda92c6e (diff)
KVM: MMU: prefetch ptes when intercepted guest #PF
Support prefetching ptes when a guest #PF is intercepted, so that later
accesses do not fault again.

If we meet any failure in the prefetch path, we exit it and do not try the
remaining ptes, to avoid turning it into a heavy path.
Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Diffstat (limited to 'arch')
-rw-r--r--  arch/x86/kvm/mmu.c          | 104
-rw-r--r--  arch/x86/kvm/paging_tmpl.h  |  72
2 files changed, 175 insertions(+), 1 deletion(-)
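
For orientation before the diff: both prefetch paths below pick a window of sptes around the faulting entry by rounding the faulting index down to a multiple of PTE_PREFETCH_NUM and scanning at most PTE_PREFETCH_NUM contiguous entries. A minimal standalone sketch of that windowing arithmetic (editorial illustration with made-up values, not kernel code):

/*
 * Standalone illustration of the prefetch-window arithmetic used by
 * __direct_pte_prefetch() and FNAME(pte_prefetch): the index of the
 * faulting spte within its shadow page table is rounded down to a
 * multiple of PTE_PREFETCH_NUM, and at most PTE_PREFETCH_NUM
 * contiguous entries are considered.  Demo values are hypothetical.
 */
#include <stdio.h>

#define PTE_PREFETCH_NUM 8

int main(void)
{
        unsigned fault_idx = 203;  /* index of the faulting spte in sp->spt */

        /* mirrors: i = (sptep - sp->spt) & ~(PTE_PREFETCH_NUM - 1); */
        unsigned start = fault_idx & ~(PTE_PREFETCH_NUM - 1);

        printf("fault at index %u -> prefetch window [%u, %u)\n",
               fault_idx, start, start + PTE_PREFETCH_NUM);
        /* prints: fault at index 203 -> prefetch window [200, 208) */
        return 0;
}

Because the window is aligned and contiguous, the direct-map path can hand the whole batch to gfn_to_page_many_atomic() in one call.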
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 54a50268cebf..b0037a77e56b 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -89,6 +89,8 @@ module_param(oos_shadow, bool, 0644);
 }
 #endif

+#define PTE_PREFETCH_NUM 8
+
 #define PT_FIRST_AVAIL_BITS_SHIFT 9
 #define PT64_SECOND_AVAIL_BITS_SHIFT 52

@@ -400,7 +402,7 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
 	if (r)
 		goto out;
 	r = mmu_topup_memory_cache(&vcpu->arch.mmu_rmap_desc_cache,
-				   rmap_desc_cache, 4);
+				   rmap_desc_cache, 4 + PTE_PREFETCH_NUM);
 	if (r)
 		goto out;
 	r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8);
@@ -2089,6 +2091,105 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
 {
 }

+static struct kvm_memory_slot *
+pte_prefetch_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn, bool no_dirty_log)
+{
+	struct kvm_memory_slot *slot;
+
+	slot = gfn_to_memslot(vcpu->kvm, gfn);
+	if (!slot || slot->flags & KVM_MEMSLOT_INVALID ||
+	      (no_dirty_log && slot->dirty_bitmap))
+		slot = NULL;
+
+	return slot;
+}
+
+static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
+				     bool no_dirty_log)
+{
+	struct kvm_memory_slot *slot;
+	unsigned long hva;
+
+	slot = pte_prefetch_gfn_to_memslot(vcpu, gfn, no_dirty_log);
+	if (!slot) {
+		get_page(bad_page);
+		return page_to_pfn(bad_page);
+	}
+
+	hva = gfn_to_hva_memslot(slot, gfn);
+
+	return hva_to_pfn_atomic(vcpu->kvm, hva);
+}
+
+static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
+				    struct kvm_mmu_page *sp,
+				    u64 *start, u64 *end)
+{
+	struct page *pages[PTE_PREFETCH_NUM];
+	unsigned access = sp->role.access;
+	int i, ret;
+	gfn_t gfn;
+
+	gfn = kvm_mmu_page_get_gfn(sp, start - sp->spt);
+	if (!pte_prefetch_gfn_to_memslot(vcpu, gfn, access & ACC_WRITE_MASK))
+		return -1;
+
+	ret = gfn_to_page_many_atomic(vcpu->kvm, gfn, pages, end - start);
+	if (ret <= 0)
+		return -1;
+
+	for (i = 0; i < ret; i++, gfn++, start++)
+		mmu_set_spte(vcpu, start, ACC_ALL,
+			     access, 0, 0, 1, NULL,
+			     sp->role.level, gfn,
+			     page_to_pfn(pages[i]), true, true);
+
+	return 0;
+}
+
+static void __direct_pte_prefetch(struct kvm_vcpu *vcpu,
+				  struct kvm_mmu_page *sp, u64 *sptep)
+{
+	u64 *spte, *start = NULL;
+	int i;
+
+	WARN_ON(!sp->role.direct);
+
+	i = (sptep - sp->spt) & ~(PTE_PREFETCH_NUM - 1);
+	spte = sp->spt + i;
+
+	for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) {
+		if (*spte != shadow_trap_nonpresent_pte || spte == sptep) {
+			if (!start)
+				continue;
+			if (direct_pte_prefetch_many(vcpu, sp, start, spte) < 0)
+				break;
+			start = NULL;
+		} else if (!start)
+			start = spte;
+	}
+}
+
+static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
+{
+	struct kvm_mmu_page *sp;
+
+	/*
+	 * Since it's no accessed bit on EPT, it's no way to
+	 * distinguish between actually accessed translations
+	 * and prefetched, so disable pte prefetch if EPT is
+	 * enabled.
+	 */
+	if (!shadow_accessed_mask)
+		return;
+
+	sp = page_header(__pa(sptep));
+	if (sp->role.level > PT_PAGE_TABLE_LEVEL)
+		return;
+
+	__direct_pte_prefetch(vcpu, sp, sptep);
+}
+
 static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 			int level, gfn_t gfn, pfn_t pfn)
 {
@@ -2102,6 +2203,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 			mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
 				     0, write, 1, &pt_write,
 				     level, gfn, pfn, false, true);
+			direct_pte_prefetch(vcpu, iterator.sptep);
 			++vcpu->stat.pf_fixed;
 			break;
 		}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 51ef9097960d..872ff265c91e 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -310,6 +310,77 @@ static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu,
 	return r || curr_pte != gw->ptes[level - 1];
 }

+static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, u64 *sptep)
+{
+	struct kvm_mmu_page *sp;
+	pt_element_t gptep[PTE_PREFETCH_NUM];
+	gpa_t first_pte_gpa;
+	int offset = 0, i;
+	u64 *spte;
+
+	sp = page_header(__pa(sptep));
+
+	if (sp->role.level > PT_PAGE_TABLE_LEVEL)
+		return;
+
+	if (sp->role.direct)
+		return __direct_pte_prefetch(vcpu, sp, sptep);
+
+	i = (sptep - sp->spt) & ~(PTE_PREFETCH_NUM - 1);
+
+	if (PTTYPE == 32)
+		offset = sp->role.quadrant << PT64_LEVEL_BITS;
+
+	first_pte_gpa = gfn_to_gpa(sp->gfn) +
+				(offset + i) * sizeof(pt_element_t);
+
+	if (kvm_read_guest_atomic(vcpu->kvm, first_pte_gpa, gptep,
+					sizeof(gptep)) < 0)
+		return;
+
+	spte = sp->spt + i;
+
+	for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) {
+		pt_element_t gpte;
+		unsigned pte_access;
+		gfn_t gfn;
+		pfn_t pfn;
+		bool dirty;
+
+		if (spte == sptep)
+			continue;
+
+		if (*spte != shadow_trap_nonpresent_pte)
+			continue;
+
+		gpte = gptep[i];
+
+		if (!is_present_gpte(gpte) ||
+		      is_rsvd_bits_set(vcpu, gpte, PT_PAGE_TABLE_LEVEL)) {
+			if (!sp->unsync)
+				__set_spte(spte, shadow_notrap_nonpresent_pte);
+			continue;
+		}
+
+		if (!(gpte & PT_ACCESSED_MASK))
+			continue;
+
+		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
+		gfn = gpte_to_gfn(gpte);
+		dirty = is_dirty_gpte(gpte);
+		pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
+				      (pte_access & ACC_WRITE_MASK) && dirty);
+		if (is_error_pfn(pfn)) {
+			kvm_release_pfn_clean(pfn);
+			break;
+		}
+
+		mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
+			     dirty, NULL, PT_PAGE_TABLE_LEVEL, gfn,
+			     pfn, true, true);
+	}
+}
+
 /*
  * Fetch a shadow pte for a specific level in the paging hierarchy.
  */
@@ -391,6 +462,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 	mmu_set_spte(vcpu, it.sptep, access, gw->pte_access & access,
 		     user_fault, write_fault, dirty, ptwrite, it.level,
 		     gw->gfn, pfn, false, true);
+	FNAME(pte_prefetch)(vcpu, it.sptep);

 	return it.sptep;

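
As a worked example of the guest-PTE lookup in FNAME(pte_prefetch) above: the aligned window index is combined with the 32-bit-paging quadrant offset to form the guest-physical address of the first guest pte to read in one kvm_read_guest_atomic() batch. A small standalone sketch with hypothetical values (gfn, quadrant, and fault index are made up for illustration, not taken from the patch):

/*
 * Hypothetical worked example of how FNAME(pte_prefetch) locates the guest
 * PTEs backing the prefetch window.  With 32-bit guest paging a shadow page
 * holds 512 sptes while the guest page table holds 1024 4-byte entries, so
 * sp->role.quadrant selects which half the shadow page covers and adds an
 * extra index offset.
 */
#include <stdio.h>
#include <stdint.h>

#define PTE_PREFETCH_NUM 8
#define PT64_LEVEL_BITS  9   /* 512 sptes per shadow page */

int main(void)
{
        uint64_t gfn = 0x1234;   /* guest frame holding the guest page table */
        unsigned pte_size = 4;   /* sizeof(pt_element_t) when PTTYPE == 32 */
        unsigned quadrant = 1;   /* half of the guest table shadowed by this sp */
        unsigned fault_idx = 203;

        unsigned i = fault_idx & ~(PTE_PREFETCH_NUM - 1);   /* 200 */
        unsigned offset = quadrant << PT64_LEVEL_BITS;      /* 512 */
        uint64_t first_pte_gpa = (gfn << 12) + (offset + i) * pte_size;

        /* prints: read 8 guest ptes starting at gpa 0x1234b20 */
        printf("read %d guest ptes starting at gpa 0x%llx\n",
               PTE_PREFETCH_NUM, (unsigned long long)first_pte_gpa);
        return 0;
}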