 arch/x86/kvm/mmu.c | 251 ++++++++++++++++++++++++++++-------------
 1 file changed, 161 insertions(+), 90 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index fbb04aee830..9cafbb49981 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -18,9 +18,11 @@
  *
  */
 
+#include "irq.h"
 #include "mmu.h"
 #include "x86.h"
 #include "kvm_cache_regs.h"
+#include "x86.h"
 
 #include <linux/kvm_host.h>
 #include <linux/types.h>
@@ -194,7 +196,6 @@ static struct percpu_counter kvm_total_used_mmu_pages;
 
 static u64 __read_mostly shadow_trap_nonpresent_pte;
 static u64 __read_mostly shadow_notrap_nonpresent_pte;
-static u64 __read_mostly shadow_base_present_pte;
 static u64 __read_mostly shadow_nx_mask;
 static u64 __read_mostly shadow_x_mask;	/* mutual exclusive with nx_mask */
 static u64 __read_mostly shadow_user_mask;
@@ -213,12 +214,6 @@ void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_nonpresent_ptes);
 
-void kvm_mmu_set_base_ptes(u64 base_pte)
-{
-	shadow_base_present_pte = base_pte;
-}
-EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes);
-
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 		u64 dirty_mask, u64 nx_mask, u64 x_mask)
 {
@@ -482,46 +477,46 @@ static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn)
 }
 
 /*
- * Return the pointer to the largepage write count for a given
- * gfn, handling slots that are not large page aligned.
+ * Return the pointer to the large page information for a given gfn,
+ * handling slots that are not large page aligned.
  */
-static int *slot_largepage_idx(gfn_t gfn,
+static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn,
 					      struct kvm_memory_slot *slot,
 					      int level)
 {
 	unsigned long idx;
 
 	idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
 	      (slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
-	return &slot->lpage_info[level - 2][idx].write_count;
+	return &slot->lpage_info[level - 2][idx];
 }
 
 static void account_shadowed(struct kvm *kvm, gfn_t gfn)
 {
 	struct kvm_memory_slot *slot;
-	int *write_count;
+	struct kvm_lpage_info *linfo;
 	int i;
 
 	slot = gfn_to_memslot(kvm, gfn);
 	for (i = PT_DIRECTORY_LEVEL;
 	     i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
-		write_count = slot_largepage_idx(gfn, slot, i);
-		*write_count += 1;
+		linfo = lpage_info_slot(gfn, slot, i);
+		linfo->write_count += 1;
 	}
 }
 
 static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
 {
 	struct kvm_memory_slot *slot;
-	int *write_count;
+	struct kvm_lpage_info *linfo;
 	int i;
 
 	slot = gfn_to_memslot(kvm, gfn);
 	for (i = PT_DIRECTORY_LEVEL;
 	     i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
-		write_count = slot_largepage_idx(gfn, slot, i);
-		*write_count -= 1;
-		WARN_ON(*write_count < 0);
+		linfo = lpage_info_slot(gfn, slot, i);
+		linfo->write_count -= 1;
+		WARN_ON(linfo->write_count < 0);
 	}
 }
 
@@ -530,12 +525,12 @@ static int has_wrprotected_page(struct kvm *kvm,
 			       int level)
 {
 	struct kvm_memory_slot *slot;
-	int *largepage_idx;
+	struct kvm_lpage_info *linfo;
 
 	slot = gfn_to_memslot(kvm, gfn);
 	if (slot) {
-		largepage_idx = slot_largepage_idx(gfn, slot, level);
-		return *largepage_idx;
+		linfo = lpage_info_slot(gfn, slot, level);
+		return linfo->write_count;
 	}
 
 	return 1;
@@ -590,16 +585,15 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
 static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level)
 {
 	struct kvm_memory_slot *slot;
-	unsigned long idx;
+	struct kvm_lpage_info *linfo;
 
 	slot = gfn_to_memslot(kvm, gfn);
 	if (likely(level == PT_PAGE_TABLE_LEVEL))
 		return &slot->rmap[gfn - slot->base_gfn];
 
-	idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
-		(slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
+	linfo = lpage_info_slot(gfn, slot, level);
 
-	return &slot->lpage_info[level - 2][idx].rmap_pde;
+	return &linfo->rmap_pde;
 }
 
 /*
@@ -887,19 +881,16 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 		end = start + (memslot->npages << PAGE_SHIFT);
 		if (hva >= start && hva < end) {
 			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
+			gfn_t gfn = memslot->base_gfn + gfn_offset;
 
 			ret = handler(kvm, &memslot->rmap[gfn_offset], data);
 
 			for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
-				unsigned long idx;
-				int sh;
-
-				sh = KVM_HPAGE_GFN_SHIFT(PT_DIRECTORY_LEVEL+j);
-				idx = ((memslot->base_gfn+gfn_offset) >> sh) -
-					(memslot->base_gfn >> sh);
-				ret |= handler(kvm,
-					&memslot->lpage_info[j][idx].rmap_pde,
-					data);
+				struct kvm_lpage_info *linfo;
+
+				linfo = lpage_info_slot(gfn, memslot,
+							PT_DIRECTORY_LEVEL + j);
+				ret |= handler(kvm, &linfo->rmap_pde, data);
 			}
 			trace_kvm_age_page(hva, memslot, ret);
 			retval |= ret;
@@ -1161,7 +1152,7 @@ static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu,
 }
 
 static int nonpaging_sync_page(struct kvm_vcpu *vcpu,
-			       struct kvm_mmu_page *sp, bool clear_unsync)
+			       struct kvm_mmu_page *sp)
 {
 	return 1;
 }
@@ -1291,7 +1282,7 @@ static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	if (clear_unsync)
 		kvm_unlink_unsync_page(vcpu->kvm, sp);
 
-	if (vcpu->arch.mmu.sync_page(vcpu, sp, clear_unsync)) {
+	if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
 		kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
 		return 1;
 	}
@@ -1332,12 +1323,12 @@ static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn)
 			continue;
 
 		WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL);
+		kvm_unlink_unsync_page(vcpu->kvm, s);
 		if ((s->role.cr4_pae != !!is_pae(vcpu)) ||
-			(vcpu->arch.mmu.sync_page(vcpu, s, true))) {
+			(vcpu->arch.mmu.sync_page(vcpu, s))) {
 			kvm_mmu_prepare_zap_page(vcpu->kvm, s, &invalid_list);
 			continue;
 		}
-		kvm_unlink_unsync_page(vcpu->kvm, s);
 		flush = true;
 	}
 
@@ -1963,9 +1954,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		    unsigned pte_access, int user_fault,
 		    int write_fault, int dirty, int level,
 		    gfn_t gfn, pfn_t pfn, bool speculative,
-		    bool can_unsync, bool reset_host_protection)
+		    bool can_unsync, bool host_writable)
 {
-	u64 spte;
+	u64 spte, entry = *sptep;
 	int ret = 0;
 
 	/*
@@ -1973,7 +1964,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 	 * whether the guest actually used the pte (in order to detect
 	 * demand paging).
 	 */
-	spte = shadow_base_present_pte;
+	spte = PT_PRESENT_MASK;
 	if (!speculative)
 		spte |= shadow_accessed_mask;
 	if (!dirty)
@@ -1990,8 +1981,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
 			kvm_is_mmio_pfn(pfn));
 
-	if (reset_host_protection)
+	if (host_writable)
 		spte |= SPTE_HOST_WRITEABLE;
+	else
+		pte_access &= ~ACC_WRITE_MASK;
 
 	spte |= (u64)pfn << PAGE_SHIFT;
 
@@ -2036,6 +2029,14 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 
 set_pte:
 	update_spte(sptep, spte);
+	/*
+	 * If we overwrite a writable spte with a read-only one we
+	 * should flush remote TLBs. Otherwise rmap_write_protect
+	 * will find a read-only spte, even though the writable spte
+	 * might be cached on a CPU's TLB.
+	 */
+	if (is_writable_pte(entry) && !is_writable_pte(*sptep))
+		kvm_flush_remote_tlbs(vcpu->kvm);
 done:
 	return ret;
 }
@@ -2045,7 +2046,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 			 int user_fault, int write_fault, int dirty,
 			 int *ptwrite, int level, gfn_t gfn,
 			 pfn_t pfn, bool speculative,
-			 bool reset_host_protection)
+			 bool host_writable)
 {
 	int was_rmapped = 0;
 	int rmap_count;
@@ -2080,7 +2081,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 
 	if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault,
 		      dirty, level, gfn, pfn, speculative, true,
-		      reset_host_protection)) {
+		      host_writable)) {
 		if (write_fault)
 			*ptwrite = 1;
 		kvm_mmu_flush_tlb(vcpu);
@@ -2211,7 +2212,8 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
 }
 
 static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
-			int level, gfn_t gfn, pfn_t pfn)
+			int map_writable, int level, gfn_t gfn, pfn_t pfn,
+			bool prefault)
 {
 	struct kvm_shadow_walk_iterator iterator;
 	struct kvm_mmu_page *sp;
@@ -2220,9 +2222,11 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 
 	for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
 		if (iterator.level == level) {
-			mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
+			unsigned pte_access = ACC_ALL;
+
+			mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, pte_access,
 				     0, write, 1, &pt_write,
-				     level, gfn, pfn, false, true);
+				     level, gfn, pfn, prefault, map_writable);
 			direct_pte_prefetch(vcpu, iterator.sptep);
 			++vcpu->stat.pf_fixed;
 			break;
@@ -2277,12 +2281,17 @@ static int kvm_handle_bad_page(struct kvm *kvm, gfn_t gfn, pfn_t pfn)
 	return 1;
 }
 
-static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
+static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
+			 gva_t gva, pfn_t *pfn, bool write, bool *writable);
+
+static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn,
+			 bool prefault)
 {
 	int r;
 	int level;
 	pfn_t pfn;
 	unsigned long mmu_seq;
+	bool map_writable;
 
 	level = mapping_level(vcpu, gfn);
 
@@ -2297,7 +2306,9 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
-	pfn = gfn_to_pfn(vcpu->kvm, gfn);
+
+	if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable))
+		return 0;
 
 	/* mmio */
 	if (is_error_pfn(pfn))
@@ -2307,7 +2318,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 	if (mmu_notifier_retry(vcpu, mmu_seq))
 		goto out_unlock;
 	kvm_mmu_free_some_pages(vcpu);
-	r = __direct_map(vcpu, v, write, level, gfn, pfn);
+	r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn,
+			 prefault);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 
@@ -2530,6 +2542,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
 		hpa_t root = vcpu->arch.mmu.root_hpa;
 		sp = page_header(root);
 		mmu_sync_children(vcpu, sp);
+		trace_kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
 		return;
 	}
 	for (i = 0; i < 4; ++i) {
@@ -2552,23 +2565,24 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
 }
 
 static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr,
-				  u32 access, u32 *error)
+				  u32 access, struct x86_exception *exception)
 {
-	if (error)
-		*error = 0;
+	if (exception)
+		exception->error_code = 0;
 	return vaddr;
 }
 
 static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr,
-					 u32 access, u32 *error)
+					 u32 access,
+					 struct x86_exception *exception)
 {
-	if (error)
-		*error = 0;
+	if (exception)
+		exception->error_code = 0;
 	return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access);
 }
 
 static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
-				u32 error_code)
+				u32 error_code, bool prefault)
 {
 	gfn_t gfn;
 	int r;
@@ -2584,17 +2598,67 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
 	gfn = gva >> PAGE_SHIFT;
 
 	return nonpaging_map(vcpu, gva & PAGE_MASK,
-			     error_code & PFERR_WRITE_MASK, gfn);
+			     error_code & PFERR_WRITE_MASK, gfn, prefault);
+}
+
+static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
+{
+	struct kvm_arch_async_pf arch;
+
+	arch.token = (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
+	arch.gfn = gfn;
+	arch.direct_map = vcpu->arch.mmu.direct_map;
+	arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu);
+
+	return kvm_setup_async_pf(vcpu, gva, gfn, &arch);
 }
 
-static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
-			  u32 error_code)
+static bool can_do_async_pf(struct kvm_vcpu *vcpu)
+{
+	if (unlikely(!irqchip_in_kernel(vcpu->kvm) ||
+		     kvm_event_needs_reinjection(vcpu)))
+		return false;
+
+	return kvm_x86_ops->interrupt_allowed(vcpu);
+}
+
+static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
+			 gva_t gva, pfn_t *pfn, bool write, bool *writable)
+{
+	bool async;
+
+	*pfn = gfn_to_pfn_async(vcpu->kvm, gfn, &async, write, writable);
+
+	if (!async)
+		return false; /* *pfn has correct page already */
+
+	put_page(pfn_to_page(*pfn));
+
+	if (!prefault && can_do_async_pf(vcpu)) {
+		trace_kvm_try_async_get_page(gva, gfn);
+		if (kvm_find_async_pf_gfn(vcpu, gfn)) {
+			trace_kvm_async_pf_doublefault(gva, gfn);
+			kvm_make_request(KVM_REQ_APF_HALT, vcpu);
+			return true;
+		} else if (kvm_arch_setup_async_pf(vcpu, gva, gfn))
+			return true;
+	}
+
+	*pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write, writable);
+
+	return false;
+}
+
+static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
+			  bool prefault)
 {
 	pfn_t pfn;
 	int r;
 	int level;
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	unsigned long mmu_seq;
+	int write = error_code & PFERR_WRITE_MASK;
+	bool map_writable;
 
 	ASSERT(vcpu);
 	ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
@@ -2609,15 +2673,19 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 
 	mmu_seq = vcpu->kvm->mmu_notifier_seq;
 	smp_rmb();
-	pfn = gfn_to_pfn(vcpu->kvm, gfn);
+
+	if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable))
+		return 0;
+
+	/* mmio */
 	if (is_error_pfn(pfn))
 		return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);
 	spin_lock(&vcpu->kvm->mmu_lock);
 	if (mmu_notifier_retry(vcpu, mmu_seq))
 		goto out_unlock;
 	kvm_mmu_free_some_pages(vcpu);
-	r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
-			 level, gfn, pfn);
+	r = __direct_map(vcpu, gpa, write, map_writable,
+			 level, gfn, pfn, prefault);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
 	return r;
@@ -2659,18 +2727,19 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
 
 static void paging_new_cr3(struct kvm_vcpu *vcpu)
 {
-	pgprintk("%s: cr3 %lx\n", __func__, vcpu->arch.cr3);
+	pgprintk("%s: cr3 %lx\n", __func__, kvm_read_cr3(vcpu));
 	mmu_free_roots(vcpu);
 }
 
 static unsigned long get_cr3(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.cr3;
+	return kvm_read_cr3(vcpu);
 }
 
-static void inject_page_fault(struct kvm_vcpu *vcpu)
+static void inject_page_fault(struct kvm_vcpu *vcpu,
+			      struct x86_exception *fault)
 {
-	vcpu->arch.mmu.inject_page_fault(vcpu);
+	vcpu->arch.mmu.inject_page_fault(vcpu, fault);
 }
 
 static void paging_free(struct kvm_vcpu *vcpu)
@@ -2816,6 +2885,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu *context = vcpu->arch.walk_mmu;
 
+	context->base_role.word = 0;
 	context->new_cr3 = nonpaging_new_cr3;
 	context->page_fault = tdp_page_fault;
 	context->free = nonpaging_free;
@@ -3008,9 +3078,6 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
 		return;
 	}
 
-	if (is_rsvd_bits_set(&vcpu->arch.mmu, *(u64 *)new, PT_PAGE_TABLE_LEVEL))
-		return;
-
 	++vcpu->kvm->stat.mmu_pte_updated;
 	if (!sp->role.cr4_pae)
 		paging32_update_pte(vcpu, sp, spte, new);
@@ -3264,12 +3331,13 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
 	}
 }
 
-int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
+int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
+		       void *insn, int insn_len)
 {
 	int r;
 	enum emulation_result er;
 
-	r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code);
+	r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false);
 	if (r < 0)
 		goto out;
 
@@ -3282,7 +3350,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
 	if (r)
 		goto out;
 
-	er = emulate_instruction(vcpu, cr2, error_code, 0);
+	er = x86_emulate_instruction(vcpu, cr2, 0, insn, insn_len);
 
 	switch (er) {
 	case EMULATE_DONE:
@@ -3377,11 +3445,14 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
 		if (!test_bit(slot, sp->slot_bitmap))
 			continue;
 
+		if (sp->role.level != PT_PAGE_TABLE_LEVEL)
+			continue;
+
 		pt = sp->spt;
 		for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
 			/* avoid RMW */
 			if (is_writable_pte(pt[i]))
-				pt[i] &= ~PT_WRITABLE_MASK;
+				update_spte(&pt[i], pt[i] & ~PT_WRITABLE_MASK);
 	}
 	kvm_flush_remote_tlbs(kvm);
 }
@@ -3463,13 +3534,6 @@ static void mmu_destroy_caches(void)
 	kmem_cache_destroy(mmu_page_header_cache);
 }
 
-void kvm_mmu_module_exit(void)
-{
-	mmu_destroy_caches();
-	percpu_counter_destroy(&kvm_total_used_mmu_pages);
-	unregister_shrinker(&mmu_shrinker);
-}
-
 int kvm_mmu_module_init(void)
 {
 	pte_chain_cache = kmem_cache_create("kvm_pte_chain",
@@ -3566,7 +3630,7 @@ static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu,
 
 static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu)
 {
-	(void)kvm_set_cr3(vcpu, vcpu->arch.cr3);
+	(void)kvm_set_cr3(vcpu, kvm_read_cr3(vcpu));
 	return 1;
 }
 
@@ -3662,12 +3726,6 @@ int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4])
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_get_spte_hierarchy);
 
-#ifdef CONFIG_KVM_MMU_AUDIT
-#include "mmu_audit.c"
-#else
-static void mmu_audit_disable(void) { }
-#endif
-
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
 {
 	ASSERT(vcpu);
@@ -3675,5 +3733,18 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
 	destroy_kvm_mmu(vcpu);
 	free_mmu_pages(vcpu);
 	mmu_free_memory_caches(vcpu);
+}
+
+#ifdef CONFIG_KVM_MMU_AUDIT
+#include "mmu_audit.c"
+#else
+static void mmu_audit_disable(void) { }
+#endif
+
+void kvm_mmu_module_exit(void)
+{
+	mmu_destroy_caches();
+	percpu_counter_destroy(&kvm_total_used_mmu_pages);
+	unregister_shrinker(&mmu_shrinker);
 	mmu_audit_disable();
 }