Diffstat (limited to 'arch/x86/kvm/mmu.c')
 -rw-r--r--   arch/x86/kvm/mmu.c | 251
 1 file changed, 161 insertions, 90 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index fbb04aee830..9cafbb49981 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -18,9 +18,11 @@
  *
  */
 
+#include "irq.h"
 #include "mmu.h"
 #include "x86.h"
 #include "kvm_cache_regs.h"
+#include "x86.h"
 
 #include <linux/kvm_host.h>
 #include <linux/types.h>
@@ -194,7 +196,6 @@ static struct percpu_counter kvm_total_used_mmu_pages;
 
 static u64 __read_mostly shadow_trap_nonpresent_pte;
 static u64 __read_mostly shadow_notrap_nonpresent_pte;
-static u64 __read_mostly shadow_base_present_pte;
 static u64 __read_mostly shadow_nx_mask;
 static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
 static u64 __read_mostly shadow_user_mask;
@@ -213,12 +214,6 @@ void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_nonpresent_ptes);
 
-void kvm_mmu_set_base_ptes(u64 base_pte)
-{
-        shadow_base_present_pte = base_pte;
-}
-EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes);
-
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
                 u64 dirty_mask, u64 nx_mask, u64 x_mask)
 {
@@ -482,46 +477,46 @@ static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn)
 }
 
 /*
- * Return the pointer to the largepage write count for a given
- * gfn, handling slots that are not large page aligned.
+ * Return the pointer to the large page information for a given gfn,
+ * handling slots that are not large page aligned.
  */
-static int *slot_largepage_idx(gfn_t gfn,
+static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn,
                                struct kvm_memory_slot *slot,
                                int level)
 {
         unsigned long idx;
 
         idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
               (slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
-        return &slot->lpage_info[level - 2][idx].write_count;
+        return &slot->lpage_info[level - 2][idx];
 }
 
 static void account_shadowed(struct kvm *kvm, gfn_t gfn)
 {
         struct kvm_memory_slot *slot;
-        int *write_count;
+        struct kvm_lpage_info *linfo;
         int i;
 
         slot = gfn_to_memslot(kvm, gfn);
         for (i = PT_DIRECTORY_LEVEL;
              i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
-                write_count = slot_largepage_idx(gfn, slot, i);
-                *write_count += 1;
+                linfo = lpage_info_slot(gfn, slot, i);
+                linfo->write_count += 1;
         }
 }
 
 static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
 {
         struct kvm_memory_slot *slot;
-        int *write_count;
+        struct kvm_lpage_info *linfo;
         int i;
 
         slot = gfn_to_memslot(kvm, gfn);
         for (i = PT_DIRECTORY_LEVEL;
              i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
-                write_count = slot_largepage_idx(gfn, slot, i);
-                *write_count -= 1;
-                WARN_ON(*write_count < 0);
+                linfo = lpage_info_slot(gfn, slot, i);
+                linfo->write_count -= 1;
+                WARN_ON(linfo->write_count < 0);
         }
 }
 
@@ -530,12 +525,12 @@ static int has_wrprotected_page(struct kvm *kvm,
                         int level)
 {
         struct kvm_memory_slot *slot;
-        int *largepage_idx;
+        struct kvm_lpage_info *linfo;
 
         slot = gfn_to_memslot(kvm, gfn);
         if (slot) {
-                largepage_idx = slot_largepage_idx(gfn, slot, level);
-                return *largepage_idx;
+                linfo = lpage_info_slot(gfn, slot, level);
+                return linfo->write_count;
         }
 
         return 1;
@@ -590,16 +585,15 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
 static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level)
 {
         struct kvm_memory_slot *slot;
-        unsigned long idx;
+        struct kvm_lpage_info *linfo;
 
         slot = gfn_to_memslot(kvm, gfn);
         if (likely(level == PT_PAGE_TABLE_LEVEL))
                 return &slot->rmap[gfn - slot->base_gfn];
 
-        idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
-                (slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
+        linfo = lpage_info_slot(gfn, slot, level);
 
-        return &slot->lpage_info[level - 2][idx].rmap_pde;
+        return &linfo->rmap_pde;
 }
 
 /*
@@ -887,19 +881,16 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
                 end = start + (memslot->npages << PAGE_SHIFT);
                 if (hva >= start && hva < end) {
                         gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
+                        gfn_t gfn = memslot->base_gfn + gfn_offset;
 
                         ret = handler(kvm, &memslot->rmap[gfn_offset], data);
 
                         for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
-                                unsigned long idx;
-                                int sh;
-
-                                sh = KVM_HPAGE_GFN_SHIFT(PT_DIRECTORY_LEVEL+j);
-                                idx = ((memslot->base_gfn+gfn_offset) >> sh) -
-                                        (memslot->base_gfn >> sh);
-                                ret |= handler(kvm,
-                                        &memslot->lpage_info[j][idx].rmap_pde,
-                                        data);
+                                struct kvm_lpage_info *linfo;
+
+                                linfo = lpage_info_slot(gfn, memslot,
+                                                        PT_DIRECTORY_LEVEL + j);
+                                ret |= handler(kvm, &linfo->rmap_pde, data);
                         }
                         trace_kvm_age_page(hva, memslot, ret);
                         retval |= ret;
@@ -1161,7 +1152,7 @@ static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu,
 }
 
 static int nonpaging_sync_page(struct kvm_vcpu *vcpu,
-                               struct kvm_mmu_page *sp, bool clear_unsync)
+                               struct kvm_mmu_page *sp)
 {
         return 1;
 }
@@ -1291,7 +1282,7 @@ static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
         if (clear_unsync)
                 kvm_unlink_unsync_page(vcpu->kvm, sp);
 
-        if (vcpu->arch.mmu.sync_page(vcpu, sp, clear_unsync)) {
+        if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
                 kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
                 return 1;
         }
@@ -1332,12 +1323,12 @@ static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn)
                         continue;
 
                 WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL);
+                kvm_unlink_unsync_page(vcpu->kvm, s);
                 if ((s->role.cr4_pae != !!is_pae(vcpu)) ||
-                        (vcpu->arch.mmu.sync_page(vcpu, s, true))) {
+                        (vcpu->arch.mmu.sync_page(vcpu, s))) {
                         kvm_mmu_prepare_zap_page(vcpu->kvm, s, &invalid_list);
                         continue;
                 }
-                kvm_unlink_unsync_page(vcpu->kvm, s);
                 flush = true;
         }
 
@@ -1963,9 +1954,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
                     unsigned pte_access, int user_fault,
                     int write_fault, int dirty, int level,
                     gfn_t gfn, pfn_t pfn, bool speculative,
-                    bool can_unsync, bool reset_host_protection)
+                    bool can_unsync, bool host_writable)
 {
-        u64 spte;
+        u64 spte, entry = *sptep;
         int ret = 0;
 
         /*
@@ -1973,7 +1964,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
          * whether the guest actually used the pte (in order to detect
          * demand paging).
          */
-        spte = shadow_base_present_pte;
+        spte = PT_PRESENT_MASK;
         if (!speculative)
                 spte |= shadow_accessed_mask;
         if (!dirty)
@@ -1990,8 +1981,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
                 spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
                         kvm_is_mmio_pfn(pfn));
 
-        if (reset_host_protection)
+        if (host_writable)
                 spte |= SPTE_HOST_WRITEABLE;
+        else
+                pte_access &= ~ACC_WRITE_MASK;
 
         spte |= (u64)pfn << PAGE_SHIFT;
 
@@ -2036,6 +2029,14 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 
 set_pte:
         update_spte(sptep, spte);
+        /*
+         * If we overwrite a writable spte with a read-only one we
+         * should flush remote TLBs. Otherwise rmap_write_protect
+         * will find a read-only spte, even though the writable spte
+         * might be cached on a CPU's TLB.
+         */
+        if (is_writable_pte(entry) && !is_writable_pte(*sptep))
+                kvm_flush_remote_tlbs(vcpu->kvm);
 done:
         return ret;
 }
@@ -2045,7 +2046,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
                          int user_fault, int write_fault, int dirty,
                          int *ptwrite, int level, gfn_t gfn,
                          pfn_t pfn, bool speculative,
-                         bool reset_host_protection)
+                         bool host_writable)
 {
         int was_rmapped = 0;
         int rmap_count;
@@ -2080,7 +2081,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 
         if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault,
                      dirty, level, gfn, pfn, speculative, true,
-                     reset_host_protection)) {
+                     host_writable)) {
                 if (write_fault)
                         *ptwrite = 1;
                 kvm_mmu_flush_tlb(vcpu);
@@ -2211,7 +2212,8 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
 }
 
 static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
-                        int level, gfn_t gfn, pfn_t pfn)
+                        int map_writable, int level, gfn_t gfn, pfn_t pfn,
+                        bool prefault)
 {
         struct kvm_shadow_walk_iterator iterator;
         struct kvm_mmu_page *sp;
@@ -2220,9 +2222,11 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 
         for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
                 if (iterator.level == level) {
-                        mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
+                        unsigned pte_access = ACC_ALL;
+
+                        mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, pte_access,
                                      0, write, 1, &pt_write,
-                                     level, gfn, pfn, false, true);
+                                     level, gfn, pfn, prefault, map_writable);
                         direct_pte_prefetch(vcpu, iterator.sptep);
                         ++vcpu->stat.pf_fixed;
                         break;
@@ -2277,12 +2281,17 @@ static int kvm_handle_bad_page(struct kvm *kvm, gfn_t gfn, pfn_t pfn)
         return 1;
 }
 
-static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
+static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
+                         gva_t gva, pfn_t *pfn, bool write, bool *writable);
+
+static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn,
+                         bool prefault)
 {
         int r;
         int level;
         pfn_t pfn;
         unsigned long mmu_seq;
+        bool map_writable;
 
         level = mapping_level(vcpu, gfn);
 
@@ -2297,7 +2306,9 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 
         mmu_seq = vcpu->kvm->mmu_notifier_seq;
         smp_rmb();
-        pfn = gfn_to_pfn(vcpu->kvm, gfn);
+
+        if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable))
+                return 0;
 
         /* mmio */
         if (is_error_pfn(pfn))
@@ -2307,7 +2318,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
         if (mmu_notifier_retry(vcpu, mmu_seq))
                 goto out_unlock;
         kvm_mmu_free_some_pages(vcpu);
-        r = __direct_map(vcpu, v, write, level, gfn, pfn);
+        r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn,
+                         prefault);
         spin_unlock(&vcpu->kvm->mmu_lock);
 
 
@@ -2530,6 +2542,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
                 hpa_t root = vcpu->arch.mmu.root_hpa;
                 sp = page_header(root);
                 mmu_sync_children(vcpu, sp);
+                trace_kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
                 return;
         }
         for (i = 0; i < 4; ++i) {
@@ -2552,23 +2565,24 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
 }
 
 static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr,
-                                  u32 access, u32 *error)
+                                  u32 access, struct x86_exception *exception)
 {
-        if (error)
-                *error = 0;
+        if (exception)
+                exception->error_code = 0;
         return vaddr;
 }
 
 static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr,
-                                         u32 access, u32 *error)
+                                         u32 access,
+                                         struct x86_exception *exception)
 {
-        if (error)
-                *error = 0;
+        if (exception)
+                exception->error_code = 0;
         return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access);
 }
 
 static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
-                                u32 error_code)
+                                u32 error_code, bool prefault)
 {
         gfn_t gfn;
         int r;
@@ -2584,17 +2598,67 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
         gfn = gva >> PAGE_SHIFT;
 
         return nonpaging_map(vcpu, gva & PAGE_MASK,
-                             error_code & PFERR_WRITE_MASK, gfn);
+                             error_code & PFERR_WRITE_MASK, gfn, prefault);
+}
+
+static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
+{
+        struct kvm_arch_async_pf arch;
+
+        arch.token = (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
+        arch.gfn = gfn;
+        arch.direct_map = vcpu->arch.mmu.direct_map;
+        arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu);
+
+        return kvm_setup_async_pf(vcpu, gva, gfn, &arch);
 }
 
-static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
-                          u32 error_code)
+static bool can_do_async_pf(struct kvm_vcpu *vcpu)
+{
+        if (unlikely(!irqchip_in_kernel(vcpu->kvm) ||
+                     kvm_event_needs_reinjection(vcpu)))
+                return false;
+
+        return kvm_x86_ops->interrupt_allowed(vcpu);
+}
+
+static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
+                         gva_t gva, pfn_t *pfn, bool write, bool *writable)
+{
+        bool async;
+
+        *pfn = gfn_to_pfn_async(vcpu->kvm, gfn, &async, write, writable);
+
+        if (!async)
+                return false; /* *pfn has correct page already */
+
+        put_page(pfn_to_page(*pfn));
+
+        if (!prefault && can_do_async_pf(vcpu)) {
+                trace_kvm_try_async_get_page(gva, gfn);
+                if (kvm_find_async_pf_gfn(vcpu, gfn)) {
+                        trace_kvm_async_pf_doublefault(gva, gfn);
+                        kvm_make_request(KVM_REQ_APF_HALT, vcpu);
+                        return true;
+                } else if (kvm_arch_setup_async_pf(vcpu, gva, gfn))
+                        return true;
+        }
+
+        *pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write, writable);
+
+        return false;
+}
+
+static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
+                          bool prefault)
 {
         pfn_t pfn;
         int r;
         int level;
         gfn_t gfn = gpa >> PAGE_SHIFT;
         unsigned long mmu_seq;
+        int write = error_code & PFERR_WRITE_MASK;
+        bool map_writable;
 
         ASSERT(vcpu);
         ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
@@ -2609,15 +2673,19 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 
         mmu_seq = vcpu->kvm->mmu_notifier_seq;
         smp_rmb();
-        pfn = gfn_to_pfn(vcpu->kvm, gfn);
+
+        if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable))
+                return 0;
+
+        /* mmio */
         if (is_error_pfn(pfn))
                 return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);
         spin_lock(&vcpu->kvm->mmu_lock);
         if (mmu_notifier_retry(vcpu, mmu_seq))
                 goto out_unlock;
         kvm_mmu_free_some_pages(vcpu);
-        r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
-                         level, gfn, pfn);
+        r = __direct_map(vcpu, gpa, write, map_writable,
+                         level, gfn, pfn, prefault);
         spin_unlock(&vcpu->kvm->mmu_lock);
 
         return r;
@@ -2659,18 +2727,19 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
 
 static void paging_new_cr3(struct kvm_vcpu *vcpu)
 {
-        pgprintk("%s: cr3 %lx\n", __func__, vcpu->arch.cr3);
+        pgprintk("%s: cr3 %lx\n", __func__, kvm_read_cr3(vcpu));
         mmu_free_roots(vcpu);
 }
 
 static unsigned long get_cr3(struct kvm_vcpu *vcpu)
 {
-        return vcpu->arch.cr3;
+        return kvm_read_cr3(vcpu);
 }
 
-static void inject_page_fault(struct kvm_vcpu *vcpu)
+static void inject_page_fault(struct kvm_vcpu *vcpu,
+                              struct x86_exception *fault)
 {
-        vcpu->arch.mmu.inject_page_fault(vcpu);
+        vcpu->arch.mmu.inject_page_fault(vcpu, fault);
 }
 
 static void paging_free(struct kvm_vcpu *vcpu)
@@ -2816,6 +2885,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 {
         struct kvm_mmu *context = vcpu->arch.walk_mmu;
 
+        context->base_role.word = 0;
         context->new_cr3 = nonpaging_new_cr3;
         context->page_fault = tdp_page_fault;
         context->free = nonpaging_free;
@@ -3008,9 +3078,6 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
                 return;
         }
 
-        if (is_rsvd_bits_set(&vcpu->arch.mmu, *(u64 *)new, PT_PAGE_TABLE_LEVEL))
-                return;
-
         ++vcpu->kvm->stat.mmu_pte_updated;
         if (!sp->role.cr4_pae)
                 paging32_update_pte(vcpu, sp, spte, new);
@@ -3264,12 +3331,13 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
         }
 }
 
-int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
+int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
+                       void *insn, int insn_len)
 {
         int r;
         enum emulation_result er;
 
-        r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code);
+        r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false);
         if (r < 0)
                 goto out;
 
@@ -3282,7 +3350,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
         if (r)
                 goto out;
 
-        er = emulate_instruction(vcpu, cr2, error_code, 0);
+        er = x86_emulate_instruction(vcpu, cr2, 0, insn, insn_len);
 
         switch (er) {
         case EMULATE_DONE:
@@ -3377,11 +3445,14 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
                 if (!test_bit(slot, sp->slot_bitmap))
                         continue;
 
+                if (sp->role.level != PT_PAGE_TABLE_LEVEL)
+                        continue;
+
                 pt = sp->spt;
                 for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
                         /* avoid RMW */
                         if (is_writable_pte(pt[i]))
-                                pt[i] &= ~PT_WRITABLE_MASK;
+                                update_spte(&pt[i], pt[i] & ~PT_WRITABLE_MASK);
         }
         kvm_flush_remote_tlbs(kvm);
 }
@@ -3463,13 +3534,6 @@ static void mmu_destroy_caches(void)
         kmem_cache_destroy(mmu_page_header_cache);
 }
 
-void kvm_mmu_module_exit(void)
-{
-        mmu_destroy_caches();
-        percpu_counter_destroy(&kvm_total_used_mmu_pages);
-        unregister_shrinker(&mmu_shrinker);
-}
-
 int kvm_mmu_module_init(void)
 {
         pte_chain_cache = kmem_cache_create("kvm_pte_chain",
@@ -3566,7 +3630,7 @@ static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu,
 
 static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu)
 {
-        (void)kvm_set_cr3(vcpu, vcpu->arch.cr3);
+        (void)kvm_set_cr3(vcpu, kvm_read_cr3(vcpu));
         return 1;
 }
 
@@ -3662,12 +3726,6 @@ int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4])
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_get_spte_hierarchy);
 
-#ifdef CONFIG_KVM_MMU_AUDIT
-#include "mmu_audit.c"
-#else
-static void mmu_audit_disable(void) { }
-#endif
-
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
 {
         ASSERT(vcpu);
@@ -3675,5 +3733,18 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
         destroy_kvm_mmu(vcpu);
         free_mmu_pages(vcpu);
         mmu_free_memory_caches(vcpu);
+}
+
+#ifdef CONFIG_KVM_MMU_AUDIT
+#include "mmu_audit.c"
+#else
+static void mmu_audit_disable(void) { }
+#endif
+
+void kvm_mmu_module_exit(void)
+{
+        mmu_destroy_caches();
+        percpu_counter_destroy(&kvm_total_used_mmu_pages);
+        unregister_shrinker(&mmu_shrinker);
         mmu_audit_disable();
 }
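
For reference, the lpage_info_slot() helper introduced above folds the large-page index arithmetic that each caller used to open-code into one place. Below is a minimal, self-contained sketch of that arithmetic, compilable on its own; HPAGE_GFN_SHIFT_2M and the two-field struct kvm_lpage_info are simplified stand-ins for the real definitions in arch/x86/include/asm/kvm_host.h (not part of this diff), limited to the fields the patch actually dereferences (write_count and rmap_pde).

#include <stdio.h>

/* Simplified stand-in: a 2M large page covers 512 4K frames. */
#define HPAGE_GFN_SHIFT_2M 9

/* Simplified stand-in for the per-region bookkeeping in kvm_host.h. */
struct kvm_lpage_info {
        unsigned long rmap_pde;  /* rmap chain head for the large-page PDE */
        int write_count;         /* write-protected 4K pages inside the region */
};

/*
 * Index of the large-page region containing gfn, relative to a memslot
 * whose first frame is base_gfn. Subtracting the shifted base is what
 * keeps the result correct for slots that are not large-page aligned.
 */
static unsigned long lpage_idx(unsigned long gfn, unsigned long base_gfn)
{
        return (gfn >> HPAGE_GFN_SHIFT_2M) - (base_gfn >> HPAGE_GFN_SHIFT_2M);
}

int main(void)
{
        /* Slot based at gfn 0x101 (not 2M aligned); gfn 0x3ff lies inside it. */
        printf("idx = %lu\n", lpage_idx(0x3ff, 0x101));  /* prints idx = 1 */
        return 0;
}

With those example numbers the gfn falls in the slot's second large-page region (index 1), which is exactly the unaligned-slot case the comment above lpage_info_slot() calls out.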