Diffstat (limited to 'arch/x86/kvm/mmu.c')
-rw-r--r--  arch/x86/kvm/mmu.c  672
1 file changed, 567 insertions(+), 105 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e55af12e11b7..2ad6f5481671 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -27,11 +27,22 @@
 #include <linux/highmem.h>
 #include <linux/module.h>
 #include <linux/swap.h>
+#include <linux/hugetlb.h>
+#include <linux/compiler.h>
 
 #include <asm/page.h>
 #include <asm/cmpxchg.h>
 #include <asm/io.h>
 
+/*
+ * When setting this variable to true it enables Two-Dimensional-Paging
+ * where the hardware walks 2 page tables:
+ * 1. the guest-virtual to guest-physical
+ * 2. while doing 1. it walks guest-physical to host-physical
+ * If the hardware supports that we don't need to do shadow paging.
+ */
+bool tdp_enabled = false;
+
 #undef MMU_DEBUG
 
 #undef AUDIT
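
The tdp_enabled flag above is the switch for the rest of this patch: with it set, KVM builds direct maps of guest-physical memory and lets the hardware (AMD NPT or Intel EPT) perform both translations. A standalone toy model of the two-dimensional walk the comment describes, using one-level arrays as stand-in page tables (illustration only, not the hardware algorithm):

#include <stdio.h>

/* Toy one-level "page tables": index = page number, value = frame. */
static unsigned long guest_pt[16];  /* dimension 1: guest-virtual -> guest-physical */
static unsigned long nested_pt[16]; /* dimension 2: guest-physical -> host-physical */

static unsigned long tdp_translate(unsigned long gva_page)
{
	unsigned long gpa_page = guest_pt[gva_page]; /* walk guest tables */
	return nested_pt[gpa_page];                  /* then the nested tables */
}

int main(void)
{
	guest_pt[3] = 7;   /* guest maps virtual page 3 to guest frame 7 */
	nested_pt[7] = 42; /* host maps guest frame 7 to host frame 42 */
	printf("host frame: %lu\n", tdp_translate(3)); /* prints 42 */
	return 0;
}

In real hardware each dimension is itself a multi-level walk, and every level of dimension 1 must be translated through dimension 2, which is why TDP makes faults cheaper to service while making TLB misses more expensive.
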
@@ -101,8 +112,6 @@ static int dbg = 1;
 #define PT_FIRST_AVAIL_BITS_SHIFT 9
 #define PT64_SECOND_AVAIL_BITS_SHIFT 52
 
-#define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
-
 #define VALID_PAGE(x) ((x) != INVALID_PAGE)
 
 #define PT64_LEVEL_BITS 9
@@ -159,6 +168,13 @@ static int dbg = 1;
 #define ACC_USER_MASK    PT_USER_MASK
 #define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
 
+struct kvm_pv_mmu_op_buffer {
+	void *ptr;
+	unsigned len;
+	unsigned processed;
+	char buf[512] __aligned(sizeof(long));
+};
+
 struct kvm_rmap_desc {
 	u64 *shadow_ptes[RMAP_EXT];
 	struct kvm_rmap_desc *more;
@@ -200,11 +216,15 @@ static int is_present_pte(unsigned long pte)
 
 static int is_shadow_present_pte(u64 pte)
 {
-	pte &= ~PT_SHADOW_IO_MARK;
 	return pte != shadow_trap_nonpresent_pte
 		&& pte != shadow_notrap_nonpresent_pte;
 }
 
+static int is_large_pte(u64 pte)
+{
+	return pte & PT_PAGE_SIZE_MASK;
+}
+
 static int is_writeble_pte(unsigned long pte)
 {
 	return pte & PT_WRITABLE_MASK;
@@ -215,14 +235,14 @@ static int is_dirty_pte(unsigned long pte)
 	return pte & PT_DIRTY_MASK;
 }
 
-static int is_io_pte(unsigned long pte)
+static int is_rmap_pte(u64 pte)
 {
-	return pte & PT_SHADOW_IO_MARK;
+	return is_shadow_present_pte(pte);
 }
 
-static int is_rmap_pte(u64 pte)
+static pfn_t spte_to_pfn(u64 pte)
 {
-	return is_shadow_present_pte(pte);
+	return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
 }
 
 static gfn_t pse36_gfn_delta(u32 gpte)
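
spte_to_pfn() centralizes the mask-and-shift that several call sites used to open-code. A standalone check of the arithmetic, assuming 4KiB pages (PAGE_SHIFT = 12) and the 52-bit physical-address mask that PT64_BASE_ADDR_MASK expands to:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT_     12
#define BASE_ADDR_MASK_ 0x000ffffffffff000ULL /* bits 51:12 of the spte */

static uint64_t spte_to_pfn_(uint64_t spte)
{
	return (spte & BASE_ADDR_MASK_) >> PAGE_SHIFT_;
}

int main(void)
{
	uint64_t spte = 0x1234000ULL | 0x3; /* frame 0x1234, present+writable */
	printf("pfn = %#llx\n", (unsigned long long)spte_to_pfn_(spte));
	/* prints pfn = 0x1234: flag bits are masked off before the shift */
	return 0;
}
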
@@ -349,16 +369,100 @@ static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd)
 }
 
 /*
+ * Return the pointer to the largepage write count for a given
+ * gfn, handling slots that are not large page aligned.
+ */
+static int *slot_largepage_idx(gfn_t gfn, struct kvm_memory_slot *slot)
+{
+	unsigned long idx;
+
+	idx = (gfn / KVM_PAGES_PER_HPAGE) -
+	      (slot->base_gfn / KVM_PAGES_PER_HPAGE);
+	return &slot->lpage_info[idx].write_count;
+}
+
+static void account_shadowed(struct kvm *kvm, gfn_t gfn)
+{
+	int *write_count;
+
+	write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn));
+	*write_count += 1;
+	WARN_ON(*write_count > KVM_PAGES_PER_HPAGE);
+}
+
+static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
+{
+	int *write_count;
+
+	write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn));
+	*write_count -= 1;
+	WARN_ON(*write_count < 0);
+}
+
+static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn)
+{
+	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+	int *largepage_idx;
+
+	if (slot) {
+		largepage_idx = slot_largepage_idx(gfn, slot);
+		return *largepage_idx;
+	}
+
+	return 1;
+}
+
+static int host_largepage_backed(struct kvm *kvm, gfn_t gfn)
+{
+	struct vm_area_struct *vma;
+	unsigned long addr;
+
+	addr = gfn_to_hva(kvm, gfn);
+	if (kvm_is_error_hva(addr))
+		return 0;
+
+	vma = find_vma(current->mm, addr);
+	if (vma && is_vm_hugetlb_page(vma))
+		return 1;
+
+	return 0;
+}
+
+static int is_largepage_backed(struct kvm_vcpu *vcpu, gfn_t large_gfn)
+{
+	struct kvm_memory_slot *slot;
+
+	if (has_wrprotected_page(vcpu->kvm, large_gfn))
+		return 0;
+
+	if (!host_largepage_backed(vcpu->kvm, large_gfn))
+		return 0;
+
+	slot = gfn_to_memslot(vcpu->kvm, large_gfn);
+	if (slot && slot->dirty_bitmap)
+		return 0;
+
+	return 1;
+}
+
+/*
  * Take gfn and return the reverse mapping to it.
  * Note: gfn must be unaliased before this function get called
  */
 
-static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn)
+static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int lpage)
 {
 	struct kvm_memory_slot *slot;
+	unsigned long idx;
 
 	slot = gfn_to_memslot(kvm, gfn);
-	return &slot->rmap[gfn - slot->base_gfn];
+	if (!lpage)
+		return &slot->rmap[gfn - slot->base_gfn];
+
+	idx = (gfn / KVM_PAGES_PER_HPAGE) -
+	      (slot->base_gfn / KVM_PAGES_PER_HPAGE);
+
+	return &slot->lpage_info[idx].rmap_pde;
 }
 
 /*
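
The index arithmetic in slot_largepage_idx() (and in the lpage branch of gfn_to_rmap()) rounds both the gfn and the slot base down to a huge-page boundary before subtracting, so a slot that does not start on a 2MiB boundary still indexes lpage_info from entry 0. A standalone check, assuming KVM_PAGES_PER_HPAGE = 512 (2MiB huge pages made of 4KiB frames):

#include <stdio.h>

#define PAGES_PER_HPAGE 512UL

static unsigned long largepage_idx(unsigned long gfn, unsigned long base_gfn)
{
	/* same formula as slot_largepage_idx() */
	return gfn / PAGES_PER_HPAGE - base_gfn / PAGES_PER_HPAGE;
}

int main(void)
{
	/* a slot starting at gfn 0x100, i.e. not 2MiB aligned */
	printf("%lu\n", largepage_idx(0x100, 0x100)); /* 0: slot start     */
	printf("%lu\n", largepage_idx(0x1ff, 0x100)); /* 0: same huge page */
	printf("%lu\n", largepage_idx(0x200, 0x100)); /* 1: next huge page */
	return 0;
}
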
@@ -370,7 +474,7 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn)
  * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc
  * containing more mappings.
  */
-static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
+static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
 {
 	struct kvm_mmu_page *sp;
 	struct kvm_rmap_desc *desc;
@@ -382,7 +486,7 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 	gfn = unalias_gfn(vcpu->kvm, gfn);
 	sp = page_header(__pa(spte));
 	sp->gfns[spte - sp->spt] = gfn;
-	rmapp = gfn_to_rmap(vcpu->kvm, gfn);
+	rmapp = gfn_to_rmap(vcpu->kvm, gfn, lpage);
 	if (!*rmapp) {
 		rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
 		*rmapp = (unsigned long)spte;
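
rmap_add() relies on the tagged-pointer encoding spelled out in the comment above: a gfn with a single mapping stores the spte pointer directly in *rmapp, and only a second mapping allocates a kvm_rmap_desc, whose address is stored with bit 0 set. That works because the pointed-to objects are more than byte-aligned, so bit 0 is always free. A standalone model of the encoding (names are local, not the kernel's):

#include <stdint.h>
#include <stdio.h>

struct desc { void *sptes[4]; };

static int is_desc(unsigned long rmap) { return rmap & 1; }

static struct desc *to_desc(unsigned long rmap)
{
	return (struct desc *)(rmap & ~1UL);
}

int main(void)
{
	static uint64_t spte;
	static struct desc d;
	unsigned long rmap;

	rmap = (unsigned long)&spte;         /* one mapping: plain pointer */
	printf("desc? %d\n", is_desc(rmap)); /* 0 */

	rmap = (unsigned long)&d | 1;        /* many mappings: tagged desc */
	printf("desc? %d, desc at %p\n", is_desc(rmap), (void *)to_desc(rmap));
	return 0;
}
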
@@ -435,20 +539,21 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
 	struct kvm_rmap_desc *desc;
 	struct kvm_rmap_desc *prev_desc;
 	struct kvm_mmu_page *sp;
-	struct page *page;
+	pfn_t pfn;
 	unsigned long *rmapp;
 	int i;
 
 	if (!is_rmap_pte(*spte))
 		return;
 	sp = page_header(__pa(spte));
-	page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
-	mark_page_accessed(page);
+	pfn = spte_to_pfn(*spte);
+	if (*spte & PT_ACCESSED_MASK)
+		kvm_set_pfn_accessed(pfn);
 	if (is_writeble_pte(*spte))
-		kvm_release_page_dirty(page);
+		kvm_release_pfn_dirty(pfn);
 	else
-		kvm_release_page_clean(page);
-	rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt]);
+		kvm_release_pfn_clean(pfn);
+	rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], is_large_pte(*spte));
 	if (!*rmapp) {
 		printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
 		BUG();
@@ -514,7 +619,7 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
 	int write_protected = 0;
 
 	gfn = unalias_gfn(kvm, gfn);
-	rmapp = gfn_to_rmap(kvm, gfn);
+	rmapp = gfn_to_rmap(kvm, gfn, 0);
 
 	spte = rmap_next(kvm, rmapp, NULL);
 	while (spte) {
@@ -527,8 +632,35 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
 		}
 		spte = rmap_next(kvm, rmapp, spte);
 	}
+	if (write_protected) {
+		pfn_t pfn;
+
+		spte = rmap_next(kvm, rmapp, NULL);
+		pfn = spte_to_pfn(*spte);
+		kvm_set_pfn_dirty(pfn);
+	}
+
+	/* check for huge page mappings */
+	rmapp = gfn_to_rmap(kvm, gfn, 1);
+	spte = rmap_next(kvm, rmapp, NULL);
+	while (spte) {
+		BUG_ON(!spte);
+		BUG_ON(!(*spte & PT_PRESENT_MASK));
+		BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK));
+		pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn);
+		if (is_writeble_pte(*spte)) {
+			rmap_remove(kvm, spte);
+			--kvm->stat.lpages;
+			set_shadow_pte(spte, shadow_trap_nonpresent_pte);
+			write_protected = 1;
+		}
+		spte = rmap_next(kvm, rmapp, spte);
+	}
+
 	if (write_protected)
 		kvm_flush_remote_tlbs(kvm);
+
+	account_shadowed(kvm, gfn);
 }
 
 #ifdef MMU_DEBUG
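
Two things stand out in the rmap_write_protect() changes. Writable large mappings are zapped outright (and kvm->stat.lpages decremented) rather than just cleared of the write bit, so the next fault remaps the region; and the new account_shadowed() call records that this gfn now backs a shadowed, write-protected page table, which has_wrprotected_page() later uses to refuse large mappings over the same region. A toy model of that bookkeeping, again assuming 512 frames per huge page:

#include <stdio.h>

static int write_count[8];          /* one counter per 2MiB region */
#define REGION(gfn) ((gfn) / 512)

static void account(unsigned long gfn)   { write_count[REGION(gfn)]++; }
static void unaccount(unsigned long gfn) { write_count[REGION(gfn)]--; }
static int  may_map_large(unsigned long gfn) { return write_count[REGION(gfn)] == 0; }

int main(void)
{
	account(0x230);                        /* shadow a guest PT at gfn 0x230 */
	printf("%d\n", may_map_large(0x2ff));  /* 0: same 2MiB region */
	printf("%d\n", may_map_large(0x400));  /* 1: different region */
	unaccount(0x230);
	printf("%d\n", may_map_large(0x2ff));  /* 1: region is clean again */
	return 0;
}
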
@@ -538,8 +670,8 @@ static int is_empty_shadow_page(u64 *spt)
 	u64 *end;
 
 	for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++)
-		if ((*pos & ~PT_SHADOW_IO_MARK) != shadow_trap_nonpresent_pte) {
-			printk(KERN_ERR "%s: %p %llx\n", __FUNCTION__,
+		if (*pos != shadow_trap_nonpresent_pte) {
+			printk(KERN_ERR "%s: %p %llx\n", __func__,
 			       pos, *pos);
 			return 0;
 		}
@@ -559,7 +691,7 @@ static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 
 static unsigned kvm_page_table_hashfn(gfn_t gfn)
 {
-	return gfn;
+	return gfn & ((1 << KVM_MMU_HASH_SHIFT) - 1);
 }
 
 static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
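
Folding the reduction into kvm_page_table_hashfn() tidies up every caller, which previously applied % KVM_NUM_MMU_PAGES by hand; since the table size is a power of two, the modulo becomes a mask. A standalone check of the identity, assuming KVM_MMU_HASH_SHIFT is 10 (a 1024-bucket table):

#include <stdio.h>

#define HASH_SHIFT 10
#define NBUCKETS   (1UL << HASH_SHIFT)

int main(void)
{
	unsigned long gfn = 0x123456;
	printf("%lu %lu\n", gfn & (NBUCKETS - 1), gfn % NBUCKETS);
	/* both print 86: mask and modulo agree for power-of-two sizes */
	return 0;
}
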
@@ -662,13 +794,14 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
 	struct kvm_mmu_page *sp;
 	struct hlist_node *node;
 
-	pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
-	index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
+	pgprintk("%s: looking for gfn %lx\n", __func__, gfn);
+	index = kvm_page_table_hashfn(gfn);
 	bucket = &kvm->arch.mmu_page_hash[index];
 	hlist_for_each_entry(sp, node, bucket, hash_link)
-		if (sp->gfn == gfn && !sp->role.metaphysical) {
+		if (sp->gfn == gfn && !sp->role.metaphysical
+		    && !sp->role.invalid) {
 			pgprintk("%s: found role %x\n",
-				 __FUNCTION__, sp->role.word);
+				 __func__, sp->role.word);
 			return sp;
 		}
 	return NULL;
@@ -699,27 +832,27 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 		quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
 		role.quadrant = quadrant;
 	}
-	pgprintk("%s: looking gfn %lx role %x\n", __FUNCTION__,
+	pgprintk("%s: looking gfn %lx role %x\n", __func__,
 		 gfn, role.word);
-	index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
+	index = kvm_page_table_hashfn(gfn);
 	bucket = &vcpu->kvm->arch.mmu_page_hash[index];
 	hlist_for_each_entry(sp, node, bucket, hash_link)
 		if (sp->gfn == gfn && sp->role.word == role.word) {
 			mmu_page_add_parent_pte(vcpu, sp, parent_pte);
-			pgprintk("%s: found\n", __FUNCTION__);
+			pgprintk("%s: found\n", __func__);
 			return sp;
 		}
 	++vcpu->kvm->stat.mmu_cache_miss;
 	sp = kvm_mmu_alloc_page(vcpu, parent_pte);
 	if (!sp)
 		return sp;
-	pgprintk("%s: adding gfn %lx role %x\n", __FUNCTION__, gfn, role.word);
+	pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word);
 	sp->gfn = gfn;
 	sp->role = role;
 	hlist_add_head(&sp->hash_link, bucket);
-	vcpu->arch.mmu.prefetch_page(vcpu, sp);
 	if (!metaphysical)
 		rmap_write_protect(vcpu->kvm, gfn);
+	vcpu->arch.mmu.prefetch_page(vcpu, sp);
 	return sp;
 }
 
@@ -745,11 +878,17 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm,
 	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
 		ent = pt[i];
 
+		if (is_shadow_present_pte(ent)) {
+			if (!is_large_pte(ent)) {
+				ent &= PT64_BASE_ADDR_MASK;
+				mmu_page_remove_parent_pte(page_header(ent),
+							   &pt[i]);
+			} else {
+				--kvm->stat.lpages;
+				rmap_remove(kvm, &pt[i]);
+			}
+		}
 		pt[i] = shadow_trap_nonpresent_pte;
-		if (!is_shadow_present_pte(ent))
-			continue;
-		ent &= PT64_BASE_ADDR_MASK;
-		mmu_page_remove_parent_pte(page_header(ent), &pt[i]);
 	}
 	kvm_flush_remote_tlbs(kvm);
 }
@@ -789,10 +928,15 @@ static void kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 	}
 	kvm_mmu_page_unlink_children(kvm, sp);
 	if (!sp->root_count) {
+		if (!sp->role.metaphysical)
+			unaccount_shadowed(kvm, sp->gfn);
 		hlist_del(&sp->hash_link);
 		kvm_mmu_free_page(kvm, sp);
-	} else
+	} else {
 		list_move(&sp->link, &kvm->arch.active_mmu_pages);
+		sp->role.invalid = 1;
+		kvm_reload_remote_mmus(kvm);
+	}
 	kvm_mmu_reset_last_pte_updated(kvm);
 }
 
@@ -838,13 +982,13 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 	struct hlist_node *node, *n;
 	int r;
 
-	pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
+	pgprintk("%s: looking for gfn %lx\n", __func__, gfn);
 	r = 0;
-	index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
+	index = kvm_page_table_hashfn(gfn);
 	bucket = &kvm->arch.mmu_page_hash[index];
 	hlist_for_each_entry_safe(sp, node, n, bucket, hash_link)
 		if (sp->gfn == gfn && !sp->role.metaphysical) {
-			pgprintk("%s: gfn %lx role %x\n", __FUNCTION__, gfn,
+			pgprintk("%s: gfn %lx role %x\n", __func__, gfn,
 				 sp->role.word);
 			kvm_mmu_zap_page(kvm, sp);
 			r = 1;
@@ -857,7 +1001,7 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
 	struct kvm_mmu_page *sp;
 
 	while ((sp = kvm_mmu_lookup_page(kvm, gfn)) != NULL) {
-		pgprintk("%s: zap %lx %x\n", __FUNCTION__, gfn, sp->role.word);
+		pgprintk("%s: zap %lx %x\n", __func__, gfn, sp->role.word);
 		kvm_mmu_zap_page(kvm, sp);
 	}
 }
@@ -889,26 +1033,39 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
 static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 			 unsigned pt_access, unsigned pte_access,
 			 int user_fault, int write_fault, int dirty,
-			 int *ptwrite, gfn_t gfn, struct page *page)
+			 int *ptwrite, int largepage, gfn_t gfn,
+			 pfn_t pfn, bool speculative)
 {
 	u64 spte;
 	int was_rmapped = 0;
 	int was_writeble = is_writeble_pte(*shadow_pte);
-	hfn_t host_pfn = (*shadow_pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
 
 	pgprintk("%s: spte %llx access %x write_fault %d"
 		 " user_fault %d gfn %lx\n",
-		 __FUNCTION__, *shadow_pte, pt_access,
+		 __func__, *shadow_pte, pt_access,
 		 write_fault, user_fault, gfn);
 
 	if (is_rmap_pte(*shadow_pte)) {
-		if (host_pfn != page_to_pfn(page)) {
+		/*
+		 * If we overwrite a PTE page pointer with a 2MB PMD, unlink
+		 * the parent of the now unreachable PTE.
+		 */
+		if (largepage && !is_large_pte(*shadow_pte)) {
+			struct kvm_mmu_page *child;
+			u64 pte = *shadow_pte;
+
+			child = page_header(pte & PT64_BASE_ADDR_MASK);
+			mmu_page_remove_parent_pte(child, shadow_pte);
+		} else if (pfn != spte_to_pfn(*shadow_pte)) {
 			pgprintk("hfn old %lx new %lx\n",
-				 host_pfn, page_to_pfn(page));
+				 spte_to_pfn(*shadow_pte), pfn);
 			rmap_remove(vcpu->kvm, shadow_pte);
+		} else {
+			if (largepage)
+				was_rmapped = is_large_pte(*shadow_pte);
+			else
+				was_rmapped = 1;
 		}
-		else
-			was_rmapped = 1;
 	}
 
 	/*
@@ -917,6 +1074,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 	 * demand paging).
 	 */
 	spte = PT_PRESENT_MASK | PT_DIRTY_MASK;
+	if (!speculative)
+		pte_access |= PT_ACCESSED_MASK;
 	if (!dirty)
 		pte_access &= ~ACC_WRITE_MASK;
 	if (!(pte_access & ACC_EXEC_MASK))
@@ -925,15 +1084,10 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 		spte |= PT_PRESENT_MASK;
 	if (pte_access & ACC_USER_MASK)
 		spte |= PT_USER_MASK;
+	if (largepage)
+		spte |= PT_PAGE_SIZE_MASK;
 
-	if (is_error_page(page)) {
-		set_shadow_pte(shadow_pte,
-			       shadow_trap_nonpresent_pte | PT_SHADOW_IO_MARK);
-		kvm_release_page_clean(page);
-		return;
-	}
-
-	spte |= page_to_phys(page);
+	spte |= (u64)pfn << PAGE_SHIFT;
 
 	if ((pte_access & ACC_WRITE_MASK)
 	    || (write_fault && !is_write_protection(vcpu) && !user_fault)) {
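
With the is_error_page() branch gone, mmu_set_spte() no longer sees MMIO at all; the fault paths filter error pfns before calling in, and the spte is composed directly from the pfn plus flag bits. A standalone sketch of that composition, assuming 4KiB frames and the usual x86 bit positions (present=0, writable=1, user=2, PS=7); the flag names are local stand-ins, not the kernel's:

#include <stdint.h>
#include <stdio.h>

#define P_  (1ULL << 0)  /* present   */
#define W_  (1ULL << 1)  /* writable  */
#define U_  (1ULL << 2)  /* user      */
#define PS_ (1ULL << 7)  /* page size */

int main(void)
{
	uint64_t pfn = 0x1234;
	int largepage = 1;
	uint64_t spte = P_ | W_ | U_;

	if (largepage)
		spte |= PS_;       /* 2MiB mapping */
	spte |= pfn << 12;         /* frame number lands in bits 51:12 */
	printf("spte = %#llx\n", (unsigned long long)spte); /* 0x1234087 */
	return 0;
}
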
@@ -946,9 +1100,10 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 		}
 
 		shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn);
-		if (shadow) {
+		if (shadow ||
+		   (largepage && has_wrprotected_page(vcpu->kvm, gfn))) {
 			pgprintk("%s: found shadow page for %lx, marking ro\n",
-				 __FUNCTION__, gfn);
+				 __func__, gfn);
 			pte_access &= ~ACC_WRITE_MASK;
 			if (is_writeble_pte(spte)) {
 				spte &= ~PT_WRITABLE_MASK;
@@ -964,18 +1119,25 @@ unshadowed:
 	if (pte_access & ACC_WRITE_MASK)
 		mark_page_dirty(vcpu->kvm, gfn);
 
-	pgprintk("%s: setting spte %llx\n", __FUNCTION__, spte);
+	pgprintk("%s: setting spte %llx\n", __func__, spte);
+	pgprintk("instantiating %s PTE (%s) at %d (%llx) addr %llx\n",
+		 (spte&PT_PAGE_SIZE_MASK)? "2MB" : "4kB",
+		 (spte&PT_WRITABLE_MASK)?"RW":"R", gfn, spte, shadow_pte);
 	set_shadow_pte(shadow_pte, spte);
+	if (!was_rmapped && (spte & PT_PAGE_SIZE_MASK)
+	    && (spte & PT_PRESENT_MASK))
+		++vcpu->kvm->stat.lpages;
+
 	page_header_update_slot(vcpu->kvm, shadow_pte, gfn);
 	if (!was_rmapped) {
-		rmap_add(vcpu, shadow_pte, gfn);
+		rmap_add(vcpu, shadow_pte, gfn, largepage);
 		if (!is_rmap_pte(*shadow_pte))
-			kvm_release_page_clean(page);
+			kvm_release_pfn_clean(pfn);
 	} else {
 		if (was_writeble)
-			kvm_release_page_dirty(page);
+			kvm_release_pfn_dirty(pfn);
 		else
-			kvm_release_page_clean(page);
+			kvm_release_pfn_clean(pfn);
 	}
 	if (!ptwrite || !*ptwrite)
 		vcpu->arch.last_pte_updated = shadow_pte;
@@ -985,10 +1147,10 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
 {
 }
 
-static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write,
-			   gfn_t gfn, struct page *page)
+static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
+			int largepage, gfn_t gfn, pfn_t pfn,
+			int level)
 {
-	int level = PT32E_ROOT_LEVEL;
 	hpa_t table_addr = vcpu->arch.mmu.root_hpa;
 	int pt_write = 0;
 
@@ -1001,8 +1163,14 @@ static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write,
 
 		if (level == 1) {
 			mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL,
-				     0, write, 1, &pt_write, gfn, page);
-			return pt_write || is_io_pte(table[index]);
+				     0, write, 1, &pt_write, 0, gfn, pfn, false);
+			return pt_write;
+		}
+
+		if (largepage && level == 2) {
+			mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL,
+				     0, write, 1, &pt_write, 1, gfn, pfn, false);
+			return pt_write;
 		}
 
 		if (table[index] == shadow_trap_nonpresent_pte) {
@@ -1016,7 +1184,7 @@ static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write,
 					       1, ACC_ALL, &table[index]);
 			if (!new_table) {
 				pgprintk("nonpaging_map: ENOMEM\n");
-				kvm_release_page_clean(page);
+				kvm_release_pfn_clean(pfn);
 				return -ENOMEM;
 			}
 
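
__direct_map() generalizes __nonpaging_map(): the caller now chooses the root level, and a large-page fault terminates the walk one level early, installing a 2MB spte in the level-2 table instead of descending to a 4kB leaf. A toy model of that walk (table geometry invented, illustration only):

#include <stdio.h>

static void direct_map(int level, int largepage)
{
	for (; level > 0; level--) {
		if (level == 1 || (largepage && level == 2)) {
			printf("install %s spte at level %d\n",
			       largepage ? "2MiB" : "4KiB", level);
			return;
		}
		printf("descend from level %d\n", level);
	}
}

int main(void)
{
	direct_map(3, 0); /* descends to level 1, installs a 4KiB spte */
	direct_map(3, 1); /* stops at level 2, installs a 2MiB spte   */
	return 0;
}
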
@@ -1030,21 +1198,30 @@ static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write,
 static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 {
 	int r;
-
-	struct page *page;
-
-	down_read(&vcpu->kvm->slots_lock);
+	int largepage = 0;
+	pfn_t pfn;
 
 	down_read(&current->mm->mmap_sem);
-	page = gfn_to_page(vcpu->kvm, gfn);
+	if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) {
+		gfn &= ~(KVM_PAGES_PER_HPAGE-1);
+		largepage = 1;
+	}
+
+	pfn = gfn_to_pfn(vcpu->kvm, gfn);
 	up_read(&current->mm->mmap_sem);
 
+	/* mmio */
+	if (is_error_pfn(pfn)) {
+		kvm_release_pfn_clean(pfn);
+		return 1;
+	}
+
 	spin_lock(&vcpu->kvm->mmu_lock);
 	kvm_mmu_free_some_pages(vcpu);
-	r = __nonpaging_map(vcpu, v, write, gfn, page);
+	r = __direct_map(vcpu, v, write, largepage, gfn, pfn,
+			 PT32E_ROOT_LEVEL);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
-	up_read(&vcpu->kvm->slots_lock);
 
 	return r;
 }
@@ -1073,6 +1250,8 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
 
 		sp = page_header(root);
 		--sp->root_count;
+		if (!sp->root_count && sp->role.invalid)
+			kvm_mmu_zap_page(vcpu->kvm, sp);
 		vcpu->arch.mmu.root_hpa = INVALID_PAGE;
 		spin_unlock(&vcpu->kvm->mmu_lock);
 		return;
@@ -1085,6 +1264,8 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
 			root &= PT64_BASE_ADDR_MASK;
 			sp = page_header(root);
 			--sp->root_count;
+			if (!sp->root_count && sp->role.invalid)
+				kvm_mmu_zap_page(vcpu->kvm, sp);
 		}
 		vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
 	}
@@ -1097,6 +1278,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 	int i;
 	gfn_t root_gfn;
 	struct kvm_mmu_page *sp;
+	int metaphysical = 0;
 
 	root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT;
 
@@ -1105,14 +1287,20 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 		hpa_t root = vcpu->arch.mmu.root_hpa;
 
 		ASSERT(!VALID_PAGE(root));
+		if (tdp_enabled)
+			metaphysical = 1;
 		sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
-				      PT64_ROOT_LEVEL, 0, ACC_ALL, NULL);
+				      PT64_ROOT_LEVEL, metaphysical,
+				      ACC_ALL, NULL);
 		root = __pa(sp->spt);
 		++sp->root_count;
 		vcpu->arch.mmu.root_hpa = root;
 		return;
 	}
 #endif
+	metaphysical = !is_paging(vcpu);
+	if (tdp_enabled)
+		metaphysical = 1;
 	for (i = 0; i < 4; ++i) {
 		hpa_t root = vcpu->arch.mmu.pae_root[i];
 
@@ -1126,7 +1314,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 		} else if (vcpu->arch.mmu.root_level == 0)
 			root_gfn = 0;
 		sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
-				      PT32_ROOT_LEVEL, !is_paging(vcpu),
+				      PT32_ROOT_LEVEL, metaphysical,
 				      ACC_ALL, NULL);
 		root = __pa(sp->spt);
 		++sp->root_count;
@@ -1146,7 +1334,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
 	gfn_t gfn;
 	int r;
 
-	pgprintk("%s: gva %lx error %x\n", __FUNCTION__, gva, error_code);
+	pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code);
 	r = mmu_topup_memory_caches(vcpu);
 	if (r)
 		return r;
@@ -1160,6 +1348,41 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
 			     error_code & PFERR_WRITE_MASK, gfn);
 }
 
+static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
+				u32 error_code)
+{
+	pfn_t pfn;
+	int r;
+	int largepage = 0;
+	gfn_t gfn = gpa >> PAGE_SHIFT;
+
+	ASSERT(vcpu);
+	ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
+
+	r = mmu_topup_memory_caches(vcpu);
+	if (r)
+		return r;
+
+	down_read(&current->mm->mmap_sem);
+	if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) {
+		gfn &= ~(KVM_PAGES_PER_HPAGE-1);
+		largepage = 1;
+	}
+	pfn = gfn_to_pfn(vcpu->kvm, gfn);
+	up_read(&current->mm->mmap_sem);
+	if (is_error_pfn(pfn)) {
+		kvm_release_pfn_clean(pfn);
+		return 1;
+	}
+	spin_lock(&vcpu->kvm->mmu_lock);
+	kvm_mmu_free_some_pages(vcpu);
+	r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
+			 largepage, gfn, pfn, TDP_ROOT_LEVEL);
+	spin_unlock(&vcpu->kvm->mmu_lock);
+
+	return r;
+}
+
 static void nonpaging_free(struct kvm_vcpu *vcpu)
 {
 	mmu_free_roots(vcpu);
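
tdp_page_fault() is nonpaging_map() keyed on the guest-physical address and walking to TDP_ROOT_LEVEL; both share the convention that gfn_to_pfn() never fails outright but returns a sentinel, and that returning 1 hands the access to the emulator as MMIO. A toy model of that contract (memslot layout invented):

#include <stdio.h>

#define BAD_PFN (~0UL)

static unsigned long toy_gfn_to_pfn(unsigned long gfn)
{
	return gfn < 0x1000 ? gfn + 0x100 : BAD_PFN; /* toy one-slot layout */
}

static int toy_page_fault(unsigned long gfn)
{
	unsigned long pfn = toy_gfn_to_pfn(gfn);

	if (pfn == BAD_PFN)
		return 1;  /* not RAM: punt to the emulator */
	printf("mapped gfn %#lx -> pfn %#lx\n", gfn, pfn);
	return 0;          /* fault fixed */
}

int main(void)
{
	toy_page_fault(0x42);                        /* inside the slot */
	printf("r = %d\n", toy_page_fault(0xf0000)); /* mmio: r = 1 */
	return 0;
}
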
@@ -1188,7 +1411,7 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
 
 static void paging_new_cr3(struct kvm_vcpu *vcpu)
 {
-	pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->arch.cr3);
+	pgprintk("%s: cr3 %lx\n", __func__, vcpu->arch.cr3);
 	mmu_free_roots(vcpu);
 }
 
@@ -1253,7 +1476,35 @@ static int paging32E_init_context(struct kvm_vcpu *vcpu)
 	return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL);
 }
 
-static int init_kvm_mmu(struct kvm_vcpu *vcpu)
+static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
+{
+	struct kvm_mmu *context = &vcpu->arch.mmu;
+
+	context->new_cr3 = nonpaging_new_cr3;
+	context->page_fault = tdp_page_fault;
+	context->free = nonpaging_free;
+	context->prefetch_page = nonpaging_prefetch_page;
+	context->shadow_root_level = TDP_ROOT_LEVEL;
+	context->root_hpa = INVALID_PAGE;
+
+	if (!is_paging(vcpu)) {
+		context->gva_to_gpa = nonpaging_gva_to_gpa;
+		context->root_level = 0;
+	} else if (is_long_mode(vcpu)) {
+		context->gva_to_gpa = paging64_gva_to_gpa;
+		context->root_level = PT64_ROOT_LEVEL;
+	} else if (is_pae(vcpu)) {
+		context->gva_to_gpa = paging64_gva_to_gpa;
+		context->root_level = PT32E_ROOT_LEVEL;
+	} else {
+		context->gva_to_gpa = paging32_gva_to_gpa;
+		context->root_level = PT32_ROOT_LEVEL;
+	}
+
+	return 0;
+}
+
+static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
 {
 	ASSERT(vcpu);
 	ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
@@ -1268,6 +1519,16 @@ static int init_kvm_mmu(struct kvm_vcpu *vcpu)
 		return paging32_init_context(vcpu);
 }
 
+static int init_kvm_mmu(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.update_pte.pfn = bad_pfn;
+
+	if (tdp_enabled)
+		return init_kvm_tdp_mmu(vcpu);
+	else
+		return init_kvm_softmmu(vcpu);
+}
+
 static void destroy_kvm_mmu(struct kvm_vcpu *vcpu)
 {
 	ASSERT(vcpu);
@@ -1316,7 +1577,8 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
 
 	pte = *spte;
 	if (is_shadow_present_pte(pte)) {
-		if (sp->role.level == PT_PAGE_TABLE_LEVEL)
+		if (sp->role.level == PT_PAGE_TABLE_LEVEL ||
+		    is_large_pte(pte))
 			rmap_remove(vcpu->kvm, spte);
 		else {
 			child = page_header(pte & PT64_BASE_ADDR_MASK);
@@ -1324,24 +1586,26 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
 		}
 	}
 	set_shadow_pte(spte, shadow_trap_nonpresent_pte);
+	if (is_large_pte(pte))
+		--vcpu->kvm->stat.lpages;
 }
 
 static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
 				  struct kvm_mmu_page *sp,
 				  u64 *spte,
-				  const void *new, int bytes,
-				  int offset_in_pte)
+				  const void *new)
 {
-	if (sp->role.level != PT_PAGE_TABLE_LEVEL) {
+	if ((sp->role.level != PT_PAGE_TABLE_LEVEL)
+	    && !vcpu->arch.update_pte.largepage) {
 		++vcpu->kvm->stat.mmu_pde_zapped;
 		return;
 	}
 
 	++vcpu->kvm->stat.mmu_pte_updated;
 	if (sp->role.glevels == PT32_ROOT_LEVEL)
-		paging32_update_pte(vcpu, sp, spte, new, bytes, offset_in_pte);
+		paging32_update_pte(vcpu, sp, spte, new);
 	else
-		paging64_update_pte(vcpu, sp, spte, new, bytes, offset_in_pte);
+		paging64_update_pte(vcpu, sp, spte, new);
 }
 
 static bool need_remote_flush(u64 old, u64 new)
@@ -1378,7 +1642,9 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	gfn_t gfn;
 	int r;
 	u64 gpte = 0;
-	struct page *page;
+	pfn_t pfn;
+
+	vcpu->arch.update_pte.largepage = 0;
 
 	if (bytes != 4 && bytes != 8)
 		return;
@@ -1408,11 +1674,19 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
 
 	down_read(&current->mm->mmap_sem);
-	page = gfn_to_page(vcpu->kvm, gfn);
+	if (is_large_pte(gpte) && is_largepage_backed(vcpu, gfn)) {
+		gfn &= ~(KVM_PAGES_PER_HPAGE-1);
+		vcpu->arch.update_pte.largepage = 1;
+	}
+	pfn = gfn_to_pfn(vcpu->kvm, gfn);
 	up_read(&current->mm->mmap_sem);
 
+	if (is_error_pfn(pfn)) {
+		kvm_release_pfn_clean(pfn);
+		return;
+	}
 	vcpu->arch.update_pte.gfn = gfn;
-	vcpu->arch.update_pte.page = page;
+	vcpu->arch.update_pte.pfn = pfn;
 }
 
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
@@ -1423,7 +1697,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	struct hlist_node *node, *n;
 	struct hlist_head *bucket;
 	unsigned index;
-	u64 entry;
+	u64 entry, gentry;
 	u64 *spte;
 	unsigned offset = offset_in_page(gpa);
 	unsigned pte_size;
@@ -1433,8 +1707,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	int level;
 	int flooded = 0;
 	int npte;
+	int r;
 
-	pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes);
+	pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
 	mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes);
 	spin_lock(&vcpu->kvm->mmu_lock);
 	kvm_mmu_free_some_pages(vcpu);
@@ -1450,7 +1725,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 		vcpu->arch.last_pt_write_count = 1;
 		vcpu->arch.last_pte_updated = NULL;
 	}
-	index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
+	index = kvm_page_table_hashfn(gfn);
 	bucket = &vcpu->kvm->arch.mmu_page_hash[index];
 	hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) {
 		if (sp->gfn != gfn || sp->role.metaphysical)
@@ -1496,20 +1771,29 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 			continue;
 		}
 		spte = &sp->spt[page_offset / sizeof(*spte)];
+		if ((gpa & (pte_size - 1)) || (bytes < pte_size)) {
+			gentry = 0;
+			r = kvm_read_guest_atomic(vcpu->kvm,
+						  gpa & ~(u64)(pte_size - 1),
+						  &gentry, pte_size);
+			new = (const void *)&gentry;
+			if (r < 0)
+				new = NULL;
+		}
 		while (npte--) {
 			entry = *spte;
 			mmu_pte_write_zap_pte(vcpu, sp, spte);
-			mmu_pte_write_new_pte(vcpu, sp, spte, new, bytes,
-					      page_offset & (pte_size - 1));
+			if (new)
+				mmu_pte_write_new_pte(vcpu, sp, spte, new);
 			mmu_pte_write_flush_tlb(vcpu, entry, *spte);
 			++spte;
 		}
 	}
 	kvm_mmu_audit(vcpu, "post pte write");
 	spin_unlock(&vcpu->kvm->mmu_lock);
-	if (vcpu->arch.update_pte.page) {
-		kvm_release_page_clean(vcpu->arch.update_pte.page);
-		vcpu->arch.update_pte.page = NULL;
+	if (!is_error_pfn(vcpu->arch.update_pte.pfn)) {
+		kvm_release_pfn_clean(vcpu->arch.update_pte.pfn);
+		vcpu->arch.update_pte.pfn = bad_pfn;
 	}
 }
 
@@ -1518,9 +1802,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
 	gpa_t gpa;
 	int r;
 
-	down_read(&vcpu->kvm->slots_lock);
 	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
-	up_read(&vcpu->kvm->slots_lock);
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 	r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
@@ -1577,6 +1859,12 @@ out:
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
 
+void kvm_enable_tdp(void)
+{
+	tdp_enabled = true;
+}
+EXPORT_SYMBOL_GPL(kvm_enable_tdp);
+
 static void free_mmu_pages(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu_page *sp;
@@ -1677,7 +1965,53 @@ void kvm_mmu_zap_all(struct kvm *kvm)
 	kvm_flush_remote_tlbs(kvm);
 }
 
-void kvm_mmu_module_exit(void)
+void kvm_mmu_remove_one_alloc_mmu_page(struct kvm *kvm)
+{
+	struct kvm_mmu_page *page;
+
+	page = container_of(kvm->arch.active_mmu_pages.prev,
+			    struct kvm_mmu_page, link);
+	kvm_mmu_zap_page(kvm, page);
+}
+
+static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
+{
+	struct kvm *kvm;
+	struct kvm *kvm_freed = NULL;
+	int cache_count = 0;
+
+	spin_lock(&kvm_lock);
+
+	list_for_each_entry(kvm, &vm_list, vm_list) {
+		int npages;
+
+		spin_lock(&kvm->mmu_lock);
+		npages = kvm->arch.n_alloc_mmu_pages -
+			 kvm->arch.n_free_mmu_pages;
+		cache_count += npages;
+		if (!kvm_freed && nr_to_scan > 0 && npages > 0) {
+			kvm_mmu_remove_one_alloc_mmu_page(kvm);
+			cache_count--;
+			kvm_freed = kvm;
+		}
+		nr_to_scan--;
+
+		spin_unlock(&kvm->mmu_lock);
+	}
+	if (kvm_freed)
+		list_move_tail(&kvm_freed->vm_list, &vm_list);
+
+	spin_unlock(&kvm_lock);
+
+	return cache_count;
+}
+
+static struct shrinker mmu_shrinker = {
+	.shrink = mmu_shrink,
+	.seeks = DEFAULT_SEEKS * 10,
+};
+
+void mmu_destroy_caches(void)
 {
 	if (pte_chain_cache)
 		kmem_cache_destroy(pte_chain_cache);
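
mmu_shrink() follows the old shrinker contract: return the total number of cached objects so the VM can gauge pressure, and free some when nr_to_scan is positive. Here it zaps at most one shadow page per call, taken from the first VM on vm_list that has any, then rotates that VM to the tail so repeated pressure is spread across VMs; seeks = DEFAULT_SEEKS * 10 tells the VM these objects are expensive to recreate, so they are reclaimed reluctantly. A toy model of the rotation policy (pure userspace, invented numbers):

#include <stdio.h>

#define NVMS 3
static int pages[NVMS] = {4, 0, 2};
static int order[NVMS] = {0, 1, 2};   /* stand-in for vm_list */

static int shrink(void)
{
	int i, count = 0, freed = -1;

	for (i = 0; i < NVMS; i++) {
		int vm = order[i];
		count += pages[vm];
		if (freed < 0 && pages[vm] > 0) {
			pages[vm]--;       /* zap one shadow page */
			count--;
			freed = i;
		}
	}
	if (freed >= 0) {                  /* move victim VM to the tail */
		int vm = order[freed];
		for (i = freed; i < NVMS - 1; i++)
			order[i] = order[i + 1];
		order[NVMS - 1] = vm;
	}
	return count;                      /* remaining cache size */
}

int main(void)
{
	printf("%d\n", shrink());  /* 5: freed one of VM0's 4 pages */
	printf("%d\n", shrink());  /* 4: VM0 moved to tail, VM2 pays next */
	return 0;
}
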
@@ -1687,6 +2021,12 @@ void kvm_mmu_module_exit(void)
 	kmem_cache_destroy(mmu_page_header_cache);
 }
 
+void kvm_mmu_module_exit(void)
+{
+	mmu_destroy_caches();
+	unregister_shrinker(&mmu_shrinker);
+}
+
 int kvm_mmu_module_init(void)
 {
 	pte_chain_cache = kmem_cache_create("kvm_pte_chain",
@@ -1706,10 +2046,12 @@ int kvm_mmu_module_init(void)
 	if (!mmu_page_header_cache)
 		goto nomem;
 
+	register_shrinker(&mmu_shrinker);
+
 	return 0;
 
 nomem:
-	kvm_mmu_module_exit();
+	mmu_destroy_caches();
 	return -ENOMEM;
 }
 
@@ -1732,6 +2074,127 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
 	return nr_mmu_pages;
 }
 
+static void *pv_mmu_peek_buffer(struct kvm_pv_mmu_op_buffer *buffer,
+				unsigned len)
+{
+	if (len > buffer->len)
+		return NULL;
+	return buffer->ptr;
+}
+
+static void *pv_mmu_read_buffer(struct kvm_pv_mmu_op_buffer *buffer,
+				unsigned len)
+{
+	void *ret;
+
+	ret = pv_mmu_peek_buffer(buffer, len);
+	if (!ret)
+		return ret;
+	buffer->ptr += len;
+	buffer->len -= len;
+	buffer->processed += len;
+	return ret;
+}
+
+static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu,
+			    gpa_t addr, gpa_t value)
+{
+	int bytes = 8;
+	int r;
+
+	if (!is_long_mode(vcpu) && !is_pae(vcpu))
+		bytes = 4;
+
+	r = mmu_topup_memory_caches(vcpu);
+	if (r)
+		return r;
+
+	if (!emulator_write_phys(vcpu, addr, &value, bytes))
+		return -EFAULT;
+
+	return 1;
+}
+
+static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu)
+{
+	kvm_x86_ops->tlb_flush(vcpu);
+	return 1;
+}
+
+static int kvm_pv_mmu_release_pt(struct kvm_vcpu *vcpu, gpa_t addr)
+{
+	spin_lock(&vcpu->kvm->mmu_lock);
+	mmu_unshadow(vcpu->kvm, addr >> PAGE_SHIFT);
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	return 1;
+}
+
+static int kvm_pv_mmu_op_one(struct kvm_vcpu *vcpu,
+			     struct kvm_pv_mmu_op_buffer *buffer)
+{
+	struct kvm_mmu_op_header *header;
+
+	header = pv_mmu_peek_buffer(buffer, sizeof *header);
+	if (!header)
+		return 0;
+	switch (header->op) {
+	case KVM_MMU_OP_WRITE_PTE: {
+		struct kvm_mmu_op_write_pte *wpte;
+
+		wpte = pv_mmu_read_buffer(buffer, sizeof *wpte);
+		if (!wpte)
+			return 0;
+		return kvm_pv_mmu_write(vcpu, wpte->pte_phys,
+					wpte->pte_val);
+	}
+	case KVM_MMU_OP_FLUSH_TLB: {
+		struct kvm_mmu_op_flush_tlb *ftlb;
+
+		ftlb = pv_mmu_read_buffer(buffer, sizeof *ftlb);
+		if (!ftlb)
+			return 0;
+		return kvm_pv_mmu_flush_tlb(vcpu);
+	}
+	case KVM_MMU_OP_RELEASE_PT: {
+		struct kvm_mmu_op_release_pt *rpt;
+
+		rpt = pv_mmu_read_buffer(buffer, sizeof *rpt);
+		if (!rpt)
+			return 0;
+		return kvm_pv_mmu_release_pt(vcpu, rpt->pt_phys);
+	}
+	default: return 0;
+	}
+}
+
+int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
+		  gpa_t addr, unsigned long *ret)
+{
+	int r;
+	struct kvm_pv_mmu_op_buffer buffer;
+
+	buffer.ptr = buffer.buf;
+	buffer.len = min_t(unsigned long, bytes, sizeof buffer.buf);
+	buffer.processed = 0;
+
+	r = kvm_read_guest(vcpu->kvm, addr, buffer.buf, buffer.len);
+	if (r)
+		goto out;
+
+	while (buffer.len) {
+		r = kvm_pv_mmu_op_one(vcpu, &buffer);
+		if (r < 0)
+			goto out;
+		if (r == 0)
+			break;
+	}
+
+	r = 1;
+out:
+	*ret = buffer.processed;
+	return r;
+}
+
 #ifdef AUDIT
 
 static const char *audit_msg;
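
The kvm_pv_mmu_op() machinery consumes a guest-supplied buffer of variable-length records into the kvm_pv_mmu_op_buffer declared near the top of this patch: each record starts with a kvm_mmu_op_header, and pv_mmu_read_buffer() advances ptr/len/processed past each one as it is handled, so processed reports back how far the host got. A sketch of how a guest might pack such a buffer; the struct layouts and the opcode value are simplified stand-ins, not the ABI definitions from kvm_para.h:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define OP_WRITE_PTE 1  /* stand-in for KVM_MMU_OP_WRITE_PTE */

struct hdr { uint32_t op; uint32_t pad; };
struct write_pte { struct hdr header; uint64_t pte_phys; uint64_t pte_val; };

int main(void)
{
	char buf[512];
	struct write_pte wpte = {
		.header   = { .op = OP_WRITE_PTE },
		.pte_phys = 0x7000 + 8 * 3,           /* PTE slot to update */
		.pte_val  = (0x1234ULL << 12) | 0x67, /* frame + flag bits  */
	};
	size_t len = 0;

	memcpy(buf + len, &wpte, sizeof(wpte));  /* one op per record */
	len += sizeof(wpte);
	/* more records could follow; the host consumes them in order,
	 * advancing ptr/len/processed exactly like pv_mmu_read_buffer() */
	printf("buffer holds %zu bytes\n", len);
	return 0;
}
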
@@ -1768,8 +2231,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
 			audit_mappings_page(vcpu, ent, va, level - 1);
 		} else {
 			gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va);
-			struct page *page = gpa_to_page(vcpu, gpa);
-			hpa_t hpa = page_to_phys(page);
+			hpa_t hpa = (hpa_t)gpa_to_pfn(vcpu, gpa) << PAGE_SHIFT;
 
 			if (is_shadow_present_pte(ent)
 			    && (ent & PT64_BASE_ADDR_MASK) != hpa)
@@ -1782,7 +2244,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
 			    && !is_error_hpa(hpa))
 				printk(KERN_ERR "audit: (%s) notrap shadow,"
 				       " valid guest gva %lx\n", audit_msg, va);
-			kvm_release_page_clean(page);
+			kvm_release_pfn_clean(pfn);
 
 		}
 	}
@@ -1867,7 +2329,7 @@ static void audit_rmap(struct kvm_vcpu *vcpu)
 
 	if (n_rmap != n_actual)
 		printk(KERN_ERR "%s: (%s) rmap %d actual %d\n",
-		       __FUNCTION__, audit_msg, n_rmap, n_actual);
+		       __func__, audit_msg, n_rmap, n_actual);
 }
 
 static void audit_write_protection(struct kvm_vcpu *vcpu)
@@ -1887,7 +2349,7 @@ static void audit_write_protection(struct kvm_vcpu *vcpu)
 		if (*rmapp)
 			printk(KERN_ERR "%s: (%s) shadow page has writable"
 			       " mappings: gfn %lx role %x\n",
-			       __FUNCTION__, audit_msg, sp->gfn,
+			       __func__, audit_msg, sp->gfn,
 			       sp->role.word);
 	}
 }