Diffstat (limited to 'drivers/kvm/mmu.c')
-rw-r--r--	drivers/kvm/mmu.c	292
1 file changed, 133 insertions, 159 deletions
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index e8e228118de9..b297a6b111ac 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -16,15 +16,18 @@
  * the COPYING file in the top-level directory.
  *
  */
+
+#include "vmx.h"
+#include "kvm.h"
+
 #include <linux/types.h>
 #include <linux/string.h>
-#include <asm/page.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
 #include <linux/module.h>
 
-#include "vmx.h"
-#include "kvm.h"
+#include <asm/page.h>
+#include <asm/cmpxchg.h>
 
 #undef MMU_DEBUG
 
@@ -90,25 +93,11 @@ static int dbg = 1;
 #define PT32_DIR_PSE36_MASK (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)
 
 
-#define PT32_PTE_COPY_MASK \
-	(PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_DIRTY_MASK | PT_GLOBAL_MASK)
-
-#define PT64_PTE_COPY_MASK (PT64_NX_MASK | PT32_PTE_COPY_MASK)
-
 #define PT_FIRST_AVAIL_BITS_SHIFT 9
 #define PT64_SECOND_AVAIL_BITS_SHIFT 52
 
-#define PT_SHADOW_PS_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
 #define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
 
-#define PT_SHADOW_WRITABLE_SHIFT (PT_FIRST_AVAIL_BITS_SHIFT + 1)
-#define PT_SHADOW_WRITABLE_MASK (1ULL << PT_SHADOW_WRITABLE_SHIFT)
-
-#define PT_SHADOW_USER_SHIFT (PT_SHADOW_WRITABLE_SHIFT + 1)
-#define PT_SHADOW_USER_MASK (1ULL << (PT_SHADOW_USER_SHIFT))
-
-#define PT_SHADOW_BITS_OFFSET (PT_SHADOW_WRITABLE_SHIFT - PT_WRITABLE_SHIFT)
-
 #define VALID_PAGE(x) ((x) != INVALID_PAGE)
 
 #define PT64_LEVEL_BITS 9
@@ -165,6 +154,8 @@ struct kvm_rmap_desc {
 
 static struct kmem_cache *pte_chain_cache;
 static struct kmem_cache *rmap_desc_cache;
+static struct kmem_cache *mmu_page_cache;
+static struct kmem_cache *mmu_page_header_cache;
 
 static int is_write_protection(struct kvm_vcpu *vcpu)
 {
@@ -202,6 +193,15 @@ static int is_rmap_pte(u64 pte)
 		== (PT_WRITABLE_MASK | PT_PRESENT_MASK);
 }
 
+static void set_shadow_pte(u64 *sptep, u64 spte)
+{
+#ifdef CONFIG_X86_64
+	set_64bit((unsigned long *)sptep, spte);
+#else
+	set_64bit((unsigned long long *)sptep, spte);
+#endif
+}
+
 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
 				  struct kmem_cache *base_cache, int min,
 				  gfp_t gfp_flags)
@@ -235,6 +235,14 @@ static int __mmu_topup_memory_caches(struct kvm_vcpu *vcpu, gfp_t gfp_flags)
 		goto out;
 	r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache,
 				   rmap_desc_cache, 1, gfp_flags);
+	if (r)
+		goto out;
+	r = mmu_topup_memory_cache(&vcpu->mmu_page_cache,
+				   mmu_page_cache, 4, gfp_flags);
+	if (r)
+		goto out;
+	r = mmu_topup_memory_cache(&vcpu->mmu_page_header_cache,
+				   mmu_page_header_cache, 4, gfp_flags);
 out:
 	return r;
 }
@@ -258,6 +266,8 @@ static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
 {
 	mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache);
 	mmu_free_memory_cache(&vcpu->mmu_rmap_desc_cache);
+	mmu_free_memory_cache(&vcpu->mmu_page_cache);
+	mmu_free_memory_cache(&vcpu->mmu_page_header_cache);
 }
 
 static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
@@ -433,19 +443,18 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
 		BUG_ON(!(*spte & PT_WRITABLE_MASK));
 		rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
 		rmap_remove(vcpu, spte);
-		kvm_arch_ops->tlb_flush(vcpu);
-		*spte &= ~(u64)PT_WRITABLE_MASK;
+		set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK);
+		kvm_flush_remote_tlbs(vcpu->kvm);
 	}
 }
 
 #ifdef MMU_DEBUG
-static int is_empty_shadow_page(hpa_t page_hpa)
+static int is_empty_shadow_page(u64 *spt)
 {
 	u64 *pos;
 	u64 *end;
 
-	for (pos = __va(page_hpa), end = pos + PAGE_SIZE / sizeof(u64);
-	     pos != end; pos++)
+	for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++)
 		if (*pos != 0) {
 			printk(KERN_ERR "%s: %p %llx\n", __FUNCTION__,
 			       pos, *pos);
@@ -455,13 +464,13 @@ static int is_empty_shadow_page(hpa_t page_hpa)
 }
 #endif
 
-static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, hpa_t page_hpa)
+static void kvm_mmu_free_page(struct kvm_vcpu *vcpu,
+			      struct kvm_mmu_page *page_head)
 {
-	struct kvm_mmu_page *page_head = page_header(page_hpa);
-
-	ASSERT(is_empty_shadow_page(page_hpa));
-	page_head->page_hpa = page_hpa;
-	list_move(&page_head->link, &vcpu->free_pages);
+	ASSERT(is_empty_shadow_page(page_head->spt));
+	list_del(&page_head->link);
+	mmu_memory_cache_free(&vcpu->mmu_page_cache, page_head->spt);
+	mmu_memory_cache_free(&vcpu->mmu_page_header_cache, page_head);
 	++vcpu->kvm->n_free_mmu_pages;
 }
 
@@ -475,12 +484,15 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
 {
 	struct kvm_mmu_page *page;
 
-	if (list_empty(&vcpu->free_pages))
+	if (!vcpu->kvm->n_free_mmu_pages)
 		return NULL;
 
-	page = list_entry(vcpu->free_pages.next, struct kvm_mmu_page, link);
-	list_move(&page->link, &vcpu->kvm->active_mmu_pages);
-	ASSERT(is_empty_shadow_page(page->page_hpa));
+	page = mmu_memory_cache_alloc(&vcpu->mmu_page_header_cache,
+				      sizeof *page);
+	page->spt = mmu_memory_cache_alloc(&vcpu->mmu_page_cache, PAGE_SIZE);
+	set_page_private(virt_to_page(page->spt), (unsigned long)page);
+	list_add(&page->link, &vcpu->kvm->active_mmu_pages);
+	ASSERT(is_empty_shadow_page(page->spt));
 	page->slot_bitmap = 0;
 	page->multimapped = 0;
 	page->parent_pte = parent_pte;
@@ -638,7 +650,7 @@ static void kvm_mmu_page_unlink_children(struct kvm_vcpu *vcpu,
 	u64 *pt;
 	u64 ent;
 
-	pt = __va(page->page_hpa);
+	pt = page->spt;
 
 	if (page->role.level == PT_PAGE_TABLE_LEVEL) {
 		for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
@@ -646,7 +658,7 @@ static void kvm_mmu_page_unlink_children(struct kvm_vcpu *vcpu,
 				rmap_remove(vcpu, &pt[i]);
 			pt[i] = 0;
 		}
-		kvm_arch_ops->tlb_flush(vcpu);
+		kvm_flush_remote_tlbs(vcpu->kvm);
 		return;
 	}
 
@@ -659,6 +671,7 @@ static void kvm_mmu_page_unlink_children(struct kvm_vcpu *vcpu,
 		ent &= PT64_BASE_ADDR_MASK;
 		mmu_page_remove_parent_pte(vcpu, page_header(ent), &pt[i]);
 	}
+	kvm_flush_remote_tlbs(vcpu->kvm);
 }
 
 static void kvm_mmu_put_page(struct kvm_vcpu *vcpu,
@@ -685,12 +698,12 @@ static void kvm_mmu_zap_page(struct kvm_vcpu *vcpu,
 		}
 		BUG_ON(!parent_pte);
 		kvm_mmu_put_page(vcpu, page, parent_pte);
-		*parent_pte = 0;
+		set_shadow_pte(parent_pte, 0);
 	}
 	kvm_mmu_page_unlink_children(vcpu, page);
 	if (!page->root_count) {
 		hlist_del(&page->hash_link);
-		kvm_mmu_free_page(vcpu, page->page_hpa);
+		kvm_mmu_free_page(vcpu, page);
 	} else
 		list_move(&page->link, &vcpu->kvm->active_mmu_pages);
 }
@@ -717,6 +730,17 @@ static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn)
 	return r;
 }
 
+static void mmu_unshadow(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+	struct kvm_mmu_page *page;
+
+	while ((page = kvm_mmu_lookup_page(vcpu, gfn)) != NULL) {
+		pgprintk("%s: zap %lx %x\n",
+			 __FUNCTION__, gfn, page->role.word);
+		kvm_mmu_zap_page(vcpu, page);
+	}
+}
+
 static void page_header_update_slot(struct kvm *kvm, void *pte, gpa_t gpa)
 {
 	int slot = memslot_id(kvm, gfn_to_memslot(kvm, gpa >> PAGE_SHIFT));
@@ -805,7 +829,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
 			return -ENOMEM;
 		}
 
-		table[index] = new_table->page_hpa | PT_PRESENT_MASK
+		table[index] = __pa(new_table->spt) | PT_PRESENT_MASK
 			| PT_WRITABLE_MASK | PT_USER_MASK;
 	}
 	table_addr = table[index] & PT64_BASE_ADDR_MASK;
@@ -817,11 +841,12 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
 	int i;
 	struct kvm_mmu_page *page;
 
+	if (!VALID_PAGE(vcpu->mmu.root_hpa))
+		return;
 #ifdef CONFIG_X86_64
 	if (vcpu->mmu.shadow_root_level == PT64_ROOT_LEVEL) {
 		hpa_t root = vcpu->mmu.root_hpa;
 
-		ASSERT(VALID_PAGE(root));
 		page = page_header(root);
 		--page->root_count;
 		vcpu->mmu.root_hpa = INVALID_PAGE;
@@ -832,7 +857,6 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
 		hpa_t root = vcpu->mmu.pae_root[i];
 
 		if (root) {
-			ASSERT(VALID_PAGE(root));
 			root &= PT64_BASE_ADDR_MASK;
 			page = page_header(root);
 			--page->root_count;
@@ -857,7 +881,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 		ASSERT(!VALID_PAGE(root));
 		page = kvm_mmu_get_page(vcpu, root_gfn, 0,
 					PT64_ROOT_LEVEL, 0, 0, NULL);
-		root = page->page_hpa;
+		root = __pa(page->spt);
 		++page->root_count;
 		vcpu->mmu.root_hpa = root;
 		return;
@@ -878,7 +902,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 		page = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
 					PT32_ROOT_LEVEL, !is_paging(vcpu),
 					0, NULL);
-		root = page->page_hpa;
+		root = __pa(page->spt);
 		++page->root_count;
 		vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK;
 	}
@@ -928,9 +952,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu)
 	context->free = nonpaging_free;
 	context->root_level = 0;
 	context->shadow_root_level = PT32E_ROOT_LEVEL;
-	mmu_alloc_roots(vcpu);
-	ASSERT(VALID_PAGE(context->root_hpa));
-	kvm_arch_ops->set_cr3(vcpu, context->root_hpa);
+	context->root_hpa = INVALID_PAGE;
 	return 0;
 }
 
@@ -944,59 +966,6 @@ static void paging_new_cr3(struct kvm_vcpu *vcpu)
 {
 	pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->cr3);
 	mmu_free_roots(vcpu);
-	if (unlikely(vcpu->kvm->n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
-		kvm_mmu_free_some_pages(vcpu);
-	mmu_alloc_roots(vcpu);
-	kvm_mmu_flush_tlb(vcpu);
-	kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa);
-}
-
-static inline void set_pte_common(struct kvm_vcpu *vcpu,
-				  u64 *shadow_pte,
-				  gpa_t gaddr,
-				  int dirty,
-				  u64 access_bits,
-				  gfn_t gfn)
-{
-	hpa_t paddr;
-
-	*shadow_pte |= access_bits << PT_SHADOW_BITS_OFFSET;
-	if (!dirty)
-		access_bits &= ~PT_WRITABLE_MASK;
-
-	paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK);
-
-	*shadow_pte |= access_bits;
-
-	if (is_error_hpa(paddr)) {
-		*shadow_pte |= gaddr;
-		*shadow_pte |= PT_SHADOW_IO_MARK;
-		*shadow_pte &= ~PT_PRESENT_MASK;
-		return;
-	}
-
-	*shadow_pte |= paddr;
-
-	if (access_bits & PT_WRITABLE_MASK) {
-		struct kvm_mmu_page *shadow;
-
-		shadow = kvm_mmu_lookup_page(vcpu, gfn);
-		if (shadow) {
-			pgprintk("%s: found shadow page for %lx, marking ro\n",
-				 __FUNCTION__, gfn);
-			access_bits &= ~PT_WRITABLE_MASK;
-			if (is_writeble_pte(*shadow_pte)) {
-				*shadow_pte &= ~PT_WRITABLE_MASK;
-				kvm_arch_ops->tlb_flush(vcpu);
-			}
-		}
-	}
-
-	if (access_bits & PT_WRITABLE_MASK)
-		mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT);
-
-	page_header_update_slot(vcpu->kvm, shadow_pte, gaddr);
-	rmap_add(vcpu, shadow_pte);
 }
 
 static void inject_page_fault(struct kvm_vcpu *vcpu,
@@ -1006,23 +975,6 @@ static void inject_page_fault(struct kvm_vcpu *vcpu,
 	kvm_arch_ops->inject_page_fault(vcpu, addr, err_code);
 }
 
-static inline int fix_read_pf(u64 *shadow_ent)
-{
-	if ((*shadow_ent & PT_SHADOW_USER_MASK) &&
-	    !(*shadow_ent & PT_USER_MASK)) {
-		/*
-		 * If supervisor write protect is disabled, we shadow kernel
-		 * pages as user pages so we can trap the write access.
-		 */
-		*shadow_ent |= PT_USER_MASK;
-		*shadow_ent &= ~PT_WRITABLE_MASK;
-
-		return 1;
-
-	}
-	return 0;
-}
-
 static void paging_free(struct kvm_vcpu *vcpu)
 {
 	nonpaging_free(vcpu);
@@ -1047,10 +999,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
 	context->free = paging_free;
 	context->root_level = level;
 	context->shadow_root_level = level;
-	mmu_alloc_roots(vcpu);
-	ASSERT(VALID_PAGE(context->root_hpa));
-	kvm_arch_ops->set_cr3(vcpu, context->root_hpa |
-			      (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK)));
+	context->root_hpa = INVALID_PAGE;
 	return 0;
 }
 
@@ -1069,10 +1018,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
 	context->free = paging_free;
 	context->root_level = PT32_ROOT_LEVEL;
 	context->shadow_root_level = PT32E_ROOT_LEVEL;
-	mmu_alloc_roots(vcpu);
-	ASSERT(VALID_PAGE(context->root_hpa));
-	kvm_arch_ops->set_cr3(vcpu, context->root_hpa |
-			      (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK)));
+	context->root_hpa = INVALID_PAGE;
 	return 0;
 }
 
@@ -1107,18 +1053,33 @@ static void destroy_kvm_mmu(struct kvm_vcpu *vcpu)
 
 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
 {
+	destroy_kvm_mmu(vcpu);
+	return init_kvm_mmu(vcpu);
+}
+
+int kvm_mmu_load(struct kvm_vcpu *vcpu)
+{
 	int r;
 
-	destroy_kvm_mmu(vcpu);
-	r = init_kvm_mmu(vcpu);
-	if (r < 0)
-		goto out;
+	spin_lock(&vcpu->kvm->lock);
 	r = mmu_topup_memory_caches(vcpu);
+	if (r)
+		goto out;
+	mmu_alloc_roots(vcpu);
+	kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa);
+	kvm_mmu_flush_tlb(vcpu);
 out:
+	spin_unlock(&vcpu->kvm->lock);
 	return r;
 }
+EXPORT_SYMBOL_GPL(kvm_mmu_load);
+
+void kvm_mmu_unload(struct kvm_vcpu *vcpu)
+{
+	mmu_free_roots(vcpu);
+}
 
-static void mmu_pre_write_zap_pte(struct kvm_vcpu *vcpu,
+static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
 				  struct kvm_mmu_page *page,
 				  u64 *spte)
 {
@@ -1135,9 +1096,25 @@ static void mmu_pre_write_zap_pte(struct kvm_vcpu *vcpu,
 		}
 	}
 	*spte = 0;
+	kvm_flush_remote_tlbs(vcpu->kvm);
+}
+
+static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
+				  struct kvm_mmu_page *page,
+				  u64 *spte,
+				  const void *new, int bytes)
+{
+	if (page->role.level != PT_PAGE_TABLE_LEVEL)
+		return;
+
+	if (page->role.glevels == PT32_ROOT_LEVEL)
+		paging32_update_pte(vcpu, page, spte, new, bytes);
+	else
+		paging64_update_pte(vcpu, page, spte, new, bytes);
 }
 
-void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes)
+void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
+		       const u8 *old, const u8 *new, int bytes)
 {
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	struct kvm_mmu_page *page;
@@ -1149,6 +1126,7 @@ void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes)
 	unsigned pte_size;
 	unsigned page_offset;
 	unsigned misaligned;
+	unsigned quadrant;
 	int level;
 	int flooded = 0;
 	int npte;
@@ -1169,6 +1147,7 @@ void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes)
 			continue;
 		pte_size = page->role.glevels == PT32_ROOT_LEVEL ? 4 : 8;
 		misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
+		misaligned |= bytes < 4;
 		if (misaligned || flooded) {
 			/*
 			 * Misaligned accesses are too much trouble to fix
@@ -1200,21 +1179,20 @@ void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes)
 				page_offset <<= 1;
 				npte = 2;
 			}
+			quadrant = page_offset >> PAGE_SHIFT;
 			page_offset &= ~PAGE_MASK;
+			if (quadrant != page->role.quadrant)
+				continue;
 		}
-		spte = __va(page->page_hpa);
-		spte += page_offset / sizeof(*spte);
+		spte = &page->spt[page_offset / sizeof(*spte)];
 		while (npte--) {
-			mmu_pre_write_zap_pte(vcpu, page, spte);
+			mmu_pte_write_zap_pte(vcpu, page, spte);
+			mmu_pte_write_new_pte(vcpu, page, spte, new, bytes);
 			++spte;
 		}
 	}
 }
 
-void kvm_mmu_post_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes)
-{
-}
-
 int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
 {
 	gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);
@@ -1243,13 +1221,6 @@ static void free_mmu_pages(struct kvm_vcpu *vcpu)
 				    struct kvm_mmu_page, link);
 		kvm_mmu_zap_page(vcpu, page);
 	}
-	while (!list_empty(&vcpu->free_pages)) {
-		page = list_entry(vcpu->free_pages.next,
-				  struct kvm_mmu_page, link);
-		list_del(&page->link);
-		__free_page(pfn_to_page(page->page_hpa >> PAGE_SHIFT));
-		page->page_hpa = INVALID_PAGE;
-	}
 	free_page((unsigned long)vcpu->mmu.pae_root);
 }
 
@@ -1260,18 +1231,7 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
 
 	ASSERT(vcpu);
 
-	for (i = 0; i < KVM_NUM_MMU_PAGES; i++) {
-		struct kvm_mmu_page *page_header = &vcpu->page_header_buf[i];
-
-		INIT_LIST_HEAD(&page_header->link);
-		if ((page = alloc_page(GFP_KERNEL)) == NULL)
-			goto error_1;
-		set_page_private(page, (unsigned long)page_header);
-		page_header->page_hpa = (hpa_t)page_to_pfn(page) << PAGE_SHIFT;
-		memset(__va(page_header->page_hpa), 0, PAGE_SIZE);
-		list_add(&page_header->link, &vcpu->free_pages);
-		++vcpu->kvm->n_free_mmu_pages;
-	}
+	vcpu->kvm->n_free_mmu_pages = KVM_NUM_MMU_PAGES;
 
 	/*
 	 * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64.
@@ -1296,7 +1256,6 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
 {
 	ASSERT(vcpu);
 	ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));
-	ASSERT(list_empty(&vcpu->free_pages));
 
 	return alloc_mmu_pages(vcpu);
 }
@@ -1305,7 +1264,6 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu)
 {
 	ASSERT(vcpu);
 	ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));
-	ASSERT(!list_empty(&vcpu->free_pages));
 
 	return init_kvm_mmu(vcpu);
 }
@@ -1331,7 +1289,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot)
 		if (!test_bit(slot, &page->slot_bitmap))
 			continue;
 
-		pt = __va(page->page_hpa);
+		pt = page->spt;
 		for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
 			/* avoid RMW */
 			if (pt[i] & PT_WRITABLE_MASK) {
@@ -1354,7 +1312,7 @@ void kvm_mmu_zap_all(struct kvm_vcpu *vcpu)
 	}
 
 	mmu_free_memory_caches(vcpu);
-	kvm_arch_ops->tlb_flush(vcpu);
+	kvm_flush_remote_tlbs(vcpu->kvm);
 	init_kvm_mmu(vcpu);
 }
 
@@ -1364,6 +1322,10 @@ void kvm_mmu_module_exit(void)
 		kmem_cache_destroy(pte_chain_cache);
 	if (rmap_desc_cache)
 		kmem_cache_destroy(rmap_desc_cache);
+	if (mmu_page_cache)
+		kmem_cache_destroy(mmu_page_cache);
+	if (mmu_page_header_cache)
+		kmem_cache_destroy(mmu_page_header_cache);
 }
 
 int kvm_mmu_module_init(void)
@@ -1379,6 +1341,18 @@ int kvm_mmu_module_init(void)
 	if (!rmap_desc_cache)
 		goto nomem;
 
+	mmu_page_cache = kmem_cache_create("kvm_mmu_page",
+					   PAGE_SIZE,
+					   PAGE_SIZE, 0, NULL, NULL);
+	if (!mmu_page_cache)
+		goto nomem;
+
+	mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header",
+						  sizeof(struct kvm_mmu_page),
+						  0, 0, NULL, NULL);
+	if (!mmu_page_header_cache)
+		goto nomem;
+
 	return 0;
 
 nomem:
@@ -1482,7 +1456,7 @@ static int count_writable_mappings(struct kvm_vcpu *vcpu)
 	int i;
 
 	list_for_each_entry(page, &vcpu->kvm->active_mmu_pages, link) {
-		u64 *pt = __va(page->page_hpa);
+		u64 *pt = page->spt;
 
 		if (page->role.level != PT_PAGE_TABLE_LEVEL)
 			continue;