Diffstat (limited to 'drivers/kvm/mmu.c')
-rw-r--r--  drivers/kvm/mmu.c  292
1 file changed, 133 insertions(+), 159 deletions(-)
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index e8e228118de9..b297a6b111ac 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -16,15 +16,18 @@
  * the COPYING file in the top-level directory.
  *
  */
+
+#include "vmx.h"
+#include "kvm.h"
+
 #include <linux/types.h>
 #include <linux/string.h>
-#include <asm/page.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
 #include <linux/module.h>
 
-#include "vmx.h"
-#include "kvm.h"
+#include <asm/page.h>
+#include <asm/cmpxchg.h>
 
 #undef MMU_DEBUG
 
@@ -90,25 +93,11 @@ static int dbg = 1;
 #define PT32_DIR_PSE36_MASK (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)
 
 
-#define PT32_PTE_COPY_MASK \
-	(PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_DIRTY_MASK | PT_GLOBAL_MASK)
-
-#define PT64_PTE_COPY_MASK (PT64_NX_MASK | PT32_PTE_COPY_MASK)
-
 #define PT_FIRST_AVAIL_BITS_SHIFT 9
 #define PT64_SECOND_AVAIL_BITS_SHIFT 52
 
-#define PT_SHADOW_PS_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
 #define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
 
-#define PT_SHADOW_WRITABLE_SHIFT (PT_FIRST_AVAIL_BITS_SHIFT + 1)
-#define PT_SHADOW_WRITABLE_MASK (1ULL << PT_SHADOW_WRITABLE_SHIFT)
-
-#define PT_SHADOW_USER_SHIFT (PT_SHADOW_WRITABLE_SHIFT + 1)
-#define PT_SHADOW_USER_MASK (1ULL << (PT_SHADOW_USER_SHIFT))
-
-#define PT_SHADOW_BITS_OFFSET (PT_SHADOW_WRITABLE_SHIFT - PT_WRITABLE_SHIFT)
-
 #define VALID_PAGE(x) ((x) != INVALID_PAGE)
 
 #define PT64_LEVEL_BITS 9
@@ -165,6 +154,8 @@ struct kvm_rmap_desc {
 
 static struct kmem_cache *pte_chain_cache;
 static struct kmem_cache *rmap_desc_cache;
+static struct kmem_cache *mmu_page_cache;
+static struct kmem_cache *mmu_page_header_cache;
 
 static int is_write_protection(struct kvm_vcpu *vcpu)
 {
@@ -202,6 +193,15 @@ static int is_rmap_pte(u64 pte)
 		== (PT_WRITABLE_MASK | PT_PRESENT_MASK);
 }
 
+static void set_shadow_pte(u64 *sptep, u64 spte)
+{
+#ifdef CONFIG_X86_64
+	set_64bit((unsigned long *)sptep, spte);
+#else
+	set_64bit((unsigned long long *)sptep, spte);
+#endif
+}
+
 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
 				  struct kmem_cache *base_cache, int min,
 				  gfp_t gfp_flags)
@@ -235,6 +235,14 @@ static int __mmu_topup_memory_caches(struct kvm_vcpu *vcpu, gfp_t gfp_flags)
 		goto out;
 	r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache,
 				   rmap_desc_cache, 1, gfp_flags);
+	if (r)
+		goto out;
+	r = mmu_topup_memory_cache(&vcpu->mmu_page_cache,
+				   mmu_page_cache, 4, gfp_flags);
+	if (r)
+		goto out;
+	r = mmu_topup_memory_cache(&vcpu->mmu_page_header_cache,
+				   mmu_page_header_cache, 4, gfp_flags);
 out:
 	return r;
 }
@@ -258,6 +266,8 @@ static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
 {
 	mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache);
 	mmu_free_memory_cache(&vcpu->mmu_rmap_desc_cache);
+	mmu_free_memory_cache(&vcpu->mmu_page_cache);
+	mmu_free_memory_cache(&vcpu->mmu_page_header_cache);
 }
 
 static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
@@ -433,19 +443,18 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
 		BUG_ON(!(*spte & PT_WRITABLE_MASK));
 		rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
 		rmap_remove(vcpu, spte);
-		kvm_arch_ops->tlb_flush(vcpu);
-		*spte &= ~(u64)PT_WRITABLE_MASK;
+		set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK);
+		kvm_flush_remote_tlbs(vcpu->kvm);
 	}
 }
 
 #ifdef MMU_DEBUG
-static int is_empty_shadow_page(hpa_t page_hpa)
+static int is_empty_shadow_page(u64 *spt)
 {
 	u64 *pos;
 	u64 *end;
 
-	for (pos = __va(page_hpa), end = pos + PAGE_SIZE / sizeof(u64);
-	     pos != end; pos++)
+	for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++)
 		if (*pos != 0) {
 			printk(KERN_ERR "%s: %p %llx\n", __FUNCTION__,
 			       pos, *pos);
@@ -455,13 +464,13 @@ static int is_empty_shadow_page(hpa_t page_hpa)
 }
 #endif
 
-static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, hpa_t page_hpa)
+static void kvm_mmu_free_page(struct kvm_vcpu *vcpu,
+			      struct kvm_mmu_page *page_head)
 {
-	struct kvm_mmu_page *page_head = page_header(page_hpa);
-
-	ASSERT(is_empty_shadow_page(page_hpa));
-	page_head->page_hpa = page_hpa;
-	list_move(&page_head->link, &vcpu->free_pages);
+	ASSERT(is_empty_shadow_page(page_head->spt));
+	list_del(&page_head->link);
+	mmu_memory_cache_free(&vcpu->mmu_page_cache, page_head->spt);
+	mmu_memory_cache_free(&vcpu->mmu_page_header_cache, page_head);
 	++vcpu->kvm->n_free_mmu_pages;
 }
 
@@ -475,12 +484,15 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
 {
 	struct kvm_mmu_page *page;
 
-	if (list_empty(&vcpu->free_pages))
+	if (!vcpu->kvm->n_free_mmu_pages)
 		return NULL;
 
-	page = list_entry(vcpu->free_pages.next, struct kvm_mmu_page, link);
-	list_move(&page->link, &vcpu->kvm->active_mmu_pages);
-	ASSERT(is_empty_shadow_page(page->page_hpa));
+	page = mmu_memory_cache_alloc(&vcpu->mmu_page_header_cache,
+				      sizeof *page);
+	page->spt = mmu_memory_cache_alloc(&vcpu->mmu_page_cache, PAGE_SIZE);
+	set_page_private(virt_to_page(page->spt), (unsigned long)page);
+	list_add(&page->link, &vcpu->kvm->active_mmu_pages);
+	ASSERT(is_empty_shadow_page(page->spt));
 	page->slot_bitmap = 0;
 	page->multimapped = 0;
 	page->parent_pte = parent_pte;
@@ -638,7 +650,7 @@ static void kvm_mmu_page_unlink_children(struct kvm_vcpu *vcpu,
 	u64 *pt;
 	u64 ent;
 
-	pt = __va(page->page_hpa);
+	pt = page->spt;
 
 	if (page->role.level == PT_PAGE_TABLE_LEVEL) {
 		for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
@@ -646,7 +658,7 @@ static void kvm_mmu_page_unlink_children(struct kvm_vcpu *vcpu,
 				rmap_remove(vcpu, &pt[i]);
 			pt[i] = 0;
 		}
-		kvm_arch_ops->tlb_flush(vcpu);
+		kvm_flush_remote_tlbs(vcpu->kvm);
 		return;
 	}
 
@@ -659,6 +671,7 @@ static void kvm_mmu_page_unlink_children(struct kvm_vcpu *vcpu,
 		ent &= PT64_BASE_ADDR_MASK;
 		mmu_page_remove_parent_pte(vcpu, page_header(ent), &pt[i]);
 	}
+	kvm_flush_remote_tlbs(vcpu->kvm);
 }
 
 static void kvm_mmu_put_page(struct kvm_vcpu *vcpu,
@@ -685,12 +698,12 @@ static void kvm_mmu_zap_page(struct kvm_vcpu *vcpu,
 		}
 		BUG_ON(!parent_pte);
 		kvm_mmu_put_page(vcpu, page, parent_pte);
-		*parent_pte = 0;
+		set_shadow_pte(parent_pte, 0);
 	}
 	kvm_mmu_page_unlink_children(vcpu, page);
 	if (!page->root_count) {
 		hlist_del(&page->hash_link);
-		kvm_mmu_free_page(vcpu, page->page_hpa);
+		kvm_mmu_free_page(vcpu, page);
 	} else
 		list_move(&page->link, &vcpu->kvm->active_mmu_pages);
 }
@@ -717,6 +730,17 @@ static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn)
 	return r;
 }
 
+static void mmu_unshadow(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+	struct kvm_mmu_page *page;
+
+	while ((page = kvm_mmu_lookup_page(vcpu, gfn)) != NULL) {
+		pgprintk("%s: zap %lx %x\n",
+			 __FUNCTION__, gfn, page->role.word);
+		kvm_mmu_zap_page(vcpu, page);
+	}
+}
+
 static void page_header_update_slot(struct kvm *kvm, void *pte, gpa_t gpa)
 {
 	int slot = memslot_id(kvm, gfn_to_memslot(kvm, gpa >> PAGE_SHIFT));
@@ -805,7 +829,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
 				return -ENOMEM;
 			}
 
-			table[index] = new_table->page_hpa | PT_PRESENT_MASK
+			table[index] = __pa(new_table->spt) | PT_PRESENT_MASK
 				| PT_WRITABLE_MASK | PT_USER_MASK;
 		}
 		table_addr = table[index] & PT64_BASE_ADDR_MASK;
@@ -817,11 +841,12 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
 	int i;
 	struct kvm_mmu_page *page;
 
+	if (!VALID_PAGE(vcpu->mmu.root_hpa))
+		return;
 #ifdef CONFIG_X86_64
 	if (vcpu->mmu.shadow_root_level == PT64_ROOT_LEVEL) {
 		hpa_t root = vcpu->mmu.root_hpa;
 
-		ASSERT(VALID_PAGE(root));
 		page = page_header(root);
 		--page->root_count;
 		vcpu->mmu.root_hpa = INVALID_PAGE;
@@ -832,7 +857,6 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
 		hpa_t root = vcpu->mmu.pae_root[i];
 
 		if (root) {
-			ASSERT(VALID_PAGE(root));
 			root &= PT64_BASE_ADDR_MASK;
 			page = page_header(root);
 			--page->root_count;
@@ -857,7 +881,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 		ASSERT(!VALID_PAGE(root));
 		page = kvm_mmu_get_page(vcpu, root_gfn, 0,
 					PT64_ROOT_LEVEL, 0, 0, NULL);
-		root = page->page_hpa;
+		root = __pa(page->spt);
 		++page->root_count;
 		vcpu->mmu.root_hpa = root;
 		return;
@@ -878,7 +902,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 		page = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
 					PT32_ROOT_LEVEL, !is_paging(vcpu),
 					0, NULL);
-		root = page->page_hpa;
+		root = __pa(page->spt);
 		++page->root_count;
 		vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK;
 	}
@@ -928,9 +952,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu)
 	context->free = nonpaging_free;
 	context->root_level = 0;
 	context->shadow_root_level = PT32E_ROOT_LEVEL;
-	mmu_alloc_roots(vcpu);
-	ASSERT(VALID_PAGE(context->root_hpa));
-	kvm_arch_ops->set_cr3(vcpu, context->root_hpa);
+	context->root_hpa = INVALID_PAGE;
 	return 0;
 }
 
@@ -944,59 +966,6 @@ static void paging_new_cr3(struct kvm_vcpu *vcpu)
 {
 	pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->cr3);
 	mmu_free_roots(vcpu);
-	if (unlikely(vcpu->kvm->n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
-		kvm_mmu_free_some_pages(vcpu);
-	mmu_alloc_roots(vcpu);
-	kvm_mmu_flush_tlb(vcpu);
-	kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa);
-}
-
-static inline void set_pte_common(struct kvm_vcpu *vcpu,
-				  u64 *shadow_pte,
-				  gpa_t gaddr,
-				  int dirty,
-				  u64 access_bits,
-				  gfn_t gfn)
-{
-	hpa_t paddr;
-
-	*shadow_pte |= access_bits << PT_SHADOW_BITS_OFFSET;
-	if (!dirty)
-		access_bits &= ~PT_WRITABLE_MASK;
-
-	paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK);
-
-	*shadow_pte |= access_bits;
-
-	if (is_error_hpa(paddr)) {
-		*shadow_pte |= gaddr;
-		*shadow_pte |= PT_SHADOW_IO_MARK;
-		*shadow_pte &= ~PT_PRESENT_MASK;
-		return;
-	}
-
-	*shadow_pte |= paddr;
-
-	if (access_bits & PT_WRITABLE_MASK) {
-		struct kvm_mmu_page *shadow;
-
-		shadow = kvm_mmu_lookup_page(vcpu, gfn);
-		if (shadow) {
-			pgprintk("%s: found shadow page for %lx, marking ro\n",
-				 __FUNCTION__, gfn);
-			access_bits &= ~PT_WRITABLE_MASK;
-			if (is_writeble_pte(*shadow_pte)) {
-				*shadow_pte &= ~PT_WRITABLE_MASK;
-				kvm_arch_ops->tlb_flush(vcpu);
-			}
-		}
-	}
-
-	if (access_bits & PT_WRITABLE_MASK)
-		mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT);
-
-	page_header_update_slot(vcpu->kvm, shadow_pte, gaddr);
-	rmap_add(vcpu, shadow_pte);
 }
 
 static void inject_page_fault(struct kvm_vcpu *vcpu,
@@ -1006,23 +975,6 @@ static void inject_page_fault(struct kvm_vcpu *vcpu,
 	kvm_arch_ops->inject_page_fault(vcpu, addr, err_code);
 }
 
-static inline int fix_read_pf(u64 *shadow_ent)
-{
-	if ((*shadow_ent & PT_SHADOW_USER_MASK) &&
-	    !(*shadow_ent & PT_USER_MASK)) {
-		/*
-		 * If supervisor write protect is disabled, we shadow kernel
-		 * pages as user pages so we can trap the write access.
-		 */
-		*shadow_ent |= PT_USER_MASK;
-		*shadow_ent &= ~PT_WRITABLE_MASK;
-
-		return 1;
-
-	}
-	return 0;
-}
-
 static void paging_free(struct kvm_vcpu *vcpu)
 {
 	nonpaging_free(vcpu);
@@ -1047,10 +999,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
 	context->free = paging_free;
 	context->root_level = level;
 	context->shadow_root_level = level;
-	mmu_alloc_roots(vcpu);
-	ASSERT(VALID_PAGE(context->root_hpa));
-	kvm_arch_ops->set_cr3(vcpu, context->root_hpa |
-		    (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK)));
+	context->root_hpa = INVALID_PAGE;
 	return 0;
 }
 
@@ -1069,10 +1018,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
 	context->free = paging_free;
 	context->root_level = PT32_ROOT_LEVEL;
 	context->shadow_root_level = PT32E_ROOT_LEVEL;
-	mmu_alloc_roots(vcpu);
-	ASSERT(VALID_PAGE(context->root_hpa));
-	kvm_arch_ops->set_cr3(vcpu, context->root_hpa |
-		    (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK)));
+	context->root_hpa = INVALID_PAGE;
 	return 0;
 }
 
@@ -1107,18 +1053,33 @@ static void destroy_kvm_mmu(struct kvm_vcpu *vcpu)
 
 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
 {
+	destroy_kvm_mmu(vcpu);
+	return init_kvm_mmu(vcpu);
+}
+
+int kvm_mmu_load(struct kvm_vcpu *vcpu)
+{
 	int r;
 
-	destroy_kvm_mmu(vcpu);
-	r = init_kvm_mmu(vcpu);
-	if (r < 0)
-		goto out;
+	spin_lock(&vcpu->kvm->lock);
 	r = mmu_topup_memory_caches(vcpu);
+	if (r)
+		goto out;
+	mmu_alloc_roots(vcpu);
+	kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa);
+	kvm_mmu_flush_tlb(vcpu);
 out:
+	spin_unlock(&vcpu->kvm->lock);
 	return r;
 }
+EXPORT_SYMBOL_GPL(kvm_mmu_load);
+
+void kvm_mmu_unload(struct kvm_vcpu *vcpu)
+{
+	mmu_free_roots(vcpu);
+}
 
-static void mmu_pre_write_zap_pte(struct kvm_vcpu *vcpu,
+static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
 				  struct kvm_mmu_page *page,
 				  u64 *spte)
 {
@@ -1135,9 +1096,25 @@ static void mmu_pre_write_zap_pte(struct kvm_vcpu *vcpu,
 		}
 	}
 	*spte = 0;
+	kvm_flush_remote_tlbs(vcpu->kvm);
+}
+
+static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
+				  struct kvm_mmu_page *page,
+				  u64 *spte,
+				  const void *new, int bytes)
+{
+	if (page->role.level != PT_PAGE_TABLE_LEVEL)
+		return;
+
+	if (page->role.glevels == PT32_ROOT_LEVEL)
+		paging32_update_pte(vcpu, page, spte, new, bytes);
+	else
+		paging64_update_pte(vcpu, page, spte, new, bytes);
 }
 
-void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes)
+void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
+		       const u8 *old, const u8 *new, int bytes)
 {
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	struct kvm_mmu_page *page;
@@ -1149,6 +1126,7 @@ void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes)
 	unsigned pte_size;
 	unsigned page_offset;
 	unsigned misaligned;
+	unsigned quadrant;
 	int level;
 	int flooded = 0;
 	int npte;
@@ -1169,6 +1147,7 @@ void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes)
 			continue;
 		pte_size = page->role.glevels == PT32_ROOT_LEVEL ? 4 : 8;
 		misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
+		misaligned |= bytes < 4;
 		if (misaligned || flooded) {
 			/*
 			 * Misaligned accesses are too much trouble to fix
@@ -1200,21 +1179,20 @@ void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes)
 				page_offset <<= 1;
 				npte = 2;
 			}
+			quadrant = page_offset >> PAGE_SHIFT;
 			page_offset &= ~PAGE_MASK;
+			if (quadrant != page->role.quadrant)
+				continue;
 		}
-		spte = __va(page->page_hpa);
-		spte += page_offset / sizeof(*spte);
+		spte = &page->spt[page_offset / sizeof(*spte)];
 		while (npte--) {
-			mmu_pre_write_zap_pte(vcpu, page, spte);
+			mmu_pte_write_zap_pte(vcpu, page, spte);
+			mmu_pte_write_new_pte(vcpu, page, spte, new, bytes);
 			++spte;
 		}
 	}
 }
 
-void kvm_mmu_post_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes)
-{
-}
-
 int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
 {
 	gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);
@@ -1243,13 +1221,6 @@ static void free_mmu_pages(struct kvm_vcpu *vcpu)
 				    struct kvm_mmu_page, link);
 		kvm_mmu_zap_page(vcpu, page);
 	}
-	while (!list_empty(&vcpu->free_pages)) {
-		page = list_entry(vcpu->free_pages.next,
-				  struct kvm_mmu_page, link);
-		list_del(&page->link);
-		__free_page(pfn_to_page(page->page_hpa >> PAGE_SHIFT));
-		page->page_hpa = INVALID_PAGE;
-	}
 	free_page((unsigned long)vcpu->mmu.pae_root);
 }
 
@@ -1260,18 +1231,7 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
 
 	ASSERT(vcpu);
 
-	for (i = 0; i < KVM_NUM_MMU_PAGES; i++) {
-		struct kvm_mmu_page *page_header = &vcpu->page_header_buf[i];
-
-		INIT_LIST_HEAD(&page_header->link);
-		if ((page = alloc_page(GFP_KERNEL)) == NULL)
-			goto error_1;
-		set_page_private(page, (unsigned long)page_header);
-		page_header->page_hpa = (hpa_t)page_to_pfn(page) << PAGE_SHIFT;
-		memset(__va(page_header->page_hpa), 0, PAGE_SIZE);
-		list_add(&page_header->link, &vcpu->free_pages);
-		++vcpu->kvm->n_free_mmu_pages;
-	}
+	vcpu->kvm->n_free_mmu_pages = KVM_NUM_MMU_PAGES;
 
 	/*
 	 * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64.
@@ -1296,7 +1256,6 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
 {
 	ASSERT(vcpu);
 	ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));
-	ASSERT(list_empty(&vcpu->free_pages));
 
 	return alloc_mmu_pages(vcpu);
 }
@@ -1305,7 +1264,6 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu)
 {
 	ASSERT(vcpu);
 	ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));
-	ASSERT(!list_empty(&vcpu->free_pages));
 
 	return init_kvm_mmu(vcpu);
 }
@@ -1331,7 +1289,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot)
 		if (!test_bit(slot, &page->slot_bitmap))
 			continue;
 
-		pt = __va(page->page_hpa);
+		pt = page->spt;
 		for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
 			/* avoid RMW */
 			if (pt[i] & PT_WRITABLE_MASK) {
@@ -1354,7 +1312,7 @@ void kvm_mmu_zap_all(struct kvm_vcpu *vcpu)
 	}
 
 	mmu_free_memory_caches(vcpu);
-	kvm_arch_ops->tlb_flush(vcpu);
+	kvm_flush_remote_tlbs(vcpu->kvm);
 	init_kvm_mmu(vcpu);
 }
 
@@ -1364,6 +1322,10 @@ void kvm_mmu_module_exit(void)
 		kmem_cache_destroy(pte_chain_cache);
 	if (rmap_desc_cache)
 		kmem_cache_destroy(rmap_desc_cache);
+	if (mmu_page_cache)
+		kmem_cache_destroy(mmu_page_cache);
+	if (mmu_page_header_cache)
+		kmem_cache_destroy(mmu_page_header_cache);
 }
 
 int kvm_mmu_module_init(void)
@@ -1379,6 +1341,18 @@ int kvm_mmu_module_init(void)
 	if (!rmap_desc_cache)
 		goto nomem;
 
+	mmu_page_cache = kmem_cache_create("kvm_mmu_page",
+					   PAGE_SIZE,
+					   PAGE_SIZE, 0, NULL, NULL);
+	if (!mmu_page_cache)
+		goto nomem;
+
+	mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header",
+						  sizeof(struct kvm_mmu_page),
+						  0, 0, NULL, NULL);
+	if (!mmu_page_header_cache)
+		goto nomem;
+
 	return 0;
 
 nomem:
@@ -1482,7 +1456,7 @@ static int count_writable_mappings(struct kvm_vcpu *vcpu)
 	int i;
 
 	list_for_each_entry(page, &vcpu->kvm->active_mmu_pages, link) {
-		u64 *pt = __va(page->page_hpa);
+		u64 *pt = page->spt;
 
 		if (page->role.level != PT_PAGE_TABLE_LEVEL)
 			continue;