about | summary | refs | log | tree | commit | diff | stats
path: root/arch/x86
diff options
context:
space:
mode:
author: Marcelo Tosatti <mtosatti@redhat.com> 2008-09-23 12:18:39 -0400
committer: Avi Kivity <avi@redhat.com> 2008-10-15 08:25:25 -0400
commit: 4731d4c7a07769cf2926c327177b97bb8c68cafc (patch)
tree: c732e9de4dbb35c74c158962771b6804dd8db153 /arch/x86
parent: 6844dec6948679d084f054235fee19ba4e3a3096 (diff)
KVM: MMU: out of sync shadow core
Allow guest pagetables to go out of sync. Instead of emulating write accesses to guest pagetables, or unshadowing them, we un-write-protect the page table and allow the guest to modify it at will. We rely on invlpg executions to synchronize individual ptes, and will synchronize the entire pagetable on TLB flushes.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
Diffstat (limited to 'arch/x86')
-rw-r--r-- arch/x86/kvm/mmu.c 210
-rw-r--r-- arch/x86/kvm/paging_tmpl.h 2
-rw-r--r-- arch/x86/kvm/x86.c 3
3 files changed, 197 insertions, 18 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 57c7580e7f98..d88659ae7778 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -147,6 +147,10 @@ struct kvm_shadow_walk {
147 u64 addr, u64 *spte, int level); 147 u64 addr, u64 *spte, int level);
148}; 148};
149 149
150struct kvm_unsync_walk {
151 int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk);
152};
153
150typedef int (*mmu_parent_walk_fn) (struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp); 154typedef int (*mmu_parent_walk_fn) (struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp);
151 155
152static struct kmem_cache *pte_chain_cache; 156static struct kmem_cache *pte_chain_cache;
@@ -654,8 +658,6 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
654 658
655 if (write_protected) 659 if (write_protected)
656 kvm_flush_remote_tlbs(kvm); 660 kvm_flush_remote_tlbs(kvm);
657
658 account_shadowed(kvm, gfn);
659} 661}
660 662
661static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) 663static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
@@ -908,6 +910,41 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
908{ 910{
909} 911}
910 912
913static int mmu_unsync_walk(struct kvm_mmu_page *sp,
914 struct kvm_unsync_walk *walker)
915{
916 int i, ret;
917
918 if (!sp->unsync_children)
919 return 0;
920
921 for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
922 u64 ent = sp->spt[i];
923
924 if (is_shadow_present_pte(ent)) {
925 struct kvm_mmu_page *child;
926 child = page_header(ent & PT64_BASE_ADDR_MASK);
927
928 if (child->unsync_children) {
929 ret = mmu_unsync_walk(child, walker);
930 if (ret)
931 return ret;
932 }
933
934 if (child->unsync) {
935 ret = walker->entry(child, walker);
936 if (ret)
937 return ret;
938 }
939 }
940 }
941
942 if (i == PT64_ENT_PER_PAGE)
943 sp->unsync_children = 0;
944
945 return 0;
946}
947
911static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) 948static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
912{ 949{
913 unsigned index; 950 unsigned index;
@@ -928,6 +965,59 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
928 return NULL; 965 return NULL;
929} 966}
930 967
968static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
969{
970 WARN_ON(!sp->unsync);
971 sp->unsync = 0;
972 --kvm->stat.mmu_unsync;
973}
974
975static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp);
976
977static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
978{
979 if (sp->role.glevels != vcpu->arch.mmu.root_level) {
980 kvm_mmu_zap_page(vcpu->kvm, sp);
981 return 1;
982 }
983
984 rmap_write_protect(vcpu->kvm, sp->gfn);
985 if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
986 kvm_mmu_zap_page(vcpu->kvm, sp);
987 return 1;
988 }
989
990 kvm_mmu_flush_tlb(vcpu);
991 kvm_unlink_unsync_page(vcpu->kvm, sp);
992 return 0;
993}
994
995struct sync_walker {
996 struct kvm_vcpu *vcpu;
997 struct kvm_unsync_walk walker;
998};
999
1000static int mmu_sync_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk)
1001{
1002 struct sync_walker *sync_walk = container_of(walk, struct sync_walker,
1003 walker);
1004 struct kvm_vcpu *vcpu = sync_walk->vcpu;
1005
1006 kvm_sync_page(vcpu, sp);
1007 return (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock));
1008}
1009
1010static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
1011{
1012 struct sync_walker walker = {
1013 .walker = { .entry = mmu_sync_fn, },
1014 .vcpu = vcpu,
1015 };
1016
1017 while (mmu_unsync_walk(sp, &walker.walker))
1018 cond_resched_lock(&vcpu->kvm->mmu_lock);
1019}
1020
931static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, 1021static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
932 gfn_t gfn, 1022 gfn_t gfn,
933 gva_t gaddr, 1023 gva_t gaddr,
@@ -941,7 +1031,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
941 unsigned quadrant; 1031 unsigned quadrant;
942 struct hlist_head *bucket; 1032 struct hlist_head *bucket;
943 struct kvm_mmu_page *sp; 1033 struct kvm_mmu_page *sp;
944 struct hlist_node *node; 1034 struct hlist_node *node, *tmp;
945 1035
946 role.word = 0; 1036 role.word = 0;
947 role.glevels = vcpu->arch.mmu.root_level; 1037 role.glevels = vcpu->arch.mmu.root_level;
@@ -957,8 +1047,18 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
957 gfn, role.word); 1047 gfn, role.word);
958 index = kvm_page_table_hashfn(gfn); 1048 index = kvm_page_table_hashfn(gfn);
959 bucket = &vcpu->kvm->arch.mmu_page_hash[index]; 1049 bucket = &vcpu->kvm->arch.mmu_page_hash[index];
960 hlist_for_each_entry(sp, node, bucket, hash_link) 1050 hlist_for_each_entry_safe(sp, node, tmp, bucket, hash_link)
961 if (sp->gfn == gfn && sp->role.word == role.word) { 1051 if (sp->gfn == gfn) {
1052 if (sp->unsync)
1053 if (kvm_sync_page(vcpu, sp))
1054 continue;
1055
1056 if (sp->role.word != role.word)
1057 continue;
1058
1059 if (sp->unsync_children)
1060 set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests);
1061
962 mmu_page_add_parent_pte(vcpu, sp, parent_pte); 1062 mmu_page_add_parent_pte(vcpu, sp, parent_pte);
963 pgprintk("%s: found\n", __func__); 1063 pgprintk("%s: found\n", __func__);
964 return sp; 1064 return sp;
@@ -971,8 +1071,10 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
971 sp->gfn = gfn; 1071 sp->gfn = gfn;
972 sp->role = role; 1072 sp->role = role;
973 hlist_add_head(&sp->hash_link, bucket); 1073 hlist_add_head(&sp->hash_link, bucket);
974 if (!metaphysical) 1074 if (!metaphysical) {
975 rmap_write_protect(vcpu->kvm, gfn); 1075 rmap_write_protect(vcpu->kvm, gfn);
1076 account_shadowed(vcpu->kvm, gfn);
1077 }
976 if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) 1078 if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte)
977 vcpu->arch.mmu.prefetch_page(vcpu, sp); 1079 vcpu->arch.mmu.prefetch_page(vcpu, sp);
978 else 1080 else
@@ -1078,14 +1180,47 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
1078 } 1180 }
1079} 1181}
1080 1182
1183struct zap_walker {
1184 struct kvm_unsync_walk walker;
1185 struct kvm *kvm;
1186 int zapped;
1187};
1188
1189static int mmu_zap_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk)
1190{
1191 struct zap_walker *zap_walk = container_of(walk, struct zap_walker,
1192 walker);
1193 kvm_mmu_zap_page(zap_walk->kvm, sp);
1194 zap_walk->zapped = 1;
1195 return 0;
1196}
1197
1198static int mmu_zap_unsync_children(struct kvm *kvm, struct kvm_mmu_page *sp)
1199{
1200 struct zap_walker walker = {
1201 .walker = { .entry = mmu_zap_fn, },
1202 .kvm = kvm,
1203 .zapped = 0,
1204 };
1205
1206 if (sp->role.level == PT_PAGE_TABLE_LEVEL)
1207 return 0;
1208 mmu_unsync_walk(sp, &walker.walker);
1209 return walker.zapped;
1210}
1211
1081static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) 1212static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
1082{ 1213{
1214 int ret;
1083 ++kvm->stat.mmu_shadow_zapped; 1215 ++kvm->stat.mmu_shadow_zapped;
1216 ret = mmu_zap_unsync_children(kvm, sp);
1084 kvm_mmu_page_unlink_children(kvm, sp); 1217 kvm_mmu_page_unlink_children(kvm, sp);
1085 kvm_mmu_unlink_parents(kvm, sp); 1218 kvm_mmu_unlink_parents(kvm, sp);
1086 kvm_flush_remote_tlbs(kvm); 1219 kvm_flush_remote_tlbs(kvm);
1087 if (!sp->role.invalid && !sp->role.metaphysical) 1220 if (!sp->role.invalid && !sp->role.metaphysical)
1088 unaccount_shadowed(kvm, sp->gfn); 1221 unaccount_shadowed(kvm, sp->gfn);
1222 if (sp->unsync)
1223 kvm_unlink_unsync_page(kvm, sp);
1089 if (!sp->root_count) { 1224 if (!sp->root_count) {
1090 hlist_del(&sp->hash_link); 1225 hlist_del(&sp->hash_link);
1091 kvm_mmu_free_page(kvm, sp); 1226 kvm_mmu_free_page(kvm, sp);
@@ -1095,7 +1230,7 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
1095 kvm_reload_remote_mmus(kvm); 1230 kvm_reload_remote_mmus(kvm);
1096 } 1231 }
1097 kvm_mmu_reset_last_pte_updated(kvm); 1232 kvm_mmu_reset_last_pte_updated(kvm);
1098 return 0; 1233 return ret;
1099} 1234}
1100 1235
1101/* 1236/*
@@ -1201,10 +1336,58 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
1201 return page; 1336 return page;
1202} 1337}
1203 1338
1339static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
1340{
1341 sp->unsync_children = 1;
1342 return 1;
1343}
1344
1345static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
1346{
1347 unsigned index;
1348 struct hlist_head *bucket;
1349 struct kvm_mmu_page *s;
1350 struct hlist_node *node, *n;
1351
1352 index = kvm_page_table_hashfn(sp->gfn);
1353 bucket = &vcpu->kvm->arch.mmu_page_hash[index];
1354 /* don't unsync if pagetable is shadowed with multiple roles */
1355 hlist_for_each_entry_safe(s, node, n, bucket, hash_link) {
1356 if (s->gfn != sp->gfn || s->role.metaphysical)
1357 continue;
1358 if (s->role.word != sp->role.word)
1359 return 1;
1360 }
1361 mmu_parent_walk(vcpu, sp, unsync_walk_fn);
1362 ++vcpu->kvm->stat.mmu_unsync;
1363 sp->unsync = 1;
1364 mmu_convert_notrap(sp);
1365 return 0;
1366}
1367
1368static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
1369 bool can_unsync)
1370{
1371 struct kvm_mmu_page *shadow;
1372
1373 shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn);
1374 if (shadow) {
1375 if (shadow->role.level != PT_PAGE_TABLE_LEVEL)
1376 return 1;
1377 if (shadow->unsync)
1378 return 0;
1379 if (can_unsync)
1380 return kvm_unsync_page(vcpu, shadow);
1381 return 1;
1382 }
1383 return 0;
1384}
1385
1204static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, 1386static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1205 unsigned pte_access, int user_fault, 1387 unsigned pte_access, int user_fault,
1206 int write_fault, int dirty, int largepage, 1388 int write_fault, int dirty, int largepage,
1207 gfn_t gfn, pfn_t pfn, bool speculative) 1389 gfn_t gfn, pfn_t pfn, bool speculative,
1390 bool can_unsync)
1208{ 1391{
1209 u64 spte; 1392 u64 spte;
1210 int ret = 0; 1393 int ret = 0;
@@ -1231,7 +1414,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1231 1414
1232 if ((pte_access & ACC_WRITE_MASK) 1415 if ((pte_access & ACC_WRITE_MASK)
1233 || (write_fault && !is_write_protection(vcpu) && !user_fault)) { 1416 || (write_fault && !is_write_protection(vcpu) && !user_fault)) {
1234 struct kvm_mmu_page *shadow;
1235 1417
1236 if (largepage && has_wrprotected_page(vcpu->kvm, gfn)) { 1418 if (largepage && has_wrprotected_page(vcpu->kvm, gfn)) {
1237 ret = 1; 1419 ret = 1;
@@ -1241,8 +1423,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1241 1423
1242 spte |= PT_WRITABLE_MASK; 1424 spte |= PT_WRITABLE_MASK;
1243 1425
1244 shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn); 1426 if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
1245 if (shadow) {
1246 pgprintk("%s: found shadow page for %lx, marking ro\n", 1427 pgprintk("%s: found shadow page for %lx, marking ro\n",
1247 __func__, gfn); 1428 __func__, gfn);
1248 ret = 1; 1429 ret = 1;
@@ -1260,7 +1441,6 @@ set_pte:
1260 return ret; 1441 return ret;
1261} 1442}
1262 1443
1263
1264static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, 1444static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1265 unsigned pt_access, unsigned pte_access, 1445 unsigned pt_access, unsigned pte_access,
1266 int user_fault, int write_fault, int dirty, 1446 int user_fault, int write_fault, int dirty,
@@ -1298,7 +1478,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1298 } 1478 }
1299 } 1479 }
1300 if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault, 1480 if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault,
1301 dirty, largepage, gfn, pfn, speculative)) { 1481 dirty, largepage, gfn, pfn, speculative, true)) {
1302 if (write_fault) 1482 if (write_fault)
1303 *ptwrite = 1; 1483 *ptwrite = 1;
1304 kvm_x86_ops->tlb_flush(vcpu); 1484 kvm_x86_ops->tlb_flush(vcpu);
@@ -1518,10 +1698,6 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
1518 vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); 1698 vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
1519} 1699}
1520 1700
1521static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
1522{
1523}
1524
1525static void mmu_sync_roots(struct kvm_vcpu *vcpu) 1701static void mmu_sync_roots(struct kvm_vcpu *vcpu)
1526{ 1702{
1527 int i; 1703 int i;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index dc169e8148b1..613ec9aa674a 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -580,7 +580,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
580 pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); 580 pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
581 set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, 581 set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
582 is_dirty_pte(gpte), 0, gfn, 582 is_dirty_pte(gpte), 0, gfn,
583 spte_to_pfn(sp->spt[i]), true); 583 spte_to_pfn(sp->spt[i]), true, false);
584 } 584 }
585 585
586 return !nr_present; 586 return !nr_present;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index efee85ba07e5..1c5864ac0837 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -101,6 +101,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
101 { "mmu_flooded", VM_STAT(mmu_flooded) }, 101 { "mmu_flooded", VM_STAT(mmu_flooded) },
102 { "mmu_recycled", VM_STAT(mmu_recycled) }, 102 { "mmu_recycled", VM_STAT(mmu_recycled) },
103 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, 103 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
104 { "mmu_unsync", VM_STAT(mmu_unsync) },
104 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, 105 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
105 { "largepages", VM_STAT(lpages) }, 106 { "largepages", VM_STAT(lpages) },
106 { NULL } 107 { NULL }
@@ -3120,6 +3121,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3120 if (vcpu->requests) { 3121 if (vcpu->requests) {
3121 if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests)) 3122 if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
3122 __kvm_migrate_timers(vcpu); 3123 __kvm_migrate_timers(vcpu);
3124 if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests))
3125 kvm_mmu_sync_roots(vcpu);
3123 if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) 3126 if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
3124 kvm_x86_ops->tlb_flush(vcpu); 3127 kvm_x86_ops->tlb_flush(vcpu);
3125 if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, 3128 if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS,