Diffstat (limited to 'arch/x86/kvm/mmu.c')
-rw-r--r--  arch/x86/kvm/mmu.c  210
1 file changed, 193 insertions, 17 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 57c7580e7f98..d88659ae7778 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -147,6 +147,10 @@ struct kvm_shadow_walk {
 		     u64 addr, u64 *spte, int level);
 };
 
+struct kvm_unsync_walk {
+	int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk);
+};
+
 typedef int (*mmu_parent_walk_fn) (struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp);
 
 static struct kmem_cache *pte_chain_cache;
@@ -654,8 +658,6 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
 
 	if (write_protected)
 		kvm_flush_remote_tlbs(kvm);
-
-	account_shadowed(kvm, gfn);
 }
 
 static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
@@ -908,6 +910,41 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
 {
 }
 
+static int mmu_unsync_walk(struct kvm_mmu_page *sp,
+			   struct kvm_unsync_walk *walker)
+{
+	int i, ret;
+
+	if (!sp->unsync_children)
+		return 0;
+
+	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
+		u64 ent = sp->spt[i];
+
+		if (is_shadow_present_pte(ent)) {
+			struct kvm_mmu_page *child;
+			child = page_header(ent & PT64_BASE_ADDR_MASK);
+
+			if (child->unsync_children) {
+				ret = mmu_unsync_walk(child, walker);
+				if (ret)
+					return ret;
+			}
+
+			if (child->unsync) {
+				ret = walker->entry(child, walker);
+				if (ret)
+					return ret;
+			}
+		}
+	}
+
+	if (i == PT64_ENT_PER_PAGE)
+		sp->unsync_children = 0;
+
+	return 0;
+}
+
 static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
 {
 	unsigned index;
@@ -928,6 +965,59 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
 	return NULL;
 }
 
+static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+	WARN_ON(!sp->unsync);
+	sp->unsync = 0;
+	--kvm->stat.mmu_unsync;
+}
+
+static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp);
+
+static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+	if (sp->role.glevels != vcpu->arch.mmu.root_level) {
+		kvm_mmu_zap_page(vcpu->kvm, sp);
+		return 1;
+	}
+
+	rmap_write_protect(vcpu->kvm, sp->gfn);
+	if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
+		kvm_mmu_zap_page(vcpu->kvm, sp);
+		return 1;
+	}
+
+	kvm_mmu_flush_tlb(vcpu);
+	kvm_unlink_unsync_page(vcpu->kvm, sp);
+	return 0;
+}
+
+struct sync_walker {
+	struct kvm_vcpu *vcpu;
+	struct kvm_unsync_walk walker;
+};
+
+static int mmu_sync_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk)
+{
+	struct sync_walker *sync_walk = container_of(walk, struct sync_walker,
+						     walker);
+	struct kvm_vcpu *vcpu = sync_walk->vcpu;
+
+	kvm_sync_page(vcpu, sp);
+	return (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock));
+}
+
+static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+	struct sync_walker walker = {
+		.walker = { .entry = mmu_sync_fn, },
+		.vcpu = vcpu,
+	};
+
+	while (mmu_unsync_walk(sp, &walker.walker))
+		cond_resched_lock(&vcpu->kvm->mmu_lock);
+}
+
 static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 					     gfn_t gfn,
 					     gva_t gaddr,
@@ -941,7 +1031,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	unsigned quadrant;
 	struct hlist_head *bucket;
 	struct kvm_mmu_page *sp;
-	struct hlist_node *node;
+	struct hlist_node *node, *tmp;
 
 	role.word = 0;
 	role.glevels = vcpu->arch.mmu.root_level;
@@ -957,8 +1047,18 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 		 gfn, role.word);
 	index = kvm_page_table_hashfn(gfn);
 	bucket = &vcpu->kvm->arch.mmu_page_hash[index];
-	hlist_for_each_entry(sp, node, bucket, hash_link)
-		if (sp->gfn == gfn && sp->role.word == role.word) {
+	hlist_for_each_entry_safe(sp, node, tmp, bucket, hash_link)
+		if (sp->gfn == gfn) {
+			if (sp->unsync)
+				if (kvm_sync_page(vcpu, sp))
+					continue;
+
+			if (sp->role.word != role.word)
+				continue;
+
+			if (sp->unsync_children)
+				set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests);
+
 			mmu_page_add_parent_pte(vcpu, sp, parent_pte);
 			pgprintk("%s: found\n", __func__);
 			return sp;
@@ -971,8 +1071,10 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	sp->gfn = gfn;
 	sp->role = role;
 	hlist_add_head(&sp->hash_link, bucket);
-	if (!metaphysical)
+	if (!metaphysical) {
 		rmap_write_protect(vcpu->kvm, gfn);
+		account_shadowed(vcpu->kvm, gfn);
+	}
 	if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte)
 		vcpu->arch.mmu.prefetch_page(vcpu, sp);
 	else
@@ -1078,14 +1180,47 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
 	}
 }
 
+struct zap_walker {
+	struct kvm_unsync_walk walker;
+	struct kvm *kvm;
+	int zapped;
+};
+
+static int mmu_zap_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk)
+{
+	struct zap_walker *zap_walk = container_of(walk, struct zap_walker,
+						   walker);
+	kvm_mmu_zap_page(zap_walk->kvm, sp);
+	zap_walk->zapped = 1;
+	return 0;
+}
+
+static int mmu_zap_unsync_children(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+	struct zap_walker walker = {
+		.walker = { .entry = mmu_zap_fn, },
+		.kvm = kvm,
+		.zapped = 0,
+	};
+
+	if (sp->role.level == PT_PAGE_TABLE_LEVEL)
+		return 0;
+	mmu_unsync_walk(sp, &walker.walker);
+	return walker.zapped;
+}
+
 static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
+	int ret;
 	++kvm->stat.mmu_shadow_zapped;
+	ret = mmu_zap_unsync_children(kvm, sp);
 	kvm_mmu_page_unlink_children(kvm, sp);
 	kvm_mmu_unlink_parents(kvm, sp);
 	kvm_flush_remote_tlbs(kvm);
 	if (!sp->role.invalid && !sp->role.metaphysical)
 		unaccount_shadowed(kvm, sp->gfn);
+	if (sp->unsync)
+		kvm_unlink_unsync_page(kvm, sp);
 	if (!sp->root_count) {
 		hlist_del(&sp->hash_link);
 		kvm_mmu_free_page(kvm, sp);
@@ -1095,7 +1230,7 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 		kvm_reload_remote_mmus(kvm);
 	}
 	kvm_mmu_reset_last_pte_updated(kvm);
-	return 0;
+	return ret;
 }
 
 /*
@@ -1201,10 +1336,58 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
 	return page;
 }
 
+static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+	sp->unsync_children = 1;
+	return 1;
+}
+
+static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+	unsigned index;
+	struct hlist_head *bucket;
+	struct kvm_mmu_page *s;
+	struct hlist_node *node, *n;
+
+	index = kvm_page_table_hashfn(sp->gfn);
+	bucket = &vcpu->kvm->arch.mmu_page_hash[index];
+	/* don't unsync if pagetable is shadowed with multiple roles */
+	hlist_for_each_entry_safe(s, node, n, bucket, hash_link) {
+		if (s->gfn != sp->gfn || s->role.metaphysical)
+			continue;
+		if (s->role.word != sp->role.word)
+			return 1;
+	}
+	mmu_parent_walk(vcpu, sp, unsync_walk_fn);
+	++vcpu->kvm->stat.mmu_unsync;
+	sp->unsync = 1;
+	mmu_convert_notrap(sp);
+	return 0;
+}
+
+static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
+				  bool can_unsync)
+{
+	struct kvm_mmu_page *shadow;
+
+	shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn);
+	if (shadow) {
+		if (shadow->role.level != PT_PAGE_TABLE_LEVEL)
+			return 1;
+		if (shadow->unsync)
+			return 0;
+		if (can_unsync)
+			return kvm_unsync_page(vcpu, shadow);
+		return 1;
+	}
+	return 0;
+}
+
 static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 		    unsigned pte_access, int user_fault,
 		    int write_fault, int dirty, int largepage,
-		    gfn_t gfn, pfn_t pfn, bool speculative)
+		    gfn_t gfn, pfn_t pfn, bool speculative,
+		    bool can_unsync)
 {
 	u64 spte;
 	int ret = 0;
@@ -1231,7 +1414,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 
 	if ((pte_access & ACC_WRITE_MASK)
 	    || (write_fault && !is_write_protection(vcpu) && !user_fault)) {
-		struct kvm_mmu_page *shadow;
 
 		if (largepage && has_wrprotected_page(vcpu->kvm, gfn)) {
 			ret = 1;
@@ -1241,8 +1423,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 
 		spte |= PT_WRITABLE_MASK;
 
-		shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn);
-		if (shadow) {
+		if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
 			pgprintk("%s: found shadow page for %lx, marking ro\n",
 				 __func__, gfn);
 			ret = 1;
@@ -1260,7 +1441,6 @@ set_pte:
 	return ret;
 }
 
-
 static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 			 unsigned pt_access, unsigned pte_access,
 			 int user_fault, int write_fault, int dirty,
@@ -1298,7 +1478,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 		}
 	}
 	if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault,
-		      dirty, largepage, gfn, pfn, speculative)) {
+		      dirty, largepage, gfn, pfn, speculative, true)) {
 		if (write_fault)
 			*ptwrite = 1;
 		kvm_x86_ops->tlb_flush(vcpu);
@@ -1518,10 +1698,6 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 	vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
 }
 
-static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
-{
-}
-
 static void mmu_sync_roots(struct kvm_vcpu *vcpu)
 {
 	int i;