Diffstat (limited to 'arch/x86/kvm/mmu.c')
-rw-r--r--   arch/x86/kvm/mmu.c | 210
1 file changed, 193 insertions(+), 17 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 57c7580e7f98..d88659ae7778 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -147,6 +147,10 @@ struct kvm_shadow_walk {
                      u64 addr, u64 *spte, int level);
 };
 
+struct kvm_unsync_walk {
+        int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk);
+};
+
 typedef int (*mmu_parent_walk_fn) (struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp);
 
 static struct kmem_cache *pte_chain_cache;
@@ -654,8 +658,6 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
 
         if (write_protected)
                 kvm_flush_remote_tlbs(kvm);
-
-        account_shadowed(kvm, gfn);
 }
 
 static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
@@ -908,6 +910,41 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
 {
 }
 
+static int mmu_unsync_walk(struct kvm_mmu_page *sp,
+                           struct kvm_unsync_walk *walker)
+{
+        int i, ret;
+
+        if (!sp->unsync_children)
+                return 0;
+
+        for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
+                u64 ent = sp->spt[i];
+
+                if (is_shadow_present_pte(ent)) {
+                        struct kvm_mmu_page *child;
+                        child = page_header(ent & PT64_BASE_ADDR_MASK);
+
+                        if (child->unsync_children) {
+                                ret = mmu_unsync_walk(child, walker);
+                                if (ret)
+                                        return ret;
+                        }
+
+                        if (child->unsync) {
+                                ret = walker->entry(child, walker);
+                                if (ret)
+                                        return ret;
+                        }
+                }
+        }
+
+        if (i == PT64_ENT_PER_PAGE)
+                sp->unsync_children = 0;
+
+        return 0;
+}
+
 static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
 {
         unsigned index;
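Note: mmu_unsync_walk() is the generic visitor the rest of this patch builds on. It recurses into child shadow pages flagged unsync_children, calls walker->entry() for every child marked unsync, and aborts as soon as the callback returns nonzero; only a complete pass (i == PT64_ENT_PER_PAGE) clears sp->unsync_children. As a rough illustration of the callback contract — a hypothetical walker, not part of this patch — a caller embeds struct kvm_unsync_walk in its own context and recovers it with container_of(), exactly as the sync_walker and zap_walker callers added below do:

/* Hypothetical example: count the unsync children reachable from "sp". */
struct count_walker {
        struct kvm_unsync_walk walker;
        int count;
};

static int mmu_count_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk)
{
        struct count_walker *cw = container_of(walk, struct count_walker, walker);

        cw->count++;
        return 0;               /* nonzero would abort the walk early */
}

static int mmu_count_unsync_children(struct kvm_mmu_page *sp)
{
        struct count_walker walker = {
                .walker = { .entry = mmu_count_fn, },
                .count = 0,
        };

        mmu_unsync_walk(sp, &walker.walker);
        return walker.count;
}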
@@ -928,6 +965,59 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
         return NULL;
 }
 
+static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+        WARN_ON(!sp->unsync);
+        sp->unsync = 0;
+        --kvm->stat.mmu_unsync;
+}
+
+static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp);
+
+static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+        if (sp->role.glevels != vcpu->arch.mmu.root_level) {
+                kvm_mmu_zap_page(vcpu->kvm, sp);
+                return 1;
+        }
+
+        rmap_write_protect(vcpu->kvm, sp->gfn);
+        if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
+                kvm_mmu_zap_page(vcpu->kvm, sp);
+                return 1;
+        }
+
+        kvm_mmu_flush_tlb(vcpu);
+        kvm_unlink_unsync_page(vcpu->kvm, sp);
+        return 0;
+}
+
+struct sync_walker {
+        struct kvm_vcpu *vcpu;
+        struct kvm_unsync_walk walker;
+};
+
+static int mmu_sync_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk)
+{
+        struct sync_walker *sync_walk = container_of(walk, struct sync_walker,
+                                                     walker);
+        struct kvm_vcpu *vcpu = sync_walk->vcpu;
+
+        kvm_sync_page(vcpu, sp);
+        return (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock));
+}
+
+static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+        struct sync_walker walker = {
+                .walker = { .entry = mmu_sync_fn, },
+                .vcpu = vcpu,
+        };
+
+        while (mmu_unsync_walk(sp, &walker.walker))
+                cond_resched_lock(&vcpu->kvm->mmu_lock);
+}
+
 static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
                                              gfn_t gfn,
                                              gva_t gaddr,
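Note: kvm_sync_page() returns 1 when it zapped the page (stale role.glevels, or a sync_page() failure) and 0 when the page was resynchronized and taken off the unsync list, so callers must not touch sp after a nonzero return — kvm_mmu_get_page() below relies on exactly that ("if (kvm_sync_page(vcpu, sp)) continue;"). mmu_sync_fn() additionally reports whether the walk should be interrupted, which lets mmu_sync_children() drop and re-take mmu_lock via cond_resched_lock() and restart the walk. A hypothetical helper — not in the patch — capturing the caller-side rule:

/* Hypothetical helper: resync "sp" if it is marked unsync.  Returns 1 if
 * kvm_sync_page() zapped it, in which case "sp" must no longer be used. */
static int mmu_try_sync_one(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
        if (!sp->unsync)
                return 0;
        return kvm_sync_page(vcpu, sp);
}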
@@ -941,7 +1031,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
         unsigned quadrant;
         struct hlist_head *bucket;
         struct kvm_mmu_page *sp;
-        struct hlist_node *node;
+        struct hlist_node *node, *tmp;
 
         role.word = 0;
         role.glevels = vcpu->arch.mmu.root_level;
@@ -957,8 +1047,18 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
                  gfn, role.word);
         index = kvm_page_table_hashfn(gfn);
         bucket = &vcpu->kvm->arch.mmu_page_hash[index];
-        hlist_for_each_entry(sp, node, bucket, hash_link)
-                if (sp->gfn == gfn && sp->role.word == role.word) {
+        hlist_for_each_entry_safe(sp, node, tmp, bucket, hash_link)
+                if (sp->gfn == gfn) {
+                        if (sp->unsync)
+                                if (kvm_sync_page(vcpu, sp))
+                                        continue;
+
+                        if (sp->role.word != role.word)
+                                continue;
+
+                        if (sp->unsync_children)
+                                set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests);
+
                         mmu_page_add_parent_pte(vcpu, sp, parent_pte);
                         pgprintk("%s: found\n", __func__);
                         return sp;
@@ -971,8 +1071,10 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
         sp->gfn = gfn;
         sp->role = role;
         hlist_add_head(&sp->hash_link, bucket);
-        if (!metaphysical)
+        if (!metaphysical) {
                 rmap_write_protect(vcpu->kvm, gfn);
+                account_shadowed(vcpu->kvm, gfn);
+        }
         if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte)
                 vcpu->arch.mmu.prefetch_page(vcpu, sp);
         else
@@ -1078,14 +1180,47 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
         }
 }
 
+struct zap_walker {
+        struct kvm_unsync_walk walker;
+        struct kvm *kvm;
+        int zapped;
+};
+
+static int mmu_zap_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk)
+{
+        struct zap_walker *zap_walk = container_of(walk, struct zap_walker,
+                                                   walker);
+        kvm_mmu_zap_page(zap_walk->kvm, sp);
+        zap_walk->zapped = 1;
+        return 0;
+}
+
+static int mmu_zap_unsync_children(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+        struct zap_walker walker = {
+                .walker = { .entry = mmu_zap_fn, },
+                .kvm = kvm,
+                .zapped = 0,
+        };
+
+        if (sp->role.level == PT_PAGE_TABLE_LEVEL)
+                return 0;
+        mmu_unsync_walk(sp, &walker.walker);
+        return walker.zapped;
+}
+
 static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
+        int ret;
         ++kvm->stat.mmu_shadow_zapped;
+        ret = mmu_zap_unsync_children(kvm, sp);
         kvm_mmu_page_unlink_children(kvm, sp);
         kvm_mmu_unlink_parents(kvm, sp);
         kvm_flush_remote_tlbs(kvm);
         if (!sp->role.invalid && !sp->role.metaphysical)
                 unaccount_shadowed(kvm, sp->gfn);
+        if (sp->unsync)
+                kvm_unlink_unsync_page(kvm, sp);
         if (!sp->root_count) {
                 hlist_del(&sp->hash_link);
                 kvm_mmu_free_page(kvm, sp);
@@ -1095,7 +1230,7 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
                 kvm_reload_remote_mmus(kvm);
         }
         kvm_mmu_reset_last_pte_updated(kvm);
-        return 0;
+        return ret;
 }
 
 /*
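Note: kvm_mmu_zap_page() now zaps any reachable unsync children first (mmu_zap_unsync_children() returns 0 immediately for PT_PAGE_TABLE_LEVEL pages, which have no shadow children) and returns whether it did so. A nonzero return therefore means shadow pages other than the one passed in may have been freed; presumably callers that iterate the page hash while zapping would use it to restart their scan, roughly like this hypothetical pattern (not part of this patch):

        /* Hypothetical caller: restart the bucket scan if the zap may have
         * freed shadow pages besides "sp". */
restart:
        hlist_for_each_entry_safe(sp, node, n, bucket, hash_link)
                if (sp->gfn == gfn && !sp->role.metaphysical)
                        if (kvm_mmu_zap_page(kvm, sp))
                                goto restart;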
@@ -1201,10 +1336,58 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
         return page;
 }
 
+static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+        sp->unsync_children = 1;
+        return 1;
+}
+
+static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+        unsigned index;
+        struct hlist_head *bucket;
+        struct kvm_mmu_page *s;
+        struct hlist_node *node, *n;
+
+        index = kvm_page_table_hashfn(sp->gfn);
+        bucket = &vcpu->kvm->arch.mmu_page_hash[index];
+        /* don't unsync if pagetable is shadowed with multiple roles */
+        hlist_for_each_entry_safe(s, node, n, bucket, hash_link) {
+                if (s->gfn != sp->gfn || s->role.metaphysical)
+                        continue;
+                if (s->role.word != sp->role.word)
+                        return 1;
+        }
+        mmu_parent_walk(vcpu, sp, unsync_walk_fn);
+        ++vcpu->kvm->stat.mmu_unsync;
+        sp->unsync = 1;
+        mmu_convert_notrap(sp);
+        return 0;
+}
+
+static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
+                                  bool can_unsync)
+{
+        struct kvm_mmu_page *shadow;
+
+        shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn);
+        if (shadow) {
+                if (shadow->role.level != PT_PAGE_TABLE_LEVEL)
+                        return 1;
+                if (shadow->unsync)
+                        return 0;
+                if (can_unsync)
+                        return kvm_unsync_page(vcpu, shadow);
+                return 1;
+        }
+        return 0;
+}
+
 static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
                     unsigned pte_access, int user_fault,
                     int write_fault, int dirty, int largepage,
-                    gfn_t gfn, pfn_t pfn, bool speculative)
+                    gfn_t gfn, pfn_t pfn, bool speculative,
+                    bool can_unsync)
 {
         u64 spte;
         int ret = 0;
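Note: mmu_need_write_protect() is the new gatekeeper for making a spte writable. It returns 1 when the gfn must stay write-protected (it is shadowed as a non-leaf page table, or unsyncing is not allowed) and 0 when the write may go through: no shadow page exists, the page is already unsync, or kvm_unsync_page() just marked it unsync and converted its nonpresent sptes to the notrap encoding. The can_unsync argument threaded into set_spte() lets callers opt out of creating new unsync pages; mmu_set_spte() passes true (see the hunk further down), and presumably a resynchronization path would pass false so that syncing a page cannot immediately unsync it again. A sketch of such a call — sptep, gpte_access, gfn and pfn are stand-in names, and the "false" is an assumption about how sync code would use the flag, not something shown in this file:

        /* Hypothetical resync path: install the pte, but refuse to mark the
         * gfn unsync while we are in the middle of syncing it. */
        set_spte(vcpu, sptep, gpte_access, 0 /* user_fault */,
                 0 /* write_fault */, 1 /* dirty */, 0 /* largepage */,
                 gfn, pfn, false /* speculative */, false /* can_unsync */);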
@@ -1231,7 +1414,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 
         if ((pte_access & ACC_WRITE_MASK)
             || (write_fault && !is_write_protection(vcpu) && !user_fault)) {
-                struct kvm_mmu_page *shadow;
 
                 if (largepage && has_wrprotected_page(vcpu->kvm, gfn)) {
                         ret = 1;
@@ -1241,8 +1423,7 @@
 
                 spte |= PT_WRITABLE_MASK;
 
-                shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn);
-                if (shadow) {
+                if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
                         pgprintk("%s: found shadow page for %lx, marking ro\n",
                                  __func__, gfn);
                         ret = 1;
@@ -1260,7 +1441,6 @@ set_pte:
         return ret;
 }
 
-
 static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
                          unsigned pt_access, unsigned pte_access,
                          int user_fault, int write_fault, int dirty,
@@ -1298,7 +1478,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
                 }
         }
         if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault,
-                      dirty, largepage, gfn, pfn, speculative)) {
+                      dirty, largepage, gfn, pfn, speculative, true)) {
                 if (write_fault)
                         *ptwrite = 1;
                 kvm_x86_ops->tlb_flush(vcpu);
@@ -1518,10 +1698,6 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
         vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
 }
 
-static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
-{
-}
-
 static void mmu_sync_roots(struct kvm_vcpu *vcpu)
 {
         int i;