Diffstat (limited to 'arch/x86/kvm/mmu.c')
-rw-r--r-- | arch/x86/kvm/mmu.c | 348
1 file changed, 198 insertions, 150 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 4cb164268846..be3cea4407ff 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -135,8 +135,6 @@ module_param(dbg, bool, 0644);
 #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \
 			| PT64_NX_MASK)
 
-#define PTE_LIST_EXT 4
-
 #define ACC_EXEC_MASK    1
 #define ACC_WRITE_MASK   PT_WRITABLE_MASK
 #define ACC_USER_MASK    PT_USER_MASK
@@ -151,6 +149,9 @@ module_param(dbg, bool, 0644);
 
 #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
 
+/* make pte_list_desc fit well in cache line */
+#define PTE_LIST_EXT 3
+
 struct pte_list_desc {
 	u64 *sptes[PTE_LIST_EXT];
 	struct pte_list_desc *more;
@@ -550,19 +551,29 @@ static u64 mmu_spte_get_lockless(u64 *sptep)
 
 static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu)
 {
-	rcu_read_lock();
-	atomic_inc(&vcpu->kvm->arch.reader_counter);
-
-	/* Increase the counter before walking shadow page table */
-	smp_mb__after_atomic_inc();
+	/*
+	 * Prevent page table teardown by making any free-er wait during
+	 * kvm_flush_remote_tlbs() IPI to all active vcpus.
+	 */
+	local_irq_disable();
+	vcpu->mode = READING_SHADOW_PAGE_TABLES;
+	/*
+	 * Make sure a following spte read is not reordered ahead of the write
+	 * to vcpu->mode.
+	 */
+	smp_mb();
 }
 
 static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu)
 {
-	/* Decrease the counter after walking shadow page table finished */
-	smp_mb__before_atomic_dec();
-	atomic_dec(&vcpu->kvm->arch.reader_counter);
-	rcu_read_unlock();
+	/*
+	 * Make sure the write to vcpu->mode is not reordered in front of
+	 * reads to sptes.  If it does, kvm_commit_zap_page() can see us
+	 * OUTSIDE_GUEST_MODE and proceed to free the shadow page table.
+	 */
+	smp_mb();
+	vcpu->mode = OUTSIDE_GUEST_MODE;
+	local_irq_enable();
 }
 
 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
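Usage note for the two helpers above: a lockless walker brackets its whole shadow walk with them, so the free-er in kvm_mmu_commit_zap_page() waits until the walk is finished. A minimal sketch, assuming the for_each_shadow_entry_lockless() iterator already defined elsewhere in this file; the helper name get_spte_lockless_sketch() is illustrative only, not part of this patch:

    static u64 get_spte_lockless_sketch(struct kvm_vcpu *vcpu, u64 addr)
    {
            struct kvm_shadow_walk_iterator iterator;
            u64 spte = 0ull;

            walk_shadow_page_lockless_begin(vcpu);  /* IRQs off, mode set, barrier */
            for_each_shadow_entry_lockless(vcpu, addr, iterator, spte)
                    if (!is_shadow_present_pte(spte))
                            break;
            walk_shadow_page_lockless_end(vcpu);    /* barrier, mode restored, IRQs on */

            return spte;
    }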
@@ -841,32 +852,6 @@ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte,
 	return count;
 }
 
-static u64 *pte_list_next(unsigned long *pte_list, u64 *spte)
-{
-	struct pte_list_desc *desc;
-	u64 *prev_spte;
-	int i;
-
-	if (!*pte_list)
-		return NULL;
-	else if (!(*pte_list & 1)) {
-		if (!spte)
-			return (u64 *)*pte_list;
-		return NULL;
-	}
-	desc = (struct pte_list_desc *)(*pte_list & ~1ul);
-	prev_spte = NULL;
-	while (desc) {
-		for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) {
-			if (prev_spte == spte)
-				return desc->sptes[i];
-			prev_spte = desc->sptes[i];
-		}
-		desc = desc->more;
-	}
-	return NULL;
-}
-
 static void
 pte_list_desc_remove_entry(unsigned long *pte_list, struct pte_list_desc *desc,
 			   int i, struct pte_list_desc *prev_desc)
@@ -987,11 +972,6 @@ static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 	return pte_list_add(vcpu, spte, rmapp);
 }
 
-static u64 *rmap_next(unsigned long *rmapp, u64 *spte)
-{
-	return pte_list_next(rmapp, spte);
-}
-
 static void rmap_remove(struct kvm *kvm, u64 *spte)
 {
 	struct kvm_mmu_page *sp;
@@ -1004,106 +984,201 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
 	pte_list_remove(spte, rmapp);
 }
 
+/*
+ * Used by the following functions to iterate through the sptes linked by a
+ * rmap.  All fields are private and not assumed to be used outside.
+ */
+struct rmap_iterator {
+	/* private fields */
+	struct pte_list_desc *desc;	/* holds the sptep if not NULL */
+	int pos;			/* index of the sptep */
+};
+
+/*
+ * Iteration must be started by this function.  This should also be used after
+ * removing/dropping sptes from the rmap link because in such cases the
+ * information in the iterator may not be valid.
+ *
+ * Returns sptep if found, NULL otherwise.
+ */
+static u64 *rmap_get_first(unsigned long rmap, struct rmap_iterator *iter)
+{
+	if (!rmap)
+		return NULL;
+
+	if (!(rmap & 1)) {
+		iter->desc = NULL;
+		return (u64 *)rmap;
+	}
+
+	iter->desc = (struct pte_list_desc *)(rmap & ~1ul);
+	iter->pos = 0;
+	return iter->desc->sptes[iter->pos];
+}
+
+/*
+ * Must be used with a valid iterator: e.g. after rmap_get_first().
+ *
+ * Returns sptep if found, NULL otherwise.
+ */
+static u64 *rmap_get_next(struct rmap_iterator *iter)
+{
+	if (iter->desc) {
+		if (iter->pos < PTE_LIST_EXT - 1) {
+			u64 *sptep;
+
+			++iter->pos;
+			sptep = iter->desc->sptes[iter->pos];
+			if (sptep)
+				return sptep;
+		}
+
+		iter->desc = iter->desc->more;
+
+		if (iter->desc) {
+			iter->pos = 0;
+			/* desc->sptes[0] cannot be NULL */
+			return iter->desc->sptes[iter->pos];
+		}
+	}
+
+	return NULL;
+}
+
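The iterator is consumed in two patterns in the hunks below: a plain for loop when nothing is removed, and a restart through rmap_get_first() whenever an spte is dropped, since dropping can invalidate the iterator. A minimal read-only sketch of the loop pattern; count_rmap_sptes() is a hypothetical helper, not part of this patch:

    static int count_rmap_sptes(unsigned long *rmapp)
    {
            u64 *sptep;
            struct rmap_iterator iter;
            int count = 0;

            /* walk every spte reachable from this rmap head */
            for (sptep = rmap_get_first(*rmapp, &iter); sptep;
                 sptep = rmap_get_next(&iter))
                    ++count;

            return count;
    }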
 static void drop_spte(struct kvm *kvm, u64 *sptep)
 {
 	if (mmu_spte_clear_track_bits(sptep))
 		rmap_remove(kvm, sptep);
 }
 
-int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn,
-			       struct kvm_memory_slot *slot)
+static int __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, int level)
 {
-	unsigned long *rmapp;
-	u64 *spte;
-	int i, write_protected = 0;
+	u64 *sptep;
+	struct rmap_iterator iter;
+	int write_protected = 0;
 
-	rmapp = __gfn_to_rmap(gfn, PT_PAGE_TABLE_LEVEL, slot);
-	spte = rmap_next(rmapp, NULL);
-	while (spte) {
-		BUG_ON(!(*spte & PT_PRESENT_MASK));
-		rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
-		if (is_writable_pte(*spte)) {
-			mmu_spte_update(spte, *spte & ~PT_WRITABLE_MASK);
-			write_protected = 1;
+	for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
+		BUG_ON(!(*sptep & PT_PRESENT_MASK));
+		rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, *sptep);
+
+		if (!is_writable_pte(*sptep)) {
+			sptep = rmap_get_next(&iter);
+			continue;
 		}
-		spte = rmap_next(rmapp, spte);
-	}
 
-	/* check for huge page mappings */
-	for (i = PT_DIRECTORY_LEVEL;
-	     i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
-		rmapp = __gfn_to_rmap(gfn, i, slot);
-		spte = rmap_next(rmapp, NULL);
-		while (spte) {
-			BUG_ON(!(*spte & PT_PRESENT_MASK));
-			BUG_ON(!is_large_pte(*spte));
-			pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn);
-			if (is_writable_pte(*spte)) {
-				drop_spte(kvm, spte);
-				--kvm->stat.lpages;
-				spte = NULL;
-				write_protected = 1;
-			}
-			spte = rmap_next(rmapp, spte);
+		if (level == PT_PAGE_TABLE_LEVEL) {
+			mmu_spte_update(sptep, *sptep & ~PT_WRITABLE_MASK);
+			sptep = rmap_get_next(&iter);
+		} else {
+			BUG_ON(!is_large_pte(*sptep));
+			drop_spte(kvm, sptep);
+			--kvm->stat.lpages;
+			sptep = rmap_get_first(*rmapp, &iter);
 		}
+
+		write_protected = 1;
 	}
 
 	return write_protected;
 }
 
+/**
+ * kvm_mmu_write_protect_pt_masked - write protect selected PT level pages
+ * @kvm: kvm instance
+ * @slot: slot to protect
+ * @gfn_offset: start of the BITS_PER_LONG pages we care about
+ * @mask: indicates which pages we should protect
+ *
+ * Used when we do not need to care about huge page mappings: e.g. during dirty
+ * logging we do not have any such mappings.
+ */
+void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
+				     struct kvm_memory_slot *slot,
+				     gfn_t gfn_offset, unsigned long mask)
+{
+	unsigned long *rmapp;
+
+	while (mask) {
+		rmapp = &slot->rmap[gfn_offset + __ffs(mask)];
+		__rmap_write_protect(kvm, rmapp, PT_PAGE_TABLE_LEVEL);
+
+		/* clear the first set bit */
+		mask &= mask - 1;
+	}
+}
+
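The mask walk above visits set bits from least significant to most significant, protecting one gfn per iteration. A worked example with a hypothetical mask value, reusing the variables of the function above:

    unsigned long mask = 0x29;      /* bits 0, 3 and 5 set */

    while (mask) {
            /* __ffs(mask) yields 0, then 3, then 5: gfns gfn_offset + 0, +3, +5 */
            rmapp = &slot->rmap[gfn_offset + __ffs(mask)];
            __rmap_write_protect(kvm, rmapp, PT_PAGE_TABLE_LEVEL);
            mask &= mask - 1;       /* clear the lowest set bit */
    }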
 static int rmap_write_protect(struct kvm *kvm, u64 gfn)
 {
 	struct kvm_memory_slot *slot;
+	unsigned long *rmapp;
+	int i;
+	int write_protected = 0;
 
 	slot = gfn_to_memslot(kvm, gfn);
-	return kvm_mmu_rmap_write_protect(kvm, gfn, slot);
+
+	for (i = PT_PAGE_TABLE_LEVEL;
+	     i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
+		rmapp = __gfn_to_rmap(gfn, i, slot);
+		write_protected |= __rmap_write_protect(kvm, rmapp, i);
+	}
+
+	return write_protected;
 }
 
 static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
 			   unsigned long data)
 {
-	u64 *spte;
+	u64 *sptep;
+	struct rmap_iterator iter;
 	int need_tlb_flush = 0;
 
-	while ((spte = rmap_next(rmapp, NULL))) {
-		BUG_ON(!(*spte & PT_PRESENT_MASK));
-		rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte);
-		drop_spte(kvm, spte);
+	while ((sptep = rmap_get_first(*rmapp, &iter))) {
+		BUG_ON(!(*sptep & PT_PRESENT_MASK));
+		rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", sptep, *sptep);
+
+		drop_spte(kvm, sptep);
 		need_tlb_flush = 1;
 	}
+
 	return need_tlb_flush;
 }
 
 static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
 			     unsigned long data)
 {
+	u64 *sptep;
+	struct rmap_iterator iter;
 	int need_flush = 0;
-	u64 *spte, new_spte;
+	u64 new_spte;
 	pte_t *ptep = (pte_t *)data;
 	pfn_t new_pfn;
 
 	WARN_ON(pte_huge(*ptep));
 	new_pfn = pte_pfn(*ptep);
-	spte = rmap_next(rmapp, NULL);
-	while (spte) {
-		BUG_ON(!is_shadow_present_pte(*spte));
-		rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte);
+
+	for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
+		BUG_ON(!is_shadow_present_pte(*sptep));
+		rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", sptep, *sptep);
+
 		need_flush = 1;
+
 		if (pte_write(*ptep)) {
-			drop_spte(kvm, spte);
-			spte = rmap_next(rmapp, NULL);
+			drop_spte(kvm, sptep);
+			sptep = rmap_get_first(*rmapp, &iter);
 		} else {
-			new_spte = *spte &~ (PT64_BASE_ADDR_MASK);
+			new_spte = *sptep & ~PT64_BASE_ADDR_MASK;
 			new_spte |= (u64)new_pfn << PAGE_SHIFT;
 
 			new_spte &= ~PT_WRITABLE_MASK;
 			new_spte &= ~SPTE_HOST_WRITEABLE;
 			new_spte &= ~shadow_accessed_mask;
-			mmu_spte_clear_track_bits(spte);
-			mmu_spte_set(spte, new_spte);
-			spte = rmap_next(rmapp, spte);
+
+			mmu_spte_clear_track_bits(sptep);
+			mmu_spte_set(sptep, new_spte);
+			sptep = rmap_get_next(&iter);
 		}
 	}
+
 	if (need_flush)
 		kvm_flush_remote_tlbs(kvm);
 
@@ -1162,7 +1237,8 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
 static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 			 unsigned long data)
 {
-	u64 *spte;
+	u64 *sptep;
+	struct rmap_iterator iter;
 	int young = 0;
 
 	/*
@@ -1175,25 +1251,24 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 	if (!shadow_accessed_mask)
 		return kvm_unmap_rmapp(kvm, rmapp, data);
 
-	spte = rmap_next(rmapp, NULL);
-	while (spte) {
-		int _young;
-		u64 _spte = *spte;
-		BUG_ON(!(_spte & PT_PRESENT_MASK));
-		_young = _spte & PT_ACCESSED_MASK;
-		if (_young) {
+	for (sptep = rmap_get_first(*rmapp, &iter); sptep;
+	     sptep = rmap_get_next(&iter)) {
+		BUG_ON(!(*sptep & PT_PRESENT_MASK));
+
+		if (*sptep & PT_ACCESSED_MASK) {
 			young = 1;
-			clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte);
+			clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)sptep);
 		}
-		spte = rmap_next(rmapp, spte);
 	}
+
 	return young;
 }
 
 static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 			      unsigned long data)
 {
-	u64 *spte;
+	u64 *sptep;
+	struct rmap_iterator iter;
 	int young = 0;
 
 	/*
@@ -1204,16 +1279,14 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 	if (!shadow_accessed_mask)
 		goto out;
 
-	spte = rmap_next(rmapp, NULL);
-	while (spte) {
-		u64 _spte = *spte;
-		BUG_ON(!(_spte & PT_PRESENT_MASK));
-		young = _spte & PT_ACCESSED_MASK;
-		if (young) {
+	for (sptep = rmap_get_first(*rmapp, &iter); sptep;
+	     sptep = rmap_get_next(&iter)) {
+		BUG_ON(!(*sptep & PT_PRESENT_MASK));
+
+		if (*sptep & PT_ACCESSED_MASK) {
 			young = 1;
 			break;
 		}
-		spte = rmap_next(rmapp, spte);
 	}
 out:
 	return young;
@@ -1865,10 +1938,11 @@ static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte)
 
 static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
-	u64 *parent_pte;
+	u64 *sptep;
+	struct rmap_iterator iter;
 
-	while ((parent_pte = pte_list_next(&sp->parent_ptes, NULL)))
-		drop_parent_pte(sp, parent_pte);
+	while ((sptep = rmap_get_first(sp->parent_ptes, &iter)))
+		drop_parent_pte(sp, sptep);
 }
 
 static int mmu_zap_unsync_children(struct kvm *kvm,
@@ -1925,30 +1999,6 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 	return ret;
 }
 
-static void kvm_mmu_isolate_pages(struct list_head *invalid_list)
-{
-	struct kvm_mmu_page *sp;
-
-	list_for_each_entry(sp, invalid_list, link)
-		kvm_mmu_isolate_page(sp);
-}
-
-static void free_pages_rcu(struct rcu_head *head)
-{
-	struct kvm_mmu_page *next, *sp;
-
-	sp = container_of(head, struct kvm_mmu_page, rcu);
-	while (sp) {
-		if (!list_empty(&sp->link))
-			next = list_first_entry(&sp->link,
-						struct kvm_mmu_page, link);
-		else
-			next = NULL;
-		kvm_mmu_free_page(sp);
-		sp = next;
-	}
-}
-
 static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 				    struct list_head *invalid_list)
 {
@@ -1957,17 +2007,17 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 	if (list_empty(invalid_list))
 		return;
 
-	kvm_flush_remote_tlbs(kvm);
-
-	if (atomic_read(&kvm->arch.reader_counter)) {
-		kvm_mmu_isolate_pages(invalid_list);
-		sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
-		list_del_init(invalid_list);
+	/*
+	 * wmb: make sure everyone sees our modifications to the page tables
+	 * rmb: make sure we see changes to vcpu->mode
+	 */
+	smp_mb();
 
-		trace_kvm_mmu_delay_free_pages(sp);
-		call_rcu(&sp->rcu, free_pages_rcu);
-		return;
-	}
+	/*
+	 * Wait for all vcpus to exit guest mode and/or lockless shadow
+	 * page table walks.
+	 */
+	kvm_flush_remote_tlbs(kvm);
 
 	do {
 		sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
@@ -1975,7 +2025,6 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 		kvm_mmu_isolate_page(sp);
 		kvm_mmu_free_page(sp);
 	} while (!list_empty(invalid_list));
-
 }
 
 /*
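The smp_mb() added here pairs with the barriers in walk_shadow_page_lockless_begin()/end() earlier in this diff. A rough sketch of the intended ordering, reconstructed from those comments and not code from this patch:

    /*
     *  lockless walker (vcpu)                     zapper (kvm_mmu_commit_zap_page)
     *  ----------------------                     --------------------------------
     *  local_irq_disable();
     *  vcpu->mode = READING_SHADOW_PAGE_TABLES;   zap sptes, build invalid_list
     *  smp_mb();                                  smp_mb();
     *  ... read sptes ...                         kvm_flush_remote_tlbs();
     *                                               -> the IPI is not serviced until
     *                                                  the walker re-enables IRQs,
     *                                                  so the free-er waits here
     *  smp_mb();                                  kvm_mmu_free_page() for each sp
     *  vcpu->mode = OUTSIDE_GUEST_MODE;
     *  local_irq_enable();
     */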
@@ -2546,8 +2595,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
 			*gfnp = gfn;
 			kvm_release_pfn_clean(pfn);
 			pfn &= ~mask;
-			if (!get_page_unless_zero(pfn_to_page(pfn)))
-				BUG();
+			kvm_get_pfn(pfn);
 			*pfnp = pfn;
 		}
 	}
@@ -3554,7 +3602,7 @@ static bool detect_write_flooding(struct kvm_mmu_page *sp)
 	 * Skip write-flooding detected for the sp whose level is 1, because
 	 * it can become unsync, then the guest page is not write-protected.
 	 */
-	if (sp->role.level == 1)
+	if (sp->role.level == PT_PAGE_TABLE_LEVEL)
 		return false;
 
 	return ++sp->write_flooding_count >= 3;