author		Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>	2012-03-21 10:50:34 -0400
committer	Avi Kivity <avi@redhat.com>	2012-04-08 09:08:27 -0400
commit		1e3f42f03c38c29c1814199a6f0a2f01b919ea3f (patch)
tree		0ceba904d28e89ded0954dcf40361a6ec235cce3 /arch/x86
parent		220f773a0013bf6fe2eefd9718ac7471f368fd8e (diff)
KVM: MMU: Improve iteration through sptes from rmap
Iteration using rmap_next(), whose actual body is pte_list_next(), is inefficient: every time we call it we start over by checking whether the rmap holds a single spte or points to a descriptor which links more sptes.

In the case of shadow paging, this quadratic total iteration cost is a problem. Even for two dimensional paging, with EPT/NPT on, in which we almost always have a single mapping, the extra checks at the end of the iteration should be eliminated.

This patch fixes this by introducing rmap_iterator, which keeps the iteration context for the next search. Furthermore, the implementation of rmap_next() is split into two functions, rmap_get_first() and rmap_get_next(), to avoid repeatedly checking whether the rmap being iterated on has only one spte.

Although only a slight change was expected for EPT/NPT, the actual improvement was significant: we observed that GET_DIRTY_LOG for 1GB of dirty memory became 15% faster than before. This is probably because the branches in the new code are easier to predict.

Note: we simply remove pte_list_next() because we can think of parent_ptes as a reverse mapping.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Signed-off-by: Avi Kivity <avi@redhat.com>
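For reference, the calling convention this patch introduces looks roughly like the sketch below; the loop shape mirrors the converted call sites in mmu.c, and rmapp here stands in for whichever rmap head pointer the caller already holds:

	u64 *sptep;
	struct rmap_iterator iter;

	/* Walk every spte linked from this rmap.  If an spte is dropped
	 * during the walk, restart the walk with rmap_get_first(). */
	for (sptep = rmap_get_first(*rmapp, &iter); sptep;
	     sptep = rmap_get_next(&iter)) {
		/* ... read or update *sptep ... */
	}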
Diffstat (limited to 'arch/x86')
-rw-r--r--	arch/x86/kvm/mmu.c	196
-rw-r--r--	arch/x86/kvm/mmu_audit.c	10
2 files changed, 124 insertions(+), 82 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 3213348e3a93..29ad6f9c58a5 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -842,32 +842,6 @@ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte,
 	return count;
 }
 
-static u64 *pte_list_next(unsigned long *pte_list, u64 *spte)
-{
-	struct pte_list_desc *desc;
-	u64 *prev_spte;
-	int i;
-
-	if (!*pte_list)
-		return NULL;
-	else if (!(*pte_list & 1)) {
-		if (!spte)
-			return (u64 *)*pte_list;
-		return NULL;
-	}
-	desc = (struct pte_list_desc *)(*pte_list & ~1ul);
-	prev_spte = NULL;
-	while (desc) {
-		for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) {
-			if (prev_spte == spte)
-				return desc->sptes[i];
-			prev_spte = desc->sptes[i];
-		}
-		desc = desc->more;
-	}
-	return NULL;
-}
-
 static void
 pte_list_desc_remove_entry(unsigned long *pte_list, struct pte_list_desc *desc,
 			   int i, struct pte_list_desc *prev_desc)
@@ -988,11 +962,6 @@ static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 	return pte_list_add(vcpu, spte, rmapp);
 }
 
-static u64 *rmap_next(unsigned long *rmapp, u64 *spte)
-{
-	return pte_list_next(rmapp, spte);
-}
-
 static void rmap_remove(struct kvm *kvm, u64 *spte)
 {
 	struct kvm_mmu_page *sp;
@@ -1005,6 +974,67 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
 	pte_list_remove(spte, rmapp);
 }
 
+/*
+ * Used by the following functions to iterate through the sptes linked by a
+ * rmap.  All fields are private and not assumed to be used outside.
+ */
+struct rmap_iterator {
+	/* private fields */
+	struct pte_list_desc *desc;	/* holds the sptep if not NULL */
+	int pos;			/* index of the sptep */
+};
+
+/*
+ * Iteration must be started by this function.  This should also be used after
+ * removing/dropping sptes from the rmap link because in such cases the
+ * information in the iterator may not be valid.
+ *
+ * Returns sptep if found, NULL otherwise.
+ */
+static u64 *rmap_get_first(unsigned long rmap, struct rmap_iterator *iter)
+{
+	if (!rmap)
+		return NULL;
+
+	if (!(rmap & 1)) {
+		iter->desc = NULL;
+		return (u64 *)rmap;
+	}
+
+	iter->desc = (struct pte_list_desc *)(rmap & ~1ul);
+	iter->pos = 0;
+	return iter->desc->sptes[iter->pos];
+}
+
+/*
+ * Must be used with a valid iterator: e.g. after rmap_get_first().
+ *
+ * Returns sptep if found, NULL otherwise.
+ */
+static u64 *rmap_get_next(struct rmap_iterator *iter)
+{
+	if (iter->desc) {
+		if (iter->pos < PTE_LIST_EXT - 1) {
+			u64 *sptep;
+
+			++iter->pos;
+			sptep = iter->desc->sptes[iter->pos];
+			if (sptep)
+				return sptep;
+		}
+
+		iter->desc = iter->desc->more;
+
+		if (iter->desc) {
+			iter->pos = 0;
+			/* desc->sptes[0] cannot be NULL */
+			return iter->desc->sptes[iter->pos];
+		}
+	}
+
+	return NULL;
+}
+
 static void drop_spte(struct kvm *kvm, u64 *sptep)
 {
 	if (mmu_spte_clear_track_bits(sptep))
@@ -1013,23 +1043,27 @@ static void drop_spte(struct kvm *kvm, u64 *sptep)
 
 static int __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, int level)
 {
-	u64 *spte = NULL;
+	u64 *sptep;
+	struct rmap_iterator iter;
 	int write_protected = 0;
 
-	while ((spte = rmap_next(rmapp, spte))) {
-		BUG_ON(!(*spte & PT_PRESENT_MASK));
-		rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
+	for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
+		BUG_ON(!(*sptep & PT_PRESENT_MASK));
+		rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, *sptep);
 
-		if (!is_writable_pte(*spte))
+		if (!is_writable_pte(*sptep)) {
+			sptep = rmap_get_next(&iter);
 			continue;
+		}
 
 		if (level == PT_PAGE_TABLE_LEVEL) {
-			mmu_spte_update(spte, *spte & ~PT_WRITABLE_MASK);
+			mmu_spte_update(sptep, *sptep & ~PT_WRITABLE_MASK);
+			sptep = rmap_get_next(&iter);
 		} else {
-			BUG_ON(!is_large_pte(*spte));
-			drop_spte(kvm, spte);
+			BUG_ON(!is_large_pte(*sptep));
+			drop_spte(kvm, sptep);
 			--kvm->stat.lpages;
-			spte = NULL;
+			sptep = rmap_get_first(*rmapp, &iter);
 		}
 
 		write_protected = 1;
@@ -1084,48 +1118,57 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
 static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
 			   unsigned long data)
 {
-	u64 *spte;
+	u64 *sptep;
+	struct rmap_iterator iter;
 	int need_tlb_flush = 0;
 
-	while ((spte = rmap_next(rmapp, NULL))) {
-		BUG_ON(!(*spte & PT_PRESENT_MASK));
-		rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte);
-		drop_spte(kvm, spte);
+	while ((sptep = rmap_get_first(*rmapp, &iter))) {
+		BUG_ON(!(*sptep & PT_PRESENT_MASK));
+		rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", sptep, *sptep);
+
+		drop_spte(kvm, sptep);
 		need_tlb_flush = 1;
 	}
+
 	return need_tlb_flush;
 }
 
 static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
 			     unsigned long data)
 {
+	u64 *sptep;
+	struct rmap_iterator iter;
 	int need_flush = 0;
-	u64 *spte, new_spte;
+	u64 new_spte;
 	pte_t *ptep = (pte_t *)data;
 	pfn_t new_pfn;
 
 	WARN_ON(pte_huge(*ptep));
 	new_pfn = pte_pfn(*ptep);
-	spte = rmap_next(rmapp, NULL);
-	while (spte) {
-		BUG_ON(!is_shadow_present_pte(*spte));
-		rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte);
+
+	for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
+		BUG_ON(!is_shadow_present_pte(*sptep));
+		rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", sptep, *sptep);
+
 		need_flush = 1;
+
 		if (pte_write(*ptep)) {
-			drop_spte(kvm, spte);
-			spte = rmap_next(rmapp, NULL);
+			drop_spte(kvm, sptep);
+			sptep = rmap_get_first(*rmapp, &iter);
 		} else {
-			new_spte = *spte &~ (PT64_BASE_ADDR_MASK);
+			new_spte = *sptep & ~PT64_BASE_ADDR_MASK;
 			new_spte |= (u64)new_pfn << PAGE_SHIFT;
 
 			new_spte &= ~PT_WRITABLE_MASK;
 			new_spte &= ~SPTE_HOST_WRITEABLE;
 			new_spte &= ~shadow_accessed_mask;
-			mmu_spte_clear_track_bits(spte);
-			mmu_spte_set(spte, new_spte);
-			spte = rmap_next(rmapp, spte);
+
+			mmu_spte_clear_track_bits(sptep);
+			mmu_spte_set(sptep, new_spte);
+			sptep = rmap_get_next(&iter);
 		}
 	}
+
 	if (need_flush)
 		kvm_flush_remote_tlbs(kvm);
 
@@ -1184,7 +1227,8 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
 static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 			 unsigned long data)
 {
-	u64 *spte;
+	u64 *sptep;
+	struct rmap_iterator iter;
 	int young = 0;
 
 	/*
@@ -1197,25 +1241,24 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 	if (!shadow_accessed_mask)
 		return kvm_unmap_rmapp(kvm, rmapp, data);
 
-	spte = rmap_next(rmapp, NULL);
-	while (spte) {
-		int _young;
-		u64 _spte = *spte;
-		BUG_ON(!(_spte & PT_PRESENT_MASK));
-		_young = _spte & PT_ACCESSED_MASK;
-		if (_young) {
+	for (sptep = rmap_get_first(*rmapp, &iter); sptep;
+	     sptep = rmap_get_next(&iter)) {
+		BUG_ON(!(*sptep & PT_PRESENT_MASK));
+
+		if (*sptep & PT_ACCESSED_MASK) {
 			young = 1;
-			clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte);
+			clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)sptep);
 		}
-		spte = rmap_next(rmapp, spte);
 	}
+
 	return young;
 }
 
 static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 			      unsigned long data)
 {
-	u64 *spte;
+	u64 *sptep;
+	struct rmap_iterator iter;
 	int young = 0;
 
 	/*
@@ -1226,16 +1269,14 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 	if (!shadow_accessed_mask)
 		goto out;
 
-	spte = rmap_next(rmapp, NULL);
-	while (spte) {
-		u64 _spte = *spte;
-		BUG_ON(!(_spte & PT_PRESENT_MASK));
-		young = _spte & PT_ACCESSED_MASK;
-		if (young) {
+	for (sptep = rmap_get_first(*rmapp, &iter); sptep;
+	     sptep = rmap_get_next(&iter)) {
+		BUG_ON(!(*sptep & PT_PRESENT_MASK));
+
+		if (*sptep & PT_ACCESSED_MASK) {
 			young = 1;
 			break;
 		}
-		spte = rmap_next(rmapp, spte);
 	}
 out:
 	return young;
@@ -1887,10 +1928,11 @@ static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte)
 
 static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
-	u64 *parent_pte;
+	u64 *sptep;
+	struct rmap_iterator iter;
 
-	while ((parent_pte = pte_list_next(&sp->parent_ptes, NULL)))
-		drop_parent_pte(sp, parent_pte);
+	while ((sptep = rmap_get_first(sp->parent_ptes, &iter)))
+		drop_parent_pte(sp, sptep);
 }
 
 static int mmu_zap_unsync_children(struct kvm *kvm,
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index 715da5a19a5b..7d7d0b9e23eb 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -192,7 +192,8 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	struct kvm_memory_slot *slot;
 	unsigned long *rmapp;
-	u64 *spte;
+	u64 *sptep;
+	struct rmap_iterator iter;
 
 	if (sp->role.direct || sp->unsync || sp->role.invalid)
 		return;
@@ -200,13 +201,12 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
 	slot = gfn_to_memslot(kvm, sp->gfn);
 	rmapp = &slot->rmap[sp->gfn - slot->base_gfn];
 
-	spte = rmap_next(rmapp, NULL);
-	while (spte) {
-		if (is_writable_pte(*spte))
+	for (sptep = rmap_get_first(*rmapp, &iter); sptep;
+	     sptep = rmap_get_next(&iter)) {
+		if (is_writable_pte(*sptep))
 			audit_printk(kvm, "shadow page has writable "
 				     "mappings: gfn %llx role %x\n",
 				     sp->gfn, sp->role.word);
-		spte = rmap_next(rmapp, spte);
 	}
 }
 