aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Ilya Dryomov <idryomov@gmail.com> 2017-06-21 11:27:18 -0400
committer: Ilya Dryomov <idryomov@gmail.com> 2017-07-07 11:25:18 -0400
commit: 6f428df47dae2c8ea31fd4c0c74a12a8a5ac2d1d (patch)
tree: 00076a72eda7738433726f6e6bc4ce1644d370f6
parent: 278b1d709c6acc6f7d138fed775c76695b068e43 (diff)
libceph: pg_upmap[_items] infrastructure
pg_temp and pg_upmap encodings are the same (PG -> array of osds), except for the incremental remove: it's an empty mapping in new_pg_temp for pg_temp and a separate old_pg_upmap set for pg_upmap. (This isn't to allow for empty pg_upmap mappings -- apparently, pg_temp just wasn't looked at as an example for pg_upmap encoding.)

Reuse __decode_pg_temp() for decoding pg_upmap and new_pg_upmap. __decode_pg_temp() stores into pg_temp union member, but since pg_upmap union member is identical, reading through pg_upmap later is OK.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
-rw-r--r--include/linux/ceph/osdmap.h10
-rw-r--r--net/ceph/debugfs.c23
-rw-r--r--net/ceph/osdmap.c135
3 files changed, 164 insertions, 4 deletions
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index fe6d189bdd30..c612cff81f5c 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -143,10 +143,14 @@ struct ceph_pg_mapping {
143 struct { 143 struct {
144 int len; 144 int len;
145 int osds[]; 145 int osds[];
146 } pg_temp; 146 } pg_temp, pg_upmap;
147 struct { 147 struct {
148 int osd; 148 int osd;
149 } primary_temp; 149 } primary_temp;
150 struct {
151 int len;
152 int from_to[][2];
153 } pg_upmap_items;
150 }; 154 };
151}; 155};
152 156
@@ -165,6 +169,10 @@ struct ceph_osdmap {
165 struct rb_root pg_temp; 169 struct rb_root pg_temp;
166 struct rb_root primary_temp; 170 struct rb_root primary_temp;
167 171
172 /* remap (post-CRUSH, pre-up) */
173 struct rb_root pg_upmap; /* PG := raw set */
174 struct rb_root pg_upmap_items; /* from -> to within raw set */
175
168 u32 *osd_primary_affinity; 176 u32 *osd_primary_affinity;
169 177
170 struct rb_root pg_pools; 178 struct rb_root pg_pools;
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 017f15c575f8..4f57d5bcaba2 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -104,6 +104,29 @@ static int osdmap_show(struct seq_file *s, void *p)
104 seq_printf(s, "primary_temp %llu.%x %d\n", pg->pgid.pool, 104 seq_printf(s, "primary_temp %llu.%x %d\n", pg->pgid.pool,
105 pg->pgid.seed, pg->primary_temp.osd); 105 pg->pgid.seed, pg->primary_temp.osd);
106 } 106 }
107 for (n = rb_first(&map->pg_upmap); n; n = rb_next(n)) {
108 struct ceph_pg_mapping *pg =
109 rb_entry(n, struct ceph_pg_mapping, node);
110
111 seq_printf(s, "pg_upmap %llu.%x [", pg->pgid.pool,
112 pg->pgid.seed);
113 for (i = 0; i < pg->pg_upmap.len; i++)
114 seq_printf(s, "%s%d", (i == 0 ? "" : ","),
115 pg->pg_upmap.osds[i]);
116 seq_printf(s, "]\n");
117 }
118 for (n = rb_first(&map->pg_upmap_items); n; n = rb_next(n)) {
119 struct ceph_pg_mapping *pg =
120 rb_entry(n, struct ceph_pg_mapping, node);
121
122 seq_printf(s, "pg_upmap_items %llu.%x [", pg->pgid.pool,
123 pg->pgid.seed);
124 for (i = 0; i < pg->pg_upmap_items.len; i++)
125 seq_printf(s, "%s%d->%d", (i == 0 ? "" : ","),
126 pg->pg_upmap_items.from_to[i][0],
127 pg->pg_upmap_items.from_to[i][1]);
128 seq_printf(s, "]\n");
129 }
107 130
108 up_read(&osdc->lock); 131 up_read(&osdc->lock);
109 return 0; 132 return 0;
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index f6d561edd511..a3f60d0bfd13 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -735,6 +735,8 @@ struct ceph_osdmap *ceph_osdmap_alloc(void)
735 map->pool_max = -1; 735 map->pool_max = -1;
736 map->pg_temp = RB_ROOT; 736 map->pg_temp = RB_ROOT;
737 map->primary_temp = RB_ROOT; 737 map->primary_temp = RB_ROOT;
738 map->pg_upmap = RB_ROOT;
739 map->pg_upmap_items = RB_ROOT;
738 mutex_init(&map->crush_workspace_mutex); 740 mutex_init(&map->crush_workspace_mutex);
739 741
740 return map; 742 return map;
@@ -759,6 +761,20 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map)
759 erase_pg_mapping(&map->primary_temp, pg); 761 erase_pg_mapping(&map->primary_temp, pg);
760 free_pg_mapping(pg); 762 free_pg_mapping(pg);
761 } 763 }
764 while (!RB_EMPTY_ROOT(&map->pg_upmap)) {
765 struct ceph_pg_mapping *pg =
766 rb_entry(rb_first(&map->pg_upmap),
767 struct ceph_pg_mapping, node);
768 rb_erase(&pg->node, &map->pg_upmap);
769 kfree(pg);
770 }
771 while (!RB_EMPTY_ROOT(&map->pg_upmap_items)) {
772 struct ceph_pg_mapping *pg =
773 rb_entry(rb_first(&map->pg_upmap_items),
774 struct ceph_pg_mapping, node);
775 rb_erase(&pg->node, &map->pg_upmap_items);
776 kfree(pg);
777 }
762 while (!RB_EMPTY_ROOT(&map->pg_pools)) { 778 while (!RB_EMPTY_ROOT(&map->pg_pools)) {
763 struct ceph_pg_pool_info *pi = 779 struct ceph_pg_pool_info *pi =
764 rb_entry(rb_first(&map->pg_pools), 780 rb_entry(rb_first(&map->pg_pools),
@@ -1161,6 +1177,75 @@ e_inval:
1161 return -EINVAL; 1177 return -EINVAL;
1162} 1178}
1163 1179
1180static struct ceph_pg_mapping *__decode_pg_upmap(void **p, void *end,
1181 bool __unused)
1182{
1183 return __decode_pg_temp(p, end, false);
1184}
1185
1186static int decode_pg_upmap(void **p, void *end, struct ceph_osdmap *map)
1187{
1188 return decode_pg_mapping(p, end, &map->pg_upmap, __decode_pg_upmap,
1189 false);
1190}
1191
1192static int decode_new_pg_upmap(void **p, void *end, struct ceph_osdmap *map)
1193{
1194 return decode_pg_mapping(p, end, &map->pg_upmap, __decode_pg_upmap,
1195 true);
1196}
1197
1198static int decode_old_pg_upmap(void **p, void *end, struct ceph_osdmap *map)
1199{
1200 return decode_pg_mapping(p, end, &map->pg_upmap, NULL, true);
1201}
1202
1203static struct ceph_pg_mapping *__decode_pg_upmap_items(void **p, void *end,
1204 bool __unused)
1205{
1206 struct ceph_pg_mapping *pg;
1207 u32 len, i;
1208
1209 ceph_decode_32_safe(p, end, len, e_inval);
1210 if (len > (SIZE_MAX - sizeof(*pg)) / (2 * sizeof(u32)))
1211 return ERR_PTR(-EINVAL);
1212
1213 ceph_decode_need(p, end, 2 * len * sizeof(u32), e_inval);
1214 pg = kzalloc(sizeof(*pg) + 2 * len * sizeof(u32), GFP_NOIO);
1215 if (!pg)
1216 return ERR_PTR(-ENOMEM);
1217
1218 pg->pg_upmap_items.len = len;
1219 for (i = 0; i < len; i++) {
1220 pg->pg_upmap_items.from_to[i][0] = ceph_decode_32(p);
1221 pg->pg_upmap_items.from_to[i][1] = ceph_decode_32(p);
1222 }
1223
1224 return pg;
1225
1226e_inval:
1227 return ERR_PTR(-EINVAL);
1228}
1229
1230static int decode_pg_upmap_items(void **p, void *end, struct ceph_osdmap *map)
1231{
1232 return decode_pg_mapping(p, end, &map->pg_upmap_items,
1233 __decode_pg_upmap_items, false);
1234}
1235
1236static int decode_new_pg_upmap_items(void **p, void *end,
1237 struct ceph_osdmap *map)
1238{
1239 return decode_pg_mapping(p, end, &map->pg_upmap_items,
1240 __decode_pg_upmap_items, true);
1241}
1242
1243static int decode_old_pg_upmap_items(void **p, void *end,
1244 struct ceph_osdmap *map)
1245{
1246 return decode_pg_mapping(p, end, &map->pg_upmap_items, NULL, true);
1247}
1248
1164/* 1249/*
1165 * decode a full map. 1250 * decode a full map.
1166 */ 1251 */
@@ -1250,9 +1335,7 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
1250 if (err) 1335 if (err)
1251 goto bad; 1336 goto bad;
1252 } else { 1337 } else {
1253 /* XXX can this happen? */ 1338 WARN_ON(map->osd_primary_affinity);
1254 kfree(map->osd_primary_affinity);
1255 map->osd_primary_affinity = NULL;
1256 } 1339 }
1257 1340
1258 /* crush */ 1341 /* crush */
@@ -1261,6 +1344,26 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
1261 if (err) 1344 if (err)
1262 goto bad; 1345 goto bad;
1263 1346
1347 *p += len;
1348 if (struct_v >= 3) {
1349 /* erasure_code_profiles */
1350 ceph_decode_skip_map_of_map(p, end, string, string, string,
1351 bad);
1352 }
1353
1354 if (struct_v >= 4) {
1355 err = decode_pg_upmap(p, end, map);
1356 if (err)
1357 goto bad;
1358
1359 err = decode_pg_upmap_items(p, end, map);
1360 if (err)
1361 goto bad;
1362 } else {
1363 WARN_ON(!RB_EMPTY_ROOT(&map->pg_upmap));
1364 WARN_ON(!RB_EMPTY_ROOT(&map->pg_upmap_items));
1365 }
1366
1264 /* ignore the rest */ 1367 /* ignore the rest */
1265 *p = end; 1368 *p = end;
1266 1369
@@ -1520,6 +1623,32 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
1520 goto bad; 1623 goto bad;
1521 } 1624 }
1522 1625
1626 if (struct_v >= 3) {
1627 /* new_erasure_code_profiles */
1628 ceph_decode_skip_map_of_map(p, end, string, string, string,
1629 bad);
1630 /* old_erasure_code_profiles */
1631 ceph_decode_skip_set(p, end, string, bad);
1632 }
1633
1634 if (struct_v >= 4) {
1635 err = decode_new_pg_upmap(p, end, map);
1636 if (err)
1637 goto bad;
1638
1639 err = decode_old_pg_upmap(p, end, map);
1640 if (err)
1641 goto bad;
1642
1643 err = decode_new_pg_upmap_items(p, end, map);
1644 if (err)
1645 goto bad;
1646
1647 err = decode_old_pg_upmap_items(p, end, map);
1648 if (err)
1649 goto bad;
1650 }
1651
1523 /* ignore the rest */ 1652 /* ignore the rest */
1524 *p = end; 1653 *p = end;
1525 1654