diff options
| author | Ilya Dryomov <idryomov@gmail.com> | 2017-06-21 11:27:18 -0400 |
|---|---|---|
| committer | Ilya Dryomov <idryomov@gmail.com> | 2017-07-07 11:25:18 -0400 |
| commit | 6f428df47dae2c8ea31fd4c0c74a12a8a5ac2d1d (patch) | |
| tree | 00076a72eda7738433726f6e6bc4ce1644d370f6 | |
| parent | 278b1d709c6acc6f7d138fed775c76695b068e43 (diff) | |
libceph: pg_upmap[_items] infrastructure
pg_temp and pg_upmap encodings are the same (PG -> array of osds),
except for how removals are encoded in incrementals: for pg_temp it's an
empty mapping in new_pg_temp, while for pg_upmap it's a separate
old_pg_upmap set. (This isn't to allow for empty pg_upmap mappings --
apparently, pg_temp just wasn't looked at as an example for the pg_upmap
encoding.)
Reuse __decode_pg_temp() for decoding pg_upmap and new_pg_upmap.
__decode_pg_temp() stores into the pg_temp union member, but since the
pg_upmap union member is identical, reading through pg_upmap later is OK.
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
| -rw-r--r-- | include/linux/ceph/osdmap.h | 10 | ||||
| -rw-r--r-- | net/ceph/debugfs.c | 23 | ||||
| -rw-r--r-- | net/ceph/osdmap.c | 135 |
3 files changed, 164 insertions, 4 deletions
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index fe6d189bdd30..c612cff81f5c 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h | |||
| @@ -143,10 +143,14 @@ struct ceph_pg_mapping { | |||
| 143 | struct { | 143 | struct { |
| 144 | int len; | 144 | int len; |
| 145 | int osds[]; | 145 | int osds[]; |
| 146 | } pg_temp; | 146 | } pg_temp, pg_upmap; |
| 147 | struct { | 147 | struct { |
| 148 | int osd; | 148 | int osd; |
| 149 | } primary_temp; | 149 | } primary_temp; |
| 150 | struct { | ||
| 151 | int len; | ||
| 152 | int from_to[][2]; | ||
| 153 | } pg_upmap_items; | ||
| 150 | }; | 154 | }; |
| 151 | }; | 155 | }; |
| 152 | 156 | ||
| @@ -165,6 +169,10 @@ struct ceph_osdmap { | |||
| 165 | struct rb_root pg_temp; | 169 | struct rb_root pg_temp; |
| 166 | struct rb_root primary_temp; | 170 | struct rb_root primary_temp; |
| 167 | 171 | ||
| 172 | /* remap (post-CRUSH, pre-up) */ | ||
| 173 | struct rb_root pg_upmap; /* PG := raw set */ | ||
| 174 | struct rb_root pg_upmap_items; /* from -> to within raw set */ | ||
| 175 | |||
| 168 | u32 *osd_primary_affinity; | 176 | u32 *osd_primary_affinity; |
| 169 | 177 | ||
| 170 | struct rb_root pg_pools; | 178 | struct rb_root pg_pools; |
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index 017f15c575f8..4f57d5bcaba2 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c | |||
| @@ -104,6 +104,29 @@ static int osdmap_show(struct seq_file *s, void *p) | |||
| 104 | seq_printf(s, "primary_temp %llu.%x %d\n", pg->pgid.pool, | 104 | seq_printf(s, "primary_temp %llu.%x %d\n", pg->pgid.pool, |
| 105 | pg->pgid.seed, pg->primary_temp.osd); | 105 | pg->pgid.seed, pg->primary_temp.osd); |
| 106 | } | 106 | } |
| 107 | for (n = rb_first(&map->pg_upmap); n; n = rb_next(n)) { | ||
| 108 | struct ceph_pg_mapping *pg = | ||
| 109 | rb_entry(n, struct ceph_pg_mapping, node); | ||
| 110 | |||
| 111 | seq_printf(s, "pg_upmap %llu.%x [", pg->pgid.pool, | ||
| 112 | pg->pgid.seed); | ||
| 113 | for (i = 0; i < pg->pg_upmap.len; i++) | ||
| 114 | seq_printf(s, "%s%d", (i == 0 ? "" : ","), | ||
| 115 | pg->pg_upmap.osds[i]); | ||
| 116 | seq_printf(s, "]\n"); | ||
| 117 | } | ||
| 118 | for (n = rb_first(&map->pg_upmap_items); n; n = rb_next(n)) { | ||
| 119 | struct ceph_pg_mapping *pg = | ||
| 120 | rb_entry(n, struct ceph_pg_mapping, node); | ||
| 121 | |||
| 122 | seq_printf(s, "pg_upmap_items %llu.%x [", pg->pgid.pool, | ||
| 123 | pg->pgid.seed); | ||
| 124 | for (i = 0; i < pg->pg_upmap_items.len; i++) | ||
| 125 | seq_printf(s, "%s%d->%d", (i == 0 ? "" : ","), | ||
| 126 | pg->pg_upmap_items.from_to[i][0], | ||
| 127 | pg->pg_upmap_items.from_to[i][1]); | ||
| 128 | seq_printf(s, "]\n"); | ||
| 129 | } | ||
| 107 | 130 | ||
| 108 | up_read(&osdc->lock); | 131 | up_read(&osdc->lock); |
| 109 | return 0; | 132 | return 0; |
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index f6d561edd511..a3f60d0bfd13 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c | |||
| @@ -735,6 +735,8 @@ struct ceph_osdmap *ceph_osdmap_alloc(void) | |||
| 735 | map->pool_max = -1; | 735 | map->pool_max = -1; |
| 736 | map->pg_temp = RB_ROOT; | 736 | map->pg_temp = RB_ROOT; |
| 737 | map->primary_temp = RB_ROOT; | 737 | map->primary_temp = RB_ROOT; |
| 738 | map->pg_upmap = RB_ROOT; | ||
| 739 | map->pg_upmap_items = RB_ROOT; | ||
| 738 | mutex_init(&map->crush_workspace_mutex); | 740 | mutex_init(&map->crush_workspace_mutex); |
| 739 | 741 | ||
| 740 | return map; | 742 | return map; |
| @@ -759,6 +761,20 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map) | |||
| 759 | erase_pg_mapping(&map->primary_temp, pg); | 761 | erase_pg_mapping(&map->primary_temp, pg); |
| 760 | free_pg_mapping(pg); | 762 | free_pg_mapping(pg); |
| 761 | } | 763 | } |
| 764 | while (!RB_EMPTY_ROOT(&map->pg_upmap)) { | ||
| 765 | struct ceph_pg_mapping *pg = | ||
| 766 | rb_entry(rb_first(&map->pg_upmap), | ||
| 767 | struct ceph_pg_mapping, node); | ||
| 768 | rb_erase(&pg->node, &map->pg_upmap); | ||
| 769 | kfree(pg); | ||
| 770 | } | ||
| 771 | while (!RB_EMPTY_ROOT(&map->pg_upmap_items)) { | ||
| 772 | struct ceph_pg_mapping *pg = | ||
| 773 | rb_entry(rb_first(&map->pg_upmap_items), | ||
| 774 | struct ceph_pg_mapping, node); | ||
| 775 | rb_erase(&pg->node, &map->pg_upmap_items); | ||
| 776 | kfree(pg); | ||
| 777 | } | ||
| 762 | while (!RB_EMPTY_ROOT(&map->pg_pools)) { | 778 | while (!RB_EMPTY_ROOT(&map->pg_pools)) { |
| 763 | struct ceph_pg_pool_info *pi = | 779 | struct ceph_pg_pool_info *pi = |
| 764 | rb_entry(rb_first(&map->pg_pools), | 780 | rb_entry(rb_first(&map->pg_pools), |
| @@ -1161,6 +1177,75 @@ e_inval: | |||
| 1161 | return -EINVAL; | 1177 | return -EINVAL; |
| 1162 | } | 1178 | } |
| 1163 | 1179 | ||
| 1180 | static struct ceph_pg_mapping *__decode_pg_upmap(void **p, void *end, | ||
| 1181 | bool __unused) | ||
| 1182 | { | ||
| 1183 | return __decode_pg_temp(p, end, false); | ||
| 1184 | } | ||
| 1185 | |||
| 1186 | static int decode_pg_upmap(void **p, void *end, struct ceph_osdmap *map) | ||
| 1187 | { | ||
| 1188 | return decode_pg_mapping(p, end, &map->pg_upmap, __decode_pg_upmap, | ||
| 1189 | false); | ||
| 1190 | } | ||
| 1191 | |||
| 1192 | static int decode_new_pg_upmap(void **p, void *end, struct ceph_osdmap *map) | ||
| 1193 | { | ||
| 1194 | return decode_pg_mapping(p, end, &map->pg_upmap, __decode_pg_upmap, | ||
| 1195 | true); | ||
| 1196 | } | ||
| 1197 | |||
| 1198 | static int decode_old_pg_upmap(void **p, void *end, struct ceph_osdmap *map) | ||
| 1199 | { | ||
| 1200 | return decode_pg_mapping(p, end, &map->pg_upmap, NULL, true); | ||
| 1201 | } | ||
| 1202 | |||
| 1203 | static struct ceph_pg_mapping *__decode_pg_upmap_items(void **p, void *end, | ||
| 1204 | bool __unused) | ||
| 1205 | { | ||
| 1206 | struct ceph_pg_mapping *pg; | ||
| 1207 | u32 len, i; | ||
| 1208 | |||
| 1209 | ceph_decode_32_safe(p, end, len, e_inval); | ||
| 1210 | if (len > (SIZE_MAX - sizeof(*pg)) / (2 * sizeof(u32))) | ||
| 1211 | return ERR_PTR(-EINVAL); | ||
| 1212 | |||
| 1213 | ceph_decode_need(p, end, 2 * len * sizeof(u32), e_inval); | ||
| 1214 | pg = kzalloc(sizeof(*pg) + 2 * len * sizeof(u32), GFP_NOIO); | ||
| 1215 | if (!pg) | ||
| 1216 | return ERR_PTR(-ENOMEM); | ||
| 1217 | |||
| 1218 | pg->pg_upmap_items.len = len; | ||
| 1219 | for (i = 0; i < len; i++) { | ||
| 1220 | pg->pg_upmap_items.from_to[i][0] = ceph_decode_32(p); | ||
| 1221 | pg->pg_upmap_items.from_to[i][1] = ceph_decode_32(p); | ||
| 1222 | } | ||
| 1223 | |||
| 1224 | return pg; | ||
| 1225 | |||
| 1226 | e_inval: | ||
| 1227 | return ERR_PTR(-EINVAL); | ||
| 1228 | } | ||
| 1229 | |||
| 1230 | static int decode_pg_upmap_items(void **p, void *end, struct ceph_osdmap *map) | ||
| 1231 | { | ||
| 1232 | return decode_pg_mapping(p, end, &map->pg_upmap_items, | ||
| 1233 | __decode_pg_upmap_items, false); | ||
| 1234 | } | ||
| 1235 | |||
| 1236 | static int decode_new_pg_upmap_items(void **p, void *end, | ||
| 1237 | struct ceph_osdmap *map) | ||
| 1238 | { | ||
| 1239 | return decode_pg_mapping(p, end, &map->pg_upmap_items, | ||
| 1240 | __decode_pg_upmap_items, true); | ||
| 1241 | } | ||
| 1242 | |||
| 1243 | static int decode_old_pg_upmap_items(void **p, void *end, | ||
| 1244 | struct ceph_osdmap *map) | ||
| 1245 | { | ||
| 1246 | return decode_pg_mapping(p, end, &map->pg_upmap_items, NULL, true); | ||
| 1247 | } | ||
| 1248 | |||
| 1164 | /* | 1249 | /* |
| 1165 | * decode a full map. | 1250 | * decode a full map. |
| 1166 | */ | 1251 | */ |
| @@ -1250,9 +1335,7 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map) | |||
| 1250 | if (err) | 1335 | if (err) |
| 1251 | goto bad; | 1336 | goto bad; |
| 1252 | } else { | 1337 | } else { |
| 1253 | /* XXX can this happen? */ | 1338 | WARN_ON(map->osd_primary_affinity); |
| 1254 | kfree(map->osd_primary_affinity); | ||
| 1255 | map->osd_primary_affinity = NULL; | ||
| 1256 | } | 1339 | } |
| 1257 | 1340 | ||
| 1258 | /* crush */ | 1341 | /* crush */ |
| @@ -1261,6 +1344,26 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map) | |||
| 1261 | if (err) | 1344 | if (err) |
| 1262 | goto bad; | 1345 | goto bad; |
| 1263 | 1346 | ||
| 1347 | *p += len; | ||
| 1348 | if (struct_v >= 3) { | ||
| 1349 | /* erasure_code_profiles */ | ||
| 1350 | ceph_decode_skip_map_of_map(p, end, string, string, string, | ||
| 1351 | bad); | ||
| 1352 | } | ||
| 1353 | |||
| 1354 | if (struct_v >= 4) { | ||
| 1355 | err = decode_pg_upmap(p, end, map); | ||
| 1356 | if (err) | ||
| 1357 | goto bad; | ||
| 1358 | |||
| 1359 | err = decode_pg_upmap_items(p, end, map); | ||
| 1360 | if (err) | ||
| 1361 | goto bad; | ||
| 1362 | } else { | ||
| 1363 | WARN_ON(!RB_EMPTY_ROOT(&map->pg_upmap)); | ||
| 1364 | WARN_ON(!RB_EMPTY_ROOT(&map->pg_upmap_items)); | ||
| 1365 | } | ||
| 1366 | |||
| 1264 | /* ignore the rest */ | 1367 | /* ignore the rest */ |
| 1265 | *p = end; | 1368 | *p = end; |
| 1266 | 1369 | ||
| @@ -1520,6 +1623,32 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
| 1520 | goto bad; | 1623 | goto bad; |
| 1521 | } | 1624 | } |
| 1522 | 1625 | ||
| 1626 | if (struct_v >= 3) { | ||
| 1627 | /* new_erasure_code_profiles */ | ||
| 1628 | ceph_decode_skip_map_of_map(p, end, string, string, string, | ||
| 1629 | bad); | ||
| 1630 | /* old_erasure_code_profiles */ | ||
| 1631 | ceph_decode_skip_set(p, end, string, bad); | ||
| 1632 | } | ||
| 1633 | |||
| 1634 | if (struct_v >= 4) { | ||
| 1635 | err = decode_new_pg_upmap(p, end, map); | ||
| 1636 | if (err) | ||
| 1637 | goto bad; | ||
| 1638 | |||
| 1639 | err = decode_old_pg_upmap(p, end, map); | ||
| 1640 | if (err) | ||
| 1641 | goto bad; | ||
| 1642 | |||
| 1643 | err = decode_new_pg_upmap_items(p, end, map); | ||
| 1644 | if (err) | ||
| 1645 | goto bad; | ||
| 1646 | |||
| 1647 | err = decode_old_pg_upmap_items(p, end, map); | ||
| 1648 | if (err) | ||
| 1649 | goto bad; | ||
| 1650 | } | ||
| 1651 | |||
| 1523 | /* ignore the rest */ | 1652 | /* ignore the rest */ |
| 1524 | *p = end; | 1653 | *p = end; |
| 1525 | 1654 | ||
