aboutsummaryrefslogtreecommitdiffstats
path: root/net/ceph/osdmap.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2014-01-28 14:02:23 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2014-01-28 14:02:23 -0500
commit d891ea23d5203e5c47439b2a174f86a00b356a6c (patch)
tree 3876cefcced9df5519f437cd8eb275cb979b93f6 /net/ceph/osdmap.c
parent 08d21b5f93eb92a781daea71b6fcb3a340909141 (diff)
parent 125d725c923527a85876c031028c7f55c28b74b3 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull ceph updates from Sage Weil: "This is a big batch. From Ilya we have: - rbd support for more than ~250 mapped devices (now uses same scheme that SCSI does for device major/minor numbering) - crush updates for new mapping behaviors (will be needed for coming erasure coding support, among other things) - preliminary support for tiered storage pools There is also a big series fixing a pile of cephfs bugs with clustered MDSs from Yan Zheng, ACL support for cephfs from Guangliang Zhao, ceph fscache improvements from Li Wang, improved behavior when we get ENOSPC from Josh Durgin, some readv/writev improvements from Majianpeng, and the usual mix of small cleanups" * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (76 commits) ceph: cast PAGE_SIZE to size_t in ceph_sync_write() ceph: fix dout() compile warnings in ceph_filemap_fault() libceph: support CEPH_FEATURE_OSD_CACHEPOOL feature libceph: follow redirect replies from osds libceph: rename ceph_osd_request::r_{oloc,oid} to r_base_{oloc,oid} libceph: follow {read,write}_tier fields on osd request submission libceph: add ceph_pg_pool_by_id() libceph: CEPH_OSD_FLAG_* enum update libceph: replace ceph_calc_ceph_pg() with ceph_oloc_oid_to_pg() libceph: introduce and start using oid abstraction libceph: rename MAX_OBJ_NAME_SIZE to CEPH_MAX_OID_NAME_LEN libceph: move ceph_file_layout helpers to ceph_fs.h libceph: start using oloc abstraction libceph: dout() is missing a newline libceph: add ceph_kv{malloc,free}() and switch to them libceph: support CEPH_FEATURE_EXPORT_PEER ceph: add imported caps when handling cap export message ceph: add open export target session helper ceph: remove exported caps when handling cap import message ceph: handle session flush message ...
Diffstat (limited to 'net/ceph/osdmap.c')
-rw-r--r-- net/ceph/osdmap.c 78
1 files changed, 60 insertions, 18 deletions
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index dbd9a4792427..aade4a5c1c07 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -464,6 +464,11 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, u64 id)
464 return NULL; 464 return NULL;
465} 465}
466 466
467struct ceph_pg_pool_info *ceph_pg_pool_by_id(struct ceph_osdmap *map, u64 id)
468{
469 return __lookup_pg_pool(&map->pg_pools, id);
470}
471
467const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id) 472const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id)
468{ 473{
469 struct ceph_pg_pool_info *pi; 474 struct ceph_pg_pool_info *pi;
@@ -514,8 +519,8 @@ static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi)
514 pr_warning("got v %d < 5 cv %d of ceph_pg_pool\n", ev, cv); 519 pr_warning("got v %d < 5 cv %d of ceph_pg_pool\n", ev, cv);
515 return -EINVAL; 520 return -EINVAL;
516 } 521 }
517 if (cv > 7) { 522 if (cv > 9) {
518 pr_warning("got v %d cv %d > 7 of ceph_pg_pool\n", ev, cv); 523 pr_warning("got v %d cv %d > 9 of ceph_pg_pool\n", ev, cv);
519 return -EINVAL; 524 return -EINVAL;
520 } 525 }
521 len = ceph_decode_32(p); 526 len = ceph_decode_32(p);
@@ -543,12 +548,34 @@ static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi)
543 *p += len; 548 *p += len;
544 } 549 }
545 550
546 /* skip removed snaps */ 551 /* skip removed_snaps */
547 num = ceph_decode_32(p); 552 num = ceph_decode_32(p);
548 *p += num * (8 + 8); 553 *p += num * (8 + 8);
549 554
550 *p += 8; /* skip auid */ 555 *p += 8; /* skip auid */
551 pi->flags = ceph_decode_64(p); 556 pi->flags = ceph_decode_64(p);
557 *p += 4; /* skip crash_replay_interval */
558
559 if (ev >= 7)
560 *p += 1; /* skip min_size */
561
562 if (ev >= 8)
563 *p += 8 + 8; /* skip quota_max_* */
564
565 if (ev >= 9) {
566 /* skip tiers */
567 num = ceph_decode_32(p);
568 *p += num * 8;
569
570 *p += 8; /* skip tier_of */
571 *p += 1; /* skip cache_mode */
572
573 pi->read_tier = ceph_decode_64(p);
574 pi->write_tier = ceph_decode_64(p);
575 } else {
576 pi->read_tier = -1;
577 pi->write_tier = -1;
578 }
552 579
553 /* ignore the rest */ 580 /* ignore the rest */
554 581
@@ -1090,25 +1117,40 @@ invalid:
1090EXPORT_SYMBOL(ceph_calc_file_object_mapping); 1117EXPORT_SYMBOL(ceph_calc_file_object_mapping);
1091 1118
1092/* 1119/*
1093 * calculate an object layout (i.e. pgid) from an oid, 1120 * Calculate mapping of a (oloc, oid) pair to a PG. Should only be
1094 * file_layout, and osdmap 1121 * called with target's (oloc, oid), since tiering isn't taken into
1122 * account.
1095 */ 1123 */
1096int ceph_calc_ceph_pg(struct ceph_pg *pg, const char *oid, 1124int ceph_oloc_oid_to_pg(struct ceph_osdmap *osdmap,
1097 struct ceph_osdmap *osdmap, uint64_t pool) 1125 struct ceph_object_locator *oloc,
1126 struct ceph_object_id *oid,
1127 struct ceph_pg *pg_out)
1098{ 1128{
1099 struct ceph_pg_pool_info *pool_info; 1129 struct ceph_pg_pool_info *pi;
1100 1130
1101 BUG_ON(!osdmap); 1131 pi = __lookup_pg_pool(&osdmap->pg_pools, oloc->pool);
1102 pool_info = __lookup_pg_pool(&osdmap->pg_pools, pool); 1132 if (!pi)
1103 if (!pool_info)
1104 return -EIO; 1133 return -EIO;
1105 pg->pool = pool;
1106 pg->seed = ceph_str_hash(pool_info->object_hash, oid, strlen(oid));
1107 1134
1108 dout("%s '%s' pgid %lld.%x\n", __func__, oid, pg->pool, pg->seed); 1135 pg_out->pool = oloc->pool;
1136 pg_out->seed = ceph_str_hash(pi->object_hash, oid->name,
1137 oid->name_len);
1138
1139 dout("%s '%.*s' pgid %llu.%x\n", __func__, oid->name_len, oid->name,
1140 pg_out->pool, pg_out->seed);
1109 return 0; 1141 return 0;
1110} 1142}
1111EXPORT_SYMBOL(ceph_calc_ceph_pg); 1143EXPORT_SYMBOL(ceph_oloc_oid_to_pg);
1144
1145static int crush_do_rule_ary(const struct crush_map *map, int ruleno, int x,
1146 int *result, int result_max,
1147 const __u32 *weight, int weight_max)
1148{
1149 int scratch[result_max * 3];
1150
1151 return crush_do_rule(map, ruleno, x, result, result_max,
1152 weight, weight_max, scratch);
1153}
1112 1154
1113/* 1155/*
1114 * Calculate raw osd vector for the given pgid. Return pointer to osd 1156 * Calculate raw osd vector for the given pgid. Return pointer to osd
@@ -1163,9 +1205,9 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
1163 pool->pgp_num_mask) + 1205 pool->pgp_num_mask) +
1164 (unsigned)pgid.pool; 1206 (unsigned)pgid.pool;
1165 } 1207 }
1166 r = crush_do_rule(osdmap->crush, ruleno, pps, osds, 1208 r = crush_do_rule_ary(osdmap->crush, ruleno, pps,
1167 min_t(int, pool->size, *num), 1209 osds, min_t(int, pool->size, *num),
1168 osdmap->osd_weight); 1210 osdmap->osd_weight, osdmap->max_osd);
1169 if (r < 0) { 1211 if (r < 0) {
1170 pr_err("error %d from crush rule: pool %lld ruleset %d type %d" 1212 pr_err("error %d from crush rule: pool %lld ruleset %d type %d"
1171 " size %d\n", r, pgid.pool, pool->crush_ruleset, 1213 " size %d\n", r, pgid.pool, pool->crush_ruleset,