diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-01-28 14:02:23 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-01-28 14:02:23 -0500 |
commit | d891ea23d5203e5c47439b2a174f86a00b356a6c (patch) | |
tree | 3876cefcced9df5519f437cd8eb275cb979b93f6 /net/ceph/osdmap.c | |
parent | 08d21b5f93eb92a781daea71b6fcb3a340909141 (diff) | |
parent | 125d725c923527a85876c031028c7f55c28b74b3 (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull ceph updates from Sage Weil:
"This is a big batch. From Ilya we have:
- rbd support for more than ~250 mapped devices (now uses the same scheme
that SCSI does for device major/minor numbering)
- crush updates for new mapping behaviors (will be needed for coming
erasure coding support, among other things)
- preliminary support for tiered storage pools
There is also a big series fixing a pile of cephfs bugs with clustered
MDSs from Yan Zheng, ACL support for cephfs from Guangliang Zhao, ceph
fscache improvements from Li Wang, improved behavior when we get
ENOSPC from Josh Durgin, some readv/writev improvements from
Majianpeng, and the usual mix of small cleanups"
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (76 commits)
ceph: cast PAGE_SIZE to size_t in ceph_sync_write()
ceph: fix dout() compile warnings in ceph_filemap_fault()
libceph: support CEPH_FEATURE_OSD_CACHEPOOL feature
libceph: follow redirect replies from osds
libceph: rename ceph_osd_request::r_{oloc,oid} to r_base_{oloc,oid}
libceph: follow {read,write}_tier fields on osd request submission
libceph: add ceph_pg_pool_by_id()
libceph: CEPH_OSD_FLAG_* enum update
libceph: replace ceph_calc_ceph_pg() with ceph_oloc_oid_to_pg()
libceph: introduce and start using oid abstraction
libceph: rename MAX_OBJ_NAME_SIZE to CEPH_MAX_OID_NAME_LEN
libceph: move ceph_file_layout helpers to ceph_fs.h
libceph: start using oloc abstraction
libceph: dout() is missing a newline
libceph: add ceph_kv{malloc,free}() and switch to them
libceph: support CEPH_FEATURE_EXPORT_PEER
ceph: add imported caps when handling cap export message
ceph: add open export target session helper
ceph: remove exported caps when handling cap import message
ceph: handle session flush message
...
Diffstat (limited to 'net/ceph/osdmap.c')
-rw-r--r-- | net/ceph/osdmap.c | 78 |
1 files changed, 60 insertions, 18 deletions
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index dbd9a4792427..aade4a5c1c07 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c | |||
@@ -464,6 +464,11 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, u64 id) | |||
464 | return NULL; | 464 | return NULL; |
465 | } | 465 | } |
466 | 466 | ||
467 | struct ceph_pg_pool_info *ceph_pg_pool_by_id(struct ceph_osdmap *map, u64 id) | ||
468 | { | ||
469 | return __lookup_pg_pool(&map->pg_pools, id); | ||
470 | } | ||
471 | |||
467 | const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id) | 472 | const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id) |
468 | { | 473 | { |
469 | struct ceph_pg_pool_info *pi; | 474 | struct ceph_pg_pool_info *pi; |
@@ -514,8 +519,8 @@ static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi) | |||
514 | pr_warning("got v %d < 5 cv %d of ceph_pg_pool\n", ev, cv); | 519 | pr_warning("got v %d < 5 cv %d of ceph_pg_pool\n", ev, cv); |
515 | return -EINVAL; | 520 | return -EINVAL; |
516 | } | 521 | } |
517 | if (cv > 7) { | 522 | if (cv > 9) { |
518 | pr_warning("got v %d cv %d > 7 of ceph_pg_pool\n", ev, cv); | 523 | pr_warning("got v %d cv %d > 9 of ceph_pg_pool\n", ev, cv); |
519 | return -EINVAL; | 524 | return -EINVAL; |
520 | } | 525 | } |
521 | len = ceph_decode_32(p); | 526 | len = ceph_decode_32(p); |
@@ -543,12 +548,34 @@ static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi) | |||
543 | *p += len; | 548 | *p += len; |
544 | } | 549 | } |
545 | 550 | ||
546 | /* skip removed snaps */ | 551 | /* skip removed_snaps */ |
547 | num = ceph_decode_32(p); | 552 | num = ceph_decode_32(p); |
548 | *p += num * (8 + 8); | 553 | *p += num * (8 + 8); |
549 | 554 | ||
550 | *p += 8; /* skip auid */ | 555 | *p += 8; /* skip auid */ |
551 | pi->flags = ceph_decode_64(p); | 556 | pi->flags = ceph_decode_64(p); |
557 | *p += 4; /* skip crash_replay_interval */ | ||
558 | |||
559 | if (ev >= 7) | ||
560 | *p += 1; /* skip min_size */ | ||
561 | |||
562 | if (ev >= 8) | ||
563 | *p += 8 + 8; /* skip quota_max_* */ | ||
564 | |||
565 | if (ev >= 9) { | ||
566 | /* skip tiers */ | ||
567 | num = ceph_decode_32(p); | ||
568 | *p += num * 8; | ||
569 | |||
570 | *p += 8; /* skip tier_of */ | ||
571 | *p += 1; /* skip cache_mode */ | ||
572 | |||
573 | pi->read_tier = ceph_decode_64(p); | ||
574 | pi->write_tier = ceph_decode_64(p); | ||
575 | } else { | ||
576 | pi->read_tier = -1; | ||
577 | pi->write_tier = -1; | ||
578 | } | ||
552 | 579 | ||
553 | /* ignore the rest */ | 580 | /* ignore the rest */ |
554 | 581 | ||
@@ -1090,25 +1117,40 @@ invalid: | |||
1090 | EXPORT_SYMBOL(ceph_calc_file_object_mapping); | 1117 | EXPORT_SYMBOL(ceph_calc_file_object_mapping); |
1091 | 1118 | ||
1092 | /* | 1119 | /* |
1093 | * calculate an object layout (i.e. pgid) from an oid, | 1120 | * Calculate mapping of a (oloc, oid) pair to a PG. Should only be |
1094 | * file_layout, and osdmap | 1121 | * called with target's (oloc, oid), since tiering isn't taken into |
1122 | * account. | ||
1095 | */ | 1123 | */ |
1096 | int ceph_calc_ceph_pg(struct ceph_pg *pg, const char *oid, | 1124 | int ceph_oloc_oid_to_pg(struct ceph_osdmap *osdmap, |
1097 | struct ceph_osdmap *osdmap, uint64_t pool) | 1125 | struct ceph_object_locator *oloc, |
1126 | struct ceph_object_id *oid, | ||
1127 | struct ceph_pg *pg_out) | ||
1098 | { | 1128 | { |
1099 | struct ceph_pg_pool_info *pool_info; | 1129 | struct ceph_pg_pool_info *pi; |
1100 | 1130 | ||
1101 | BUG_ON(!osdmap); | 1131 | pi = __lookup_pg_pool(&osdmap->pg_pools, oloc->pool); |
1102 | pool_info = __lookup_pg_pool(&osdmap->pg_pools, pool); | 1132 | if (!pi) |
1103 | if (!pool_info) | ||
1104 | return -EIO; | 1133 | return -EIO; |
1105 | pg->pool = pool; | ||
1106 | pg->seed = ceph_str_hash(pool_info->object_hash, oid, strlen(oid)); | ||
1107 | 1134 | ||
1108 | dout("%s '%s' pgid %lld.%x\n", __func__, oid, pg->pool, pg->seed); | 1135 | pg_out->pool = oloc->pool; |
1136 | pg_out->seed = ceph_str_hash(pi->object_hash, oid->name, | ||
1137 | oid->name_len); | ||
1138 | |||
1139 | dout("%s '%.*s' pgid %llu.%x\n", __func__, oid->name_len, oid->name, | ||
1140 | pg_out->pool, pg_out->seed); | ||
1109 | return 0; | 1141 | return 0; |
1110 | } | 1142 | } |
1111 | EXPORT_SYMBOL(ceph_calc_ceph_pg); | 1143 | EXPORT_SYMBOL(ceph_oloc_oid_to_pg); |
1144 | |||
1145 | static int crush_do_rule_ary(const struct crush_map *map, int ruleno, int x, | ||
1146 | int *result, int result_max, | ||
1147 | const __u32 *weight, int weight_max) | ||
1148 | { | ||
1149 | int scratch[result_max * 3]; | ||
1150 | |||
1151 | return crush_do_rule(map, ruleno, x, result, result_max, | ||
1152 | weight, weight_max, scratch); | ||
1153 | } | ||
1112 | 1154 | ||
1113 | /* | 1155 | /* |
1114 | * Calculate raw osd vector for the given pgid. Return pointer to osd | 1156 | * Calculate raw osd vector for the given pgid. Return pointer to osd |
@@ -1163,9 +1205,9 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, | |||
1163 | pool->pgp_num_mask) + | 1205 | pool->pgp_num_mask) + |
1164 | (unsigned)pgid.pool; | 1206 | (unsigned)pgid.pool; |
1165 | } | 1207 | } |
1166 | r = crush_do_rule(osdmap->crush, ruleno, pps, osds, | 1208 | r = crush_do_rule_ary(osdmap->crush, ruleno, pps, |
1167 | min_t(int, pool->size, *num), | 1209 | osds, min_t(int, pool->size, *num), |
1168 | osdmap->osd_weight); | 1210 | osdmap->osd_weight, osdmap->max_osd); |
1169 | if (r < 0) { | 1211 | if (r < 0) { |
1170 | pr_err("error %d from crush rule: pool %lld ruleset %d type %d" | 1212 | pr_err("error %d from crush rule: pool %lld ruleset %d type %d" |
1171 | " size %d\n", r, pgid.pool, pool->crush_ruleset, | 1213 | " size %d\n", r, pgid.pool, pool->crush_ruleset, |