diff options
-rw-r--r-- | include/linux/ceph/ceph_features.h | 4 | ||||
-rw-r--r-- | include/linux/ceph/osdmap.h | 2 | ||||
-rw-r--r-- | net/ceph/osdmap.c | 39 |
3 files changed, 31 insertions, 14 deletions
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index ab0a54286e0d..76554cecaab2 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h | |||
@@ -34,6 +34,7 @@ | |||
34 | #define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27) | 34 | #define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27) |
35 | #define CEPH_FEATURE_OSD_HBMSGS (1<<28) | 35 | #define CEPH_FEATURE_OSD_HBMSGS (1<<28) |
36 | #define CEPH_FEATURE_MDSENC (1<<29) | 36 | #define CEPH_FEATURE_MDSENC (1<<29) |
37 | #define CEPH_FEATURE_OSDHASHPSPOOL (1<<30) | ||
37 | 38 | ||
38 | /* | 39 | /* |
39 | * Features supported. | 40 | * Features supported. |
@@ -45,7 +46,8 @@ | |||
45 | CEPH_FEATURE_OSDENC | \ | 46 | CEPH_FEATURE_OSDENC | \ |
46 | CEPH_FEATURE_CRUSH_TUNABLES | \ | 47 | CEPH_FEATURE_CRUSH_TUNABLES | \ |
47 | CEPH_FEATURE_CRUSH_TUNABLES2 | \ | 48 | CEPH_FEATURE_CRUSH_TUNABLES2 | \ |
48 | CEPH_FEATURE_REPLY_CREATE_INODE) | 49 | CEPH_FEATURE_REPLY_CREATE_INODE | \ |
50 | CEPH_FEATURE_OSDHASHPSPOOL) | ||
49 | 51 | ||
50 | #define CEPH_FEATURES_REQUIRED_DEFAULT \ | 52 | #define CEPH_FEATURES_REQUIRED_DEFAULT \ |
51 | (CEPH_FEATURE_NOSRCADDR | \ | 53 | (CEPH_FEATURE_NOSRCADDR | \ |
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 35985125f118..c819190d1642 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h | |||
@@ -23,6 +23,8 @@ struct ceph_pg { | |||
23 | uint32_t seed; | 23 | uint32_t seed; |
24 | }; | 24 | }; |
25 | 25 | ||
26 | #define CEPH_POOL_FLAG_HASHPSPOOL 1 | ||
27 | |||
26 | struct ceph_pg_pool_info { | 28 | struct ceph_pg_pool_info { |
27 | struct rb_node node; | 29 | struct rb_node node; |
28 | s64 id; | 30 | s64 id; |
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 378471644501..69bc4bf89e3e 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c | |||
@@ -1127,18 +1127,16 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, | |||
1127 | struct ceph_pg_mapping *pg; | 1127 | struct ceph_pg_mapping *pg; |
1128 | struct ceph_pg_pool_info *pool; | 1128 | struct ceph_pg_pool_info *pool; |
1129 | int ruleno; | 1129 | int ruleno; |
1130 | unsigned int poolid, ps, pps, t, r; | 1130 | int r; |
1131 | u32 pps; | ||
1131 | 1132 | ||
1132 | poolid = pgid.pool; | 1133 | pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool); |
1133 | ps = pgid.seed; | ||
1134 | |||
1135 | pool = __lookup_pg_pool(&osdmap->pg_pools, poolid); | ||
1136 | if (!pool) | 1134 | if (!pool) |
1137 | return NULL; | 1135 | return NULL; |
1138 | 1136 | ||
1139 | /* pg_temp? */ | 1137 | /* pg_temp? */ |
1140 | t = ceph_stable_mod(ps, pool->pg_num, pool->pgp_num_mask); | 1138 | pgid.seed = ceph_stable_mod(pgid.seed, pool->pg_num, |
1141 | pgid.seed = t; | 1139 | pool->pgp_num_mask); |
1142 | pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid); | 1140 | pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid); |
1143 | if (pg) { | 1141 | if (pg) { |
1144 | *num = pg->len; | 1142 | *num = pg->len; |
@@ -1149,20 +1147,35 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, | |||
1149 | ruleno = crush_find_rule(osdmap->crush, pool->crush_ruleset, | 1147 | ruleno = crush_find_rule(osdmap->crush, pool->crush_ruleset, |
1150 | pool->type, pool->size); | 1148 | pool->type, pool->size); |
1151 | if (ruleno < 0) { | 1149 | if (ruleno < 0) { |
1152 | pr_err("no crush rule pool %d ruleset %d type %d size %d\n", | 1150 | pr_err("no crush rule pool %lld ruleset %d type %d size %d\n", |
1153 | poolid, pool->crush_ruleset, pool->type, | 1151 | pgid.pool, pool->crush_ruleset, pool->type, |
1154 | pool->size); | 1152 | pool->size); |
1155 | return NULL; | 1153 | return NULL; |
1156 | } | 1154 | } |
1157 | 1155 | ||
1158 | pps = ceph_stable_mod(ps, pool->pgp_num, pool->pgp_num_mask); | 1156 | if (pool->flags & CEPH_POOL_FLAG_HASHPSPOOL) { |
1159 | pps += poolid; | 1157 | /* hash pool id and seed so that pool PGs do not overlap */ |
1158 | pps = crush_hash32_2(CRUSH_HASH_RJENKINS1, | ||
1159 | ceph_stable_mod(pgid.seed, pool->pgp_num, | ||
1160 | pool->pgp_num_mask), | ||
1161 | pgid.pool); | ||
1162 | } else { | ||
1163 | /* | ||
1164 | * legacy behavior: add ps and pool together. this is |
1165 | * not a great approach because the PGs from each pool | ||
1166 | * will overlap on top of each other: 0.5 == 1.4 == | ||
1167 | * 2.3 == ... | ||
1168 | */ | ||
1169 | pps = ceph_stable_mod(pgid.seed, pool->pgp_num, | ||
1170 | pool->pgp_num_mask) + | ||
1171 | (unsigned)pgid.pool; | ||
1172 | } | ||
1160 | r = crush_do_rule(osdmap->crush, ruleno, pps, osds, | 1173 | r = crush_do_rule(osdmap->crush, ruleno, pps, osds, |
1161 | min_t(int, pool->size, *num), | 1174 | min_t(int, pool->size, *num), |
1162 | osdmap->osd_weight); | 1175 | osdmap->osd_weight); |
1163 | if (r < 0) { | 1176 | if (r < 0) { |
1164 | pr_err("error %d from crush rule: pool %d ruleset %d type %d" | 1177 | pr_err("error %d from crush rule: pool %lld ruleset %d type %d" |
1165 | " size %d\n", r, poolid, pool->crush_ruleset, | 1178 | " size %d\n", r, pgid.pool, pool->crush_ruleset, |
1166 | pool->type, pool->size); | 1179 | pool->type, pool->size); |
1167 | return NULL; | 1180 | return NULL; |
1168 | } | 1181 | } |