aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/ceph/ceph_features.h4
-rw-r--r--include/linux/ceph/osdmap.h2
-rw-r--r--net/ceph/osdmap.c39
3 files changed, 31 insertions, 14 deletions
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index ab0a54286e0d..76554cecaab2 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -34,6 +34,7 @@
34#define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27) 34#define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27)
35#define CEPH_FEATURE_OSD_HBMSGS (1<<28) 35#define CEPH_FEATURE_OSD_HBMSGS (1<<28)
36#define CEPH_FEATURE_MDSENC (1<<29) 36#define CEPH_FEATURE_MDSENC (1<<29)
37#define CEPH_FEATURE_OSDHASHPSPOOL (1<<30)
37 38
38/* 39/*
39 * Features supported. 40 * Features supported.
@@ -45,7 +46,8 @@
45 CEPH_FEATURE_OSDENC | \ 46 CEPH_FEATURE_OSDENC | \
46 CEPH_FEATURE_CRUSH_TUNABLES | \ 47 CEPH_FEATURE_CRUSH_TUNABLES | \
47 CEPH_FEATURE_CRUSH_TUNABLES2 | \ 48 CEPH_FEATURE_CRUSH_TUNABLES2 | \
48 CEPH_FEATURE_REPLY_CREATE_INODE) 49 CEPH_FEATURE_REPLY_CREATE_INODE | \
50 CEPH_FEATURE_OSDHASHPSPOOL)
49 51
50#define CEPH_FEATURES_REQUIRED_DEFAULT \ 52#define CEPH_FEATURES_REQUIRED_DEFAULT \
51 (CEPH_FEATURE_NOSRCADDR | \ 53 (CEPH_FEATURE_NOSRCADDR | \
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index 35985125f118..c819190d1642 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -23,6 +23,8 @@ struct ceph_pg {
23 uint32_t seed; 23 uint32_t seed;
24}; 24};
25 25
26#define CEPH_POOL_FLAG_HASHPSPOOL 1
27
26struct ceph_pg_pool_info { 28struct ceph_pg_pool_info {
27 struct rb_node node; 29 struct rb_node node;
28 s64 id; 30 s64 id;
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 378471644501..69bc4bf89e3e 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1127,18 +1127,16 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
1127 struct ceph_pg_mapping *pg; 1127 struct ceph_pg_mapping *pg;
1128 struct ceph_pg_pool_info *pool; 1128 struct ceph_pg_pool_info *pool;
1129 int ruleno; 1129 int ruleno;
1130 unsigned int poolid, ps, pps, t, r; 1130 int r;
1131 u32 pps;
1131 1132
1132 poolid = pgid.pool; 1133 pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool);
1133 ps = pgid.seed;
1134
1135 pool = __lookup_pg_pool(&osdmap->pg_pools, poolid);
1136 if (!pool) 1134 if (!pool)
1137 return NULL; 1135 return NULL;
1138 1136
1139 /* pg_temp? */ 1137 /* pg_temp? */
1140 t = ceph_stable_mod(ps, pool->pg_num, pool->pgp_num_mask); 1138 pgid.seed = ceph_stable_mod(pgid.seed, pool->pg_num,
1141 pgid.seed = t; 1139 pool->pgp_num_mask);
1142 pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid); 1140 pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid);
1143 if (pg) { 1141 if (pg) {
1144 *num = pg->len; 1142 *num = pg->len;
@@ -1149,20 +1147,35 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
1149 ruleno = crush_find_rule(osdmap->crush, pool->crush_ruleset, 1147 ruleno = crush_find_rule(osdmap->crush, pool->crush_ruleset,
1150 pool->type, pool->size); 1148 pool->type, pool->size);
1151 if (ruleno < 0) { 1149 if (ruleno < 0) {
1152 pr_err("no crush rule pool %d ruleset %d type %d size %d\n", 1150 pr_err("no crush rule pool %lld ruleset %d type %d size %d\n",
1153 poolid, pool->crush_ruleset, pool->type, 1151 pgid.pool, pool->crush_ruleset, pool->type,
1154 pool->size); 1152 pool->size);
1155 return NULL; 1153 return NULL;
1156 } 1154 }
1157 1155
1158 pps = ceph_stable_mod(ps, pool->pgp_num, pool->pgp_num_mask); 1156 if (pool->flags & CEPH_POOL_FLAG_HASHPSPOOL) {
1159 pps += poolid; 1157 /* hash pool id and seed sothat pool PGs do not overlap */
1158 pps = crush_hash32_2(CRUSH_HASH_RJENKINS1,
1159 ceph_stable_mod(pgid.seed, pool->pgp_num,
1160 pool->pgp_num_mask),
1161 pgid.pool);
1162 } else {
1163 /*
1164 * legacy behavior: add ps and pool together. this is
1165 * not a great approach because the PGs from each pool
1166 * will overlap on top of each other: 0.5 == 1.4 ==
1167 * 2.3 == ...
1168 */
1169 pps = ceph_stable_mod(pgid.seed, pool->pgp_num,
1170 pool->pgp_num_mask) +
1171 (unsigned)pgid.pool;
1172 }
1160 r = crush_do_rule(osdmap->crush, ruleno, pps, osds, 1173 r = crush_do_rule(osdmap->crush, ruleno, pps, osds,
1161 min_t(int, pool->size, *num), 1174 min_t(int, pool->size, *num),
1162 osdmap->osd_weight); 1175 osdmap->osd_weight);
1163 if (r < 0) { 1176 if (r < 0) {
1164 pr_err("error %d from crush rule: pool %d ruleset %d type %d" 1177 pr_err("error %d from crush rule: pool %lld ruleset %d type %d"
1165 " size %d\n", r, poolid, pool->crush_ruleset, 1178 " size %d\n", r, pgid.pool, pool->crush_ruleset,
1166 pool->type, pool->size); 1179 pool->type, pool->size);
1167 return NULL; 1180 return NULL;
1168 } 1181 }