about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- include/linux/ceph/ceph_features.h 7
-rw-r--r-- include/linux/crush/crush.h 2
-rw-r--r-- net/ceph/crush/mapper.c 13
-rw-r--r-- net/ceph/osdmap.c 6
4 files changed, 23 insertions, 5 deletions
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index 6b7c6acbb3bf..2160aab482f6 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -14,7 +14,9 @@
14#define CEPH_FEATURE_DIRLAYOUTHASH (1<<7) 14#define CEPH_FEATURE_DIRLAYOUTHASH (1<<7)
15/* bits 8-17 defined by user-space; not supported yet here */ 15/* bits 8-17 defined by user-space; not supported yet here */
16#define CEPH_FEATURE_CRUSH_TUNABLES (1<<18) 16#define CEPH_FEATURE_CRUSH_TUNABLES (1<<18)
17/* bits 19-25 defined by user-space; not supported yet here */ 17/* bits 19-24 defined by user-space; not supported yet here */
18#define CEPH_FEATURE_CRUSH_TUNABLES2 (1<<25)
19/* bit 26 defined by user-space; not supported yet here */
18#define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27) 20#define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27)
19 21
20/* 22/*
@@ -22,7 +24,8 @@
22 */ 24 */
23#define CEPH_FEATURES_SUPPORTED_DEFAULT \ 25#define CEPH_FEATURES_SUPPORTED_DEFAULT \
24 (CEPH_FEATURE_NOSRCADDR | \ 26 (CEPH_FEATURE_NOSRCADDR | \
25 CEPH_FEATURE_CRUSH_TUNABLES | \ 27 CEPH_FEATURE_CRUSH_TUNABLES | \
28 CEPH_FEATURE_CRUSH_TUNABLES2 | \
26 CEPH_FEATURE_REPLY_CREATE_INODE) 29 CEPH_FEATURE_REPLY_CREATE_INODE)
27 30
28#define CEPH_FEATURES_REQUIRED_DEFAULT \ 31#define CEPH_FEATURES_REQUIRED_DEFAULT \
diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
index 25baa287cff7..6a1101f24cfb 100644
--- a/include/linux/crush/crush.h
+++ b/include/linux/crush/crush.h
@@ -162,6 +162,8 @@ struct crush_map {
162 __u32 choose_local_fallback_tries; 162 __u32 choose_local_fallback_tries;
163 /* choose attempts before giving up */ 163 /* choose attempts before giving up */
164 __u32 choose_total_tries; 164 __u32 choose_total_tries;
165 /* attempt chooseleaf inner descent once; on failure retry outer descent */
166 __u32 chooseleaf_descend_once;
165}; 167};
166 168
167 169
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 35fce755ce10..96c8a58937db 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -287,6 +287,7 @@ static int is_out(const struct crush_map *map, const __u32 *weight, int item, in
287 * @outpos: our position in that vector 287 * @outpos: our position in that vector
288 * @firstn: true if choosing "first n" items, false if choosing "indep" 288 * @firstn: true if choosing "first n" items, false if choosing "indep"
289 * @recurse_to_leaf: true if we want one device under each item of given type 289 * @recurse_to_leaf: true if we want one device under each item of given type
290 * @descend_once: true if we should only try one descent before giving up
290 * @out2: second output vector for leaf items (if @recurse_to_leaf) 291 * @out2: second output vector for leaf items (if @recurse_to_leaf)
291 */ 292 */
292static int crush_choose(const struct crush_map *map, 293static int crush_choose(const struct crush_map *map,
@@ -295,7 +296,7 @@ static int crush_choose(const struct crush_map *map,
295 int x, int numrep, int type, 296 int x, int numrep, int type,
296 int *out, int outpos, 297 int *out, int outpos,
297 int firstn, int recurse_to_leaf, 298 int firstn, int recurse_to_leaf,
298 int *out2) 299 int descend_once, int *out2)
299{ 300{
300 int rep; 301 int rep;
301 unsigned int ftotal, flocal; 302 unsigned int ftotal, flocal;
@@ -399,6 +400,7 @@ static int crush_choose(const struct crush_map *map,
399 x, outpos+1, 0, 400 x, outpos+1, 0,
400 out2, outpos, 401 out2, outpos,
401 firstn, 0, 402 firstn, 0,
403 map->chooseleaf_descend_once,
402 NULL) <= outpos) 404 NULL) <= outpos)
403 /* didn't get leaf */ 405 /* didn't get leaf */
404 reject = 1; 406 reject = 1;
@@ -422,7 +424,10 @@ reject:
422 ftotal++; 424 ftotal++;
423 flocal++; 425 flocal++;
424 426
425 if (collide && flocal <= map->choose_local_tries) 427 if (reject && descend_once)
428 /* let outer call try again */
429 skip_rep = 1;
430 else if (collide && flocal <= map->choose_local_tries)
426 /* retry locally a few times */ 431 /* retry locally a few times */
427 retry_bucket = 1; 432 retry_bucket = 1;
428 else if (map->choose_local_fallback_tries > 0 && 433 else if (map->choose_local_fallback_tries > 0 &&
@@ -485,6 +490,7 @@ int crush_do_rule(const struct crush_map *map,
485 int i, j; 490 int i, j;
486 int numrep; 491 int numrep;
487 int firstn; 492 int firstn;
493 const int descend_once = 0;
488 494
489 if ((__u32)ruleno >= map->max_rules) { 495 if ((__u32)ruleno >= map->max_rules) {
490 dprintk(" bad ruleno %d\n", ruleno); 496 dprintk(" bad ruleno %d\n", ruleno);
@@ -544,7 +550,8 @@ int crush_do_rule(const struct crush_map *map,
544 curstep->arg2, 550 curstep->arg2,
545 o+osize, j, 551 o+osize, j,
546 firstn, 552 firstn,
547 recurse_to_leaf, c+osize); 553 recurse_to_leaf,
554 descend_once, c+osize);
548 } 555 }
549 556
550 if (recurse_to_leaf) 557 if (recurse_to_leaf)
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index de73214b5d26..ca05871635bc 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -170,6 +170,7 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
170 c->choose_local_tries = 2; 170 c->choose_local_tries = 2;
171 c->choose_local_fallback_tries = 5; 171 c->choose_local_fallback_tries = 5;
172 c->choose_total_tries = 19; 172 c->choose_total_tries = 19;
173 c->chooseleaf_descend_once = 0;
173 174
174 ceph_decode_need(p, end, 4*sizeof(u32), bad); 175 ceph_decode_need(p, end, 4*sizeof(u32), bad);
175 magic = ceph_decode_32(p); 176 magic = ceph_decode_32(p);
@@ -336,6 +337,11 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
336 dout("crush decode tunable choose_total_tries = %d", 337 dout("crush decode tunable choose_total_tries = %d",
337 c->choose_total_tries); 338 c->choose_total_tries);
338 339
340 ceph_decode_need(p, end, sizeof(u32), done);
341 c->chooseleaf_descend_once = ceph_decode_32(p);
342 dout("crush decode tunable chooseleaf_descend_once = %d",
343 c->chooseleaf_descend_once);
344
339done: 345done:
340 dout("crush_decode success\n"); 346 dout("crush_decode success\n");
341 return c; 347 return c;