aboutsummaryrefslogtreecommitdiffstats
path: root/net/ceph
diff options
context:
space:
mode:
authorIlya Dryomov <ilya.dryomov@inktank.com>2014-03-19 10:58:36 -0400
committerSage Weil <sage@inktank.com>2014-04-05 00:07:23 -0400
commit48a163dbb517eba13643bf404a0d695c1ab0a60d (patch)
treef935878dc2fe37083044356cc79ede2f14bea764 /net/ceph
parentcc48c3e85f7fc48092f2e9874f1a07dd997d9184 (diff)
crush: fix off-by-one errors in total_tries refactor
Back in 27f4d1f6bc32c2ed7b2c5080cbd58b14df622607 we refactored the CRUSH code to allow adjustment of the retry counts on a per-pool basis. That commit had an off-by-one bug: the previous "tries" counter was a *retry* count, not a *try* count, but the new code was passing in 1 meaning there should be no retries. Fix the ftotal vs tries comparison to use < instead of <= to fix the problem. Note that the original code used <= here, which means the global "choose_total_tries" tunable is actually counting retries. Compensate for that by adding 1 in crush_do_rule when we pull the tunable into the local variable. This was noticed looking at output from a user provided osdmap. Unfortunately the map doesn't illustrate the change in mapping behavior and I haven't managed to construct one yet that does. Inspection of the crush debug output now aligns with prior versions, though. Reflects ceph.git commit 795704fd615f0b008dcc81aa088a859b2d075138. Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com> Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
Diffstat (limited to 'net/ceph')
-rw-r--r--net/ceph/crush/mapper.c46
1 files changed, 27 insertions, 19 deletions
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index b703790b4e44..074bb2a5e675 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -292,8 +292,8 @@ static int is_out(const struct crush_map *map,
292 * @outpos: our position in that vector 292 * @outpos: our position in that vector
293 * @tries: number of attempts to make 293 * @tries: number of attempts to make
294 * @recurse_tries: number of attempts to have recursive chooseleaf make 294 * @recurse_tries: number of attempts to have recursive chooseleaf make
295 * @local_tries: localized retries 295 * @local_retries: localized retries
296 * @local_fallback_tries: localized fallback retries 296 * @local_fallback_retries: localized fallback retries
297 * @recurse_to_leaf: true if we want one device under each item of given type (chooseleaf instead of choose) 297 * @recurse_to_leaf: true if we want one device under each item of given type (chooseleaf instead of choose)
298 * @out2: second output vector for leaf items (if @recurse_to_leaf) 298 * @out2: second output vector for leaf items (if @recurse_to_leaf)
299 */ 299 */
@@ -304,8 +304,8 @@ static int crush_choose_firstn(const struct crush_map *map,
304 int *out, int outpos, 304 int *out, int outpos,
305 unsigned int tries, 305 unsigned int tries,
306 unsigned int recurse_tries, 306 unsigned int recurse_tries,
307 unsigned int local_tries, 307 unsigned int local_retries,
308 unsigned int local_fallback_tries, 308 unsigned int local_fallback_retries,
309 int recurse_to_leaf, 309 int recurse_to_leaf,
310 int *out2) 310 int *out2)
311{ 311{
@@ -344,9 +344,9 @@ static int crush_choose_firstn(const struct crush_map *map,
344 reject = 1; 344 reject = 1;
345 goto reject; 345 goto reject;
346 } 346 }
347 if (local_fallback_tries > 0 && 347 if (local_fallback_retries > 0 &&
348 flocal >= (in->size>>1) && 348 flocal >= (in->size>>1) &&
349 flocal > local_fallback_tries) 349 flocal > local_fallback_retries)
350 item = bucket_perm_choose(in, x, r); 350 item = bucket_perm_choose(in, x, r);
351 else 351 else
352 item = crush_bucket_choose(in, x, r); 352 item = crush_bucket_choose(in, x, r);
@@ -393,8 +393,8 @@ static int crush_choose_firstn(const struct crush_map *map,
393 x, outpos+1, 0, 393 x, outpos+1, 0,
394 out2, outpos, 394 out2, outpos,
395 recurse_tries, 0, 395 recurse_tries, 0,
396 local_tries, 396 local_retries,
397 local_fallback_tries, 397 local_fallback_retries,
398 0, 398 0,
399 NULL) <= outpos) 399 NULL) <= outpos)
400 /* didn't get leaf */ 400 /* didn't get leaf */
@@ -420,14 +420,14 @@ reject:
420 ftotal++; 420 ftotal++;
421 flocal++; 421 flocal++;
422 422
423 if (collide && flocal <= local_tries) 423 if (collide && flocal <= local_retries)
424 /* retry locally a few times */ 424 /* retry locally a few times */
425 retry_bucket = 1; 425 retry_bucket = 1;
426 else if (local_fallback_tries > 0 && 426 else if (local_fallback_retries > 0 &&
427 flocal <= in->size + local_fallback_tries) 427 flocal <= in->size + local_fallback_retries)
428 /* exhaustive bucket search */ 428 /* exhaustive bucket search */
429 retry_bucket = 1; 429 retry_bucket = 1;
430 else if (ftotal <= tries) 430 else if (ftotal < tries)
431 /* then retry descent */ 431 /* then retry descent */
432 retry_descent = 1; 432 retry_descent = 1;
433 else 433 else
@@ -640,10 +640,18 @@ int crush_do_rule(const struct crush_map *map,
640 __u32 step; 640 __u32 step;
641 int i, j; 641 int i, j;
642 int numrep; 642 int numrep;
643 int choose_tries = map->choose_total_tries; 643 /*
644 int choose_local_tries = map->choose_local_tries; 644 * the original choose_total_tries value was off by one (it
645 int choose_local_fallback_tries = map->choose_local_fallback_tries; 645 * counted "retries" and not "tries"). add one.
646 */
647 int choose_tries = map->choose_total_tries + 1;
646 int choose_leaf_tries = 0; 648 int choose_leaf_tries = 0;
649 /*
650 * the local tries values were counted as "retries", though,
651 * and need no adjustment
652 */
653 int choose_local_retries = map->choose_local_tries;
654 int choose_local_fallback_retries = map->choose_local_fallback_tries;
647 655
648 if ((__u32)ruleno >= map->max_rules) { 656 if ((__u32)ruleno >= map->max_rules) {
649 dprintk(" bad ruleno %d\n", ruleno); 657 dprintk(" bad ruleno %d\n", ruleno);
@@ -677,12 +685,12 @@ int crush_do_rule(const struct crush_map *map,
677 685
678 case CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES: 686 case CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES:
679 if (curstep->arg1 > 0) 687 if (curstep->arg1 > 0)
680 choose_local_tries = curstep->arg1; 688 choose_local_retries = curstep->arg1;
681 break; 689 break;
682 690
683 case CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES: 691 case CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES:
684 if (curstep->arg1 > 0) 692 if (curstep->arg1 > 0)
685 choose_local_fallback_tries = curstep->arg1; 693 choose_local_fallback_retries = curstep->arg1;
686 break; 694 break;
687 695
688 case CRUSH_RULE_CHOOSELEAF_FIRSTN: 696 case CRUSH_RULE_CHOOSELEAF_FIRSTN:
@@ -734,8 +742,8 @@ int crush_do_rule(const struct crush_map *map,
734 o+osize, j, 742 o+osize, j,
735 choose_tries, 743 choose_tries,
736 recurse_tries, 744 recurse_tries,
737 choose_local_tries, 745 choose_local_retries,
738 choose_local_fallback_tries, 746 choose_local_fallback_retries,
739 recurse_to_leaf, 747 recurse_to_leaf,
740 c+osize); 748 c+osize);
741 } else { 749 } else {