diff options
author | Ilya Dryomov <ilya.dryomov@inktank.com> | 2014-03-19 10:58:36 -0400 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2014-04-05 00:07:23 -0400 |
commit | 48a163dbb517eba13643bf404a0d695c1ab0a60d (patch) | |
tree | f935878dc2fe37083044356cc79ede2f14bea764 /net/ceph | |
parent | cc48c3e85f7fc48092f2e9874f1a07dd997d9184 (diff) |
crush: fix off-by-one errors in total_tries refactor
Back in 27f4d1f6bc32c2ed7b2c5080cbd58b14df622607 we refactored the CRUSH
code to allow adjustment of the retry counts on a per-pool basis. That
commit had an off-by-one bug: the previous "tries" counter was a *retry*
count, not a *try* count, but the new code was passing in 1 meaning
there should be no retries.
Fix the ftotal vs tries comparison to use < instead of <= to fix the
problem. Note that the original code used <= here, which means the
global "choose_total_tries" tunable is actually counting retries.
Compensate for that by adding 1 in crush_do_rule when we pull the tunable
into the local variable.
This was noticed looking at output from a user provided osdmap.
Unfortunately the map doesn't illustrate the change in mapping behavior
and I haven't managed to construct one yet that does. Inspection of the
crush debug output now aligns with prior versions, though.
Reflects ceph.git commit 795704fd615f0b008dcc81aa088a859b2d075138.
Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
Diffstat (limited to 'net/ceph')
-rw-r--r-- | net/ceph/crush/mapper.c | 46 |
1 files changed, 27 insertions, 19 deletions
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index b703790b4e44..074bb2a5e675 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c | |||
@@ -292,8 +292,8 @@ static int is_out(const struct crush_map *map, | |||
292 | * @outpos: our position in that vector | 292 | * @outpos: our position in that vector |
293 | * @tries: number of attempts to make | 293 | * @tries: number of attempts to make |
294 | * @recurse_tries: number of attempts to have recursive chooseleaf make | 294 | * @recurse_tries: number of attempts to have recursive chooseleaf make |
295 | * @local_tries: localized retries | 295 | * @local_retries: localized retries |
296 | * @local_fallback_tries: localized fallback retries | 296 | * @local_fallback_retries: localized fallback retries |
297 | * @recurse_to_leaf: true if we want one device under each item of given type (chooseleaf instead of choose) | 297 | * @recurse_to_leaf: true if we want one device under each item of given type (chooseleaf instead of choose) |
298 | * @out2: second output vector for leaf items (if @recurse_to_leaf) | 298 | * @out2: second output vector for leaf items (if @recurse_to_leaf) |
299 | */ | 299 | */ |
@@ -304,8 +304,8 @@ static int crush_choose_firstn(const struct crush_map *map, | |||
304 | int *out, int outpos, | 304 | int *out, int outpos, |
305 | unsigned int tries, | 305 | unsigned int tries, |
306 | unsigned int recurse_tries, | 306 | unsigned int recurse_tries, |
307 | unsigned int local_tries, | 307 | unsigned int local_retries, |
308 | unsigned int local_fallback_tries, | 308 | unsigned int local_fallback_retries, |
309 | int recurse_to_leaf, | 309 | int recurse_to_leaf, |
310 | int *out2) | 310 | int *out2) |
311 | { | 311 | { |
@@ -344,9 +344,9 @@ static int crush_choose_firstn(const struct crush_map *map, | |||
344 | reject = 1; | 344 | reject = 1; |
345 | goto reject; | 345 | goto reject; |
346 | } | 346 | } |
347 | if (local_fallback_tries > 0 && | 347 | if (local_fallback_retries > 0 && |
348 | flocal >= (in->size>>1) && | 348 | flocal >= (in->size>>1) && |
349 | flocal > local_fallback_tries) | 349 | flocal > local_fallback_retries) |
350 | item = bucket_perm_choose(in, x, r); | 350 | item = bucket_perm_choose(in, x, r); |
351 | else | 351 | else |
352 | item = crush_bucket_choose(in, x, r); | 352 | item = crush_bucket_choose(in, x, r); |
@@ -393,8 +393,8 @@ static int crush_choose_firstn(const struct crush_map *map, | |||
393 | x, outpos+1, 0, | 393 | x, outpos+1, 0, |
394 | out2, outpos, | 394 | out2, outpos, |
395 | recurse_tries, 0, | 395 | recurse_tries, 0, |
396 | local_tries, | 396 | local_retries, |
397 | local_fallback_tries, | 397 | local_fallback_retries, |
398 | 0, | 398 | 0, |
399 | NULL) <= outpos) | 399 | NULL) <= outpos) |
400 | /* didn't get leaf */ | 400 | /* didn't get leaf */ |
@@ -420,14 +420,14 @@ reject: | |||
420 | ftotal++; | 420 | ftotal++; |
421 | flocal++; | 421 | flocal++; |
422 | 422 | ||
423 | if (collide && flocal <= local_tries) | 423 | if (collide && flocal <= local_retries) |
424 | /* retry locally a few times */ | 424 | /* retry locally a few times */ |
425 | retry_bucket = 1; | 425 | retry_bucket = 1; |
426 | else if (local_fallback_tries > 0 && | 426 | else if (local_fallback_retries > 0 && |
427 | flocal <= in->size + local_fallback_tries) | 427 | flocal <= in->size + local_fallback_retries) |
428 | /* exhaustive bucket search */ | 428 | /* exhaustive bucket search */ |
429 | retry_bucket = 1; | 429 | retry_bucket = 1; |
430 | else if (ftotal <= tries) | 430 | else if (ftotal < tries) |
431 | /* then retry descent */ | 431 | /* then retry descent */ |
432 | retry_descent = 1; | 432 | retry_descent = 1; |
433 | else | 433 | else |
@@ -640,10 +640,18 @@ int crush_do_rule(const struct crush_map *map, | |||
640 | __u32 step; | 640 | __u32 step; |
641 | int i, j; | 641 | int i, j; |
642 | int numrep; | 642 | int numrep; |
643 | int choose_tries = map->choose_total_tries; | 643 | /* |
644 | int choose_local_tries = map->choose_local_tries; | 644 | * the original choose_total_tries value was off by one (it |
645 | int choose_local_fallback_tries = map->choose_local_fallback_tries; | 645 | * counted "retries" and not "tries"). add one. |
646 | */ | ||
647 | int choose_tries = map->choose_total_tries + 1; | ||
646 | int choose_leaf_tries = 0; | 648 | int choose_leaf_tries = 0; |
649 | /* | ||
650 | * the local tries values were counted as "retries", though, | ||
651 | * and need no adjustment | ||
652 | */ | ||
653 | int choose_local_retries = map->choose_local_tries; | ||
654 | int choose_local_fallback_retries = map->choose_local_fallback_tries; | ||
647 | 655 | ||
648 | if ((__u32)ruleno >= map->max_rules) { | 656 | if ((__u32)ruleno >= map->max_rules) { |
649 | dprintk(" bad ruleno %d\n", ruleno); | 657 | dprintk(" bad ruleno %d\n", ruleno); |
@@ -677,12 +685,12 @@ int crush_do_rule(const struct crush_map *map, | |||
677 | 685 | ||
678 | case CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES: | 686 | case CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES: |
679 | if (curstep->arg1 > 0) | 687 | if (curstep->arg1 > 0) |
680 | choose_local_tries = curstep->arg1; | 688 | choose_local_retries = curstep->arg1; |
681 | break; | 689 | break; |
682 | 690 | ||
683 | case CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES: | 691 | case CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES: |
684 | if (curstep->arg1 > 0) | 692 | if (curstep->arg1 > 0) |
685 | choose_local_fallback_tries = curstep->arg1; | 693 | choose_local_fallback_retries = curstep->arg1; |
686 | break; | 694 | break; |
687 | 695 | ||
688 | case CRUSH_RULE_CHOOSELEAF_FIRSTN: | 696 | case CRUSH_RULE_CHOOSELEAF_FIRSTN: |
@@ -734,8 +742,8 @@ int crush_do_rule(const struct crush_map *map, | |||
734 | o+osize, j, | 742 | o+osize, j, |
735 | choose_tries, | 743 | choose_tries, |
736 | recurse_tries, | 744 | recurse_tries, |
737 | choose_local_tries, | 745 | choose_local_retries, |
738 | choose_local_fallback_tries, | 746 | choose_local_fallback_retries, |
739 | recurse_to_leaf, | 747 | recurse_to_leaf, |
740 | c+osize); | 748 | c+osize); |
741 | } else { | 749 | } else { |