diff options
Diffstat (limited to 'net/ceph/crush/mapper.c')
-rw-r--r-- | net/ceph/crush/mapper.c | 124 |
1 files changed, 48 insertions, 76 deletions
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index 363f8f7e6c3..d7edc24333b 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c | |||
@@ -33,9 +33,9 @@ | |||
33 | * @type: storage ruleset type (user defined) | 33 | * @type: storage ruleset type (user defined) |
34 | * @size: output set size | 34 | * @size: output set size |
35 | */ | 35 | */ |
36 | int crush_find_rule(struct crush_map *map, int ruleset, int type, int size) | 36 | int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size) |
37 | { | 37 | { |
38 | int i; | 38 | __u32 i; |
39 | 39 | ||
40 | for (i = 0; i < map->max_rules; i++) { | 40 | for (i = 0; i < map->max_rules; i++) { |
41 | if (map->rules[i] && | 41 | if (map->rules[i] && |
@@ -73,7 +73,7 @@ static int bucket_perm_choose(struct crush_bucket *bucket, | |||
73 | unsigned int i, s; | 73 | unsigned int i, s; |
74 | 74 | ||
75 | /* start a new permutation if @x has changed */ | 75 | /* start a new permutation if @x has changed */ |
76 | if (bucket->perm_x != x || bucket->perm_n == 0) { | 76 | if (bucket->perm_x != (__u32)x || bucket->perm_n == 0) { |
77 | dprintk("bucket %d new x=%d\n", bucket->id, x); | 77 | dprintk("bucket %d new x=%d\n", bucket->id, x); |
78 | bucket->perm_x = x; | 78 | bucket->perm_x = x; |
79 | 79 | ||
@@ -153,8 +153,8 @@ static int bucket_list_choose(struct crush_bucket_list *bucket, | |||
153 | return bucket->h.items[i]; | 153 | return bucket->h.items[i]; |
154 | } | 154 | } |
155 | 155 | ||
156 | BUG_ON(1); | 156 | dprintk("bad list sums for bucket %d\n", bucket->h.id); |
157 | return 0; | 157 | return bucket->h.items[0]; |
158 | } | 158 | } |
159 | 159 | ||
160 | 160 | ||
@@ -220,7 +220,7 @@ static int bucket_tree_choose(struct crush_bucket_tree *bucket, | |||
220 | static int bucket_straw_choose(struct crush_bucket_straw *bucket, | 220 | static int bucket_straw_choose(struct crush_bucket_straw *bucket, |
221 | int x, int r) | 221 | int x, int r) |
222 | { | 222 | { |
223 | int i; | 223 | __u32 i; |
224 | int high = 0; | 224 | int high = 0; |
225 | __u64 high_draw = 0; | 225 | __u64 high_draw = 0; |
226 | __u64 draw; | 226 | __u64 draw; |
@@ -240,6 +240,7 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket, | |||
240 | static int crush_bucket_choose(struct crush_bucket *in, int x, int r) | 240 | static int crush_bucket_choose(struct crush_bucket *in, int x, int r) |
241 | { | 241 | { |
242 | dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r); | 242 | dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r); |
243 | BUG_ON(in->size == 0); | ||
243 | switch (in->alg) { | 244 | switch (in->alg) { |
244 | case CRUSH_BUCKET_UNIFORM: | 245 | case CRUSH_BUCKET_UNIFORM: |
245 | return bucket_uniform_choose((struct crush_bucket_uniform *)in, | 246 | return bucket_uniform_choose((struct crush_bucket_uniform *)in, |
@@ -254,7 +255,7 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r) | |||
254 | return bucket_straw_choose((struct crush_bucket_straw *)in, | 255 | return bucket_straw_choose((struct crush_bucket_straw *)in, |
255 | x, r); | 256 | x, r); |
256 | default: | 257 | default: |
257 | BUG_ON(1); | 258 | dprintk("unknown bucket %d alg %d\n", in->id, in->alg); |
258 | return in->items[0]; | 259 | return in->items[0]; |
259 | } | 260 | } |
260 | } | 261 | } |
@@ -263,7 +264,7 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r) | |||
263 | * true if device is marked "out" (failed, fully offloaded) | 264 | * true if device is marked "out" (failed, fully offloaded) |
264 | * of the cluster | 265 | * of the cluster |
265 | */ | 266 | */ |
266 | static int is_out(struct crush_map *map, __u32 *weight, int item, int x) | 267 | static int is_out(const struct crush_map *map, const __u32 *weight, int item, int x) |
267 | { | 268 | { |
268 | if (weight[item] >= 0x10000) | 269 | if (weight[item] >= 0x10000) |
269 | return 0; | 270 | return 0; |
@@ -288,16 +289,16 @@ static int is_out(struct crush_map *map, __u32 *weight, int item, int x) | |||
288 | * @recurse_to_leaf: true if we want one device under each item of given type | 289 | * @recurse_to_leaf: true if we want one device under each item of given type |
289 | * @out2: second output vector for leaf items (if @recurse_to_leaf) | 290 | * @out2: second output vector for leaf items (if @recurse_to_leaf) |
290 | */ | 291 | */ |
291 | static int crush_choose(struct crush_map *map, | 292 | static int crush_choose(const struct crush_map *map, |
292 | struct crush_bucket *bucket, | 293 | struct crush_bucket *bucket, |
293 | __u32 *weight, | 294 | const __u32 *weight, |
294 | int x, int numrep, int type, | 295 | int x, int numrep, int type, |
295 | int *out, int outpos, | 296 | int *out, int outpos, |
296 | int firstn, int recurse_to_leaf, | 297 | int firstn, int recurse_to_leaf, |
297 | int *out2) | 298 | int *out2) |
298 | { | 299 | { |
299 | int rep; | 300 | int rep; |
300 | int ftotal, flocal; | 301 | unsigned int ftotal, flocal; |
301 | int retry_descent, retry_bucket, skip_rep; | 302 | int retry_descent, retry_bucket, skip_rep; |
302 | struct crush_bucket *in = bucket; | 303 | struct crush_bucket *in = bucket; |
303 | int r; | 304 | int r; |
@@ -305,7 +306,7 @@ static int crush_choose(struct crush_map *map, | |||
305 | int item = 0; | 306 | int item = 0; |
306 | int itemtype; | 307 | int itemtype; |
307 | int collide, reject; | 308 | int collide, reject; |
308 | const int orig_tries = 5; /* attempts before we fall back to search */ | 309 | const unsigned int orig_tries = 5; /* attempts before we fall back to search */ |
309 | 310 | ||
310 | dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "", | 311 | dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "", |
311 | bucket->id, x, outpos, numrep); | 312 | bucket->id, x, outpos, numrep); |
@@ -326,7 +327,7 @@ static int crush_choose(struct crush_map *map, | |||
326 | r = rep; | 327 | r = rep; |
327 | if (in->alg == CRUSH_BUCKET_UNIFORM) { | 328 | if (in->alg == CRUSH_BUCKET_UNIFORM) { |
328 | /* be careful */ | 329 | /* be careful */ |
329 | if (firstn || numrep >= in->size) | 330 | if (firstn || (__u32)numrep >= in->size) |
330 | /* r' = r + f_total */ | 331 | /* r' = r + f_total */ |
331 | r += ftotal; | 332 | r += ftotal; |
332 | else if (in->size % numrep == 0) | 333 | else if (in->size % numrep == 0) |
@@ -355,7 +356,11 @@ static int crush_choose(struct crush_map *map, | |||
355 | item = bucket_perm_choose(in, x, r); | 356 | item = bucket_perm_choose(in, x, r); |
356 | else | 357 | else |
357 | item = crush_bucket_choose(in, x, r); | 358 | item = crush_bucket_choose(in, x, r); |
358 | BUG_ON(item >= map->max_devices); | 359 | if (item >= map->max_devices) { |
360 | dprintk(" bad item %d\n", item); | ||
361 | skip_rep = 1; | ||
362 | break; | ||
363 | } | ||
359 | 364 | ||
360 | /* desired type? */ | 365 | /* desired type? */ |
361 | if (item < 0) | 366 | if (item < 0) |
@@ -366,8 +371,12 @@ static int crush_choose(struct crush_map *map, | |||
366 | 371 | ||
367 | /* keep going? */ | 372 | /* keep going? */ |
368 | if (itemtype != type) { | 373 | if (itemtype != type) { |
369 | BUG_ON(item >= 0 || | 374 | if (item >= 0 || |
370 | (-1-item) >= map->max_buckets); | 375 | (-1-item) >= map->max_buckets) { |
376 | dprintk(" bad item type %d\n", type); | ||
377 | skip_rep = 1; | ||
378 | break; | ||
379 | } | ||
371 | in = map->buckets[-1-item]; | 380 | in = map->buckets[-1-item]; |
372 | retry_bucket = 1; | 381 | retry_bucket = 1; |
373 | continue; | 382 | continue; |
@@ -416,7 +425,7 @@ reject: | |||
416 | if (collide && flocal < 3) | 425 | if (collide && flocal < 3) |
417 | /* retry locally a few times */ | 426 | /* retry locally a few times */ |
418 | retry_bucket = 1; | 427 | retry_bucket = 1; |
419 | else if (flocal < in->size + orig_tries) | 428 | else if (flocal <= in->size + orig_tries) |
420 | /* exhaustive bucket search */ | 429 | /* exhaustive bucket search */ |
421 | retry_bucket = 1; | 430 | retry_bucket = 1; |
422 | else if (ftotal < 20) | 431 | else if (ftotal < 20) |
@@ -426,7 +435,7 @@ reject: | |||
426 | /* else give up */ | 435 | /* else give up */ |
427 | skip_rep = 1; | 436 | skip_rep = 1; |
428 | dprintk(" reject %d collide %d " | 437 | dprintk(" reject %d collide %d " |
429 | "ftotal %d flocal %d\n", | 438 | "ftotal %u flocal %u\n", |
430 | reject, collide, ftotal, | 439 | reject, collide, ftotal, |
431 | flocal); | 440 | flocal); |
432 | } | 441 | } |
@@ -455,15 +464,12 @@ reject: | |||
455 | * @x: hash input | 464 | * @x: hash input |
456 | * @result: pointer to result vector | 465 | * @result: pointer to result vector |
457 | * @result_max: maximum result size | 466 | * @result_max: maximum result size |
458 | * @force: force initial replica choice; -1 for none | ||
459 | */ | 467 | */ |
460 | int crush_do_rule(struct crush_map *map, | 468 | int crush_do_rule(const struct crush_map *map, |
461 | int ruleno, int x, int *result, int result_max, | 469 | int ruleno, int x, int *result, int result_max, |
462 | int force, __u32 *weight) | 470 | const __u32 *weight) |
463 | { | 471 | { |
464 | int result_len; | 472 | int result_len; |
465 | int force_context[CRUSH_MAX_DEPTH]; | ||
466 | int force_pos = -1; | ||
467 | int a[CRUSH_MAX_SET]; | 473 | int a[CRUSH_MAX_SET]; |
468 | int b[CRUSH_MAX_SET]; | 474 | int b[CRUSH_MAX_SET]; |
469 | int c[CRUSH_MAX_SET]; | 475 | int c[CRUSH_MAX_SET]; |
@@ -474,66 +480,44 @@ int crush_do_rule(struct crush_map *map, | |||
474 | int osize; | 480 | int osize; |
475 | int *tmp; | 481 | int *tmp; |
476 | struct crush_rule *rule; | 482 | struct crush_rule *rule; |
477 | int step; | 483 | __u32 step; |
478 | int i, j; | 484 | int i, j; |
479 | int numrep; | 485 | int numrep; |
480 | int firstn; | 486 | int firstn; |
481 | 487 | ||
482 | BUG_ON(ruleno >= map->max_rules); | 488 | if ((__u32)ruleno >= map->max_rules) { |
489 | dprintk(" bad ruleno %d\n", ruleno); | ||
490 | return 0; | ||
491 | } | ||
483 | 492 | ||
484 | rule = map->rules[ruleno]; | 493 | rule = map->rules[ruleno]; |
485 | result_len = 0; | 494 | result_len = 0; |
486 | w = a; | 495 | w = a; |
487 | o = b; | 496 | o = b; |
488 | 497 | ||
489 | /* | ||
490 | * determine hierarchical context of force, if any. note | ||
491 | * that this may or may not correspond to the specific types | ||
492 | * referenced by the crush rule. | ||
493 | */ | ||
494 | if (force >= 0 && | ||
495 | force < map->max_devices && | ||
496 | map->device_parents[force] != 0 && | ||
497 | !is_out(map, weight, force, x)) { | ||
498 | while (1) { | ||
499 | force_context[++force_pos] = force; | ||
500 | if (force >= 0) | ||
501 | force = map->device_parents[force]; | ||
502 | else | ||
503 | force = map->bucket_parents[-1-force]; | ||
504 | if (force == 0) | ||
505 | break; | ||
506 | } | ||
507 | } | ||
508 | |||
509 | for (step = 0; step < rule->len; step++) { | 498 | for (step = 0; step < rule->len; step++) { |
499 | struct crush_rule_step *curstep = &rule->steps[step]; | ||
500 | |||
510 | firstn = 0; | 501 | firstn = 0; |
511 | switch (rule->steps[step].op) { | 502 | switch (curstep->op) { |
512 | case CRUSH_RULE_TAKE: | 503 | case CRUSH_RULE_TAKE: |
513 | w[0] = rule->steps[step].arg1; | 504 | w[0] = curstep->arg1; |
514 | |||
515 | /* find position in force_context/hierarchy */ | ||
516 | while (force_pos >= 0 && | ||
517 | force_context[force_pos] != w[0]) | ||
518 | force_pos--; | ||
519 | /* and move past it */ | ||
520 | if (force_pos >= 0) | ||
521 | force_pos--; | ||
522 | |||
523 | wsize = 1; | 505 | wsize = 1; |
524 | break; | 506 | break; |
525 | 507 | ||
526 | case CRUSH_RULE_CHOOSE_LEAF_FIRSTN: | 508 | case CRUSH_RULE_CHOOSE_LEAF_FIRSTN: |
527 | case CRUSH_RULE_CHOOSE_FIRSTN: | 509 | case CRUSH_RULE_CHOOSE_FIRSTN: |
528 | firstn = 1; | 510 | firstn = 1; |
511 | /* fall through */ | ||
529 | case CRUSH_RULE_CHOOSE_LEAF_INDEP: | 512 | case CRUSH_RULE_CHOOSE_LEAF_INDEP: |
530 | case CRUSH_RULE_CHOOSE_INDEP: | 513 | case CRUSH_RULE_CHOOSE_INDEP: |
531 | BUG_ON(wsize == 0); | 514 | if (wsize == 0) |
515 | break; | ||
532 | 516 | ||
533 | recurse_to_leaf = | 517 | recurse_to_leaf = |
534 | rule->steps[step].op == | 518 | curstep->op == |
535 | CRUSH_RULE_CHOOSE_LEAF_FIRSTN || | 519 | CRUSH_RULE_CHOOSE_LEAF_FIRSTN || |
536 | rule->steps[step].op == | 520 | curstep->op == |
537 | CRUSH_RULE_CHOOSE_LEAF_INDEP; | 521 | CRUSH_RULE_CHOOSE_LEAF_INDEP; |
538 | 522 | ||
539 | /* reset output */ | 523 | /* reset output */ |
@@ -545,32 +529,18 @@ int crush_do_rule(struct crush_map *map, | |||
545 | * basically, numrep <= 0 means relative to | 529 | * basically, numrep <= 0 means relative to |
546 | * the provided result_max | 530 | * the provided result_max |
547 | */ | 531 | */ |
548 | numrep = rule->steps[step].arg1; | 532 | numrep = curstep->arg1; |
549 | if (numrep <= 0) { | 533 | if (numrep <= 0) { |
550 | numrep += result_max; | 534 | numrep += result_max; |
551 | if (numrep <= 0) | 535 | if (numrep <= 0) |
552 | continue; | 536 | continue; |
553 | } | 537 | } |
554 | j = 0; | 538 | j = 0; |
555 | if (osize == 0 && force_pos >= 0) { | ||
556 | /* skip any intermediate types */ | ||
557 | while (force_pos && | ||
558 | force_context[force_pos] < 0 && | ||
559 | rule->steps[step].arg2 != | ||
560 | map->buckets[-1 - | ||
561 | force_context[force_pos]]->type) | ||
562 | force_pos--; | ||
563 | o[osize] = force_context[force_pos]; | ||
564 | if (recurse_to_leaf) | ||
565 | c[osize] = force_context[0]; | ||
566 | j++; | ||
567 | force_pos--; | ||
568 | } | ||
569 | osize += crush_choose(map, | 539 | osize += crush_choose(map, |
570 | map->buckets[-1-w[i]], | 540 | map->buckets[-1-w[i]], |
571 | weight, | 541 | weight, |
572 | x, numrep, | 542 | x, numrep, |
573 | rule->steps[step].arg2, | 543 | curstep->arg2, |
574 | o+osize, j, | 544 | o+osize, j, |
575 | firstn, | 545 | firstn, |
576 | recurse_to_leaf, c+osize); | 546 | recurse_to_leaf, c+osize); |
@@ -597,7 +567,9 @@ int crush_do_rule(struct crush_map *map, | |||
597 | break; | 567 | break; |
598 | 568 | ||
599 | default: | 569 | default: |
600 | BUG_ON(1); | 570 | dprintk(" unknown op %d at step %d\n", |
571 | curstep->op, step); | ||
572 | break; | ||
601 | } | 573 | } |
602 | } | 574 | } |
603 | return result_len; | 575 | return result_len; |