aboutsummaryrefslogtreecommitdiffstats
path: root/net/ceph/crush
diff options
context:
space:
mode:
Diffstat (limited to 'net/ceph/crush')
-rw-r--r--net/ceph/crush/crush.c39
-rw-r--r--net/ceph/crush/mapper.c124
2 files changed, 55 insertions, 108 deletions
diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c
index d6ebb13a18a4..089613234f03 100644
--- a/net/ceph/crush/crush.c
+++ b/net/ceph/crush/crush.c
@@ -26,9 +26,9 @@ const char *crush_bucket_alg_name(int alg)
26 * @b: bucket pointer 26 * @b: bucket pointer
27 * @p: item index in bucket 27 * @p: item index in bucket
28 */ 28 */
29int crush_get_bucket_item_weight(struct crush_bucket *b, int p) 29int crush_get_bucket_item_weight(const struct crush_bucket *b, int p)
30{ 30{
31 if (p >= b->size) 31 if ((__u32)p >= b->size)
32 return 0; 32 return 0;
33 33
34 switch (b->alg) { 34 switch (b->alg) {
@@ -37,38 +37,13 @@ int crush_get_bucket_item_weight(struct crush_bucket *b, int p)
37 case CRUSH_BUCKET_LIST: 37 case CRUSH_BUCKET_LIST:
38 return ((struct crush_bucket_list *)b)->item_weights[p]; 38 return ((struct crush_bucket_list *)b)->item_weights[p];
39 case CRUSH_BUCKET_TREE: 39 case CRUSH_BUCKET_TREE:
40 if (p & 1) 40 return ((struct crush_bucket_tree *)b)->node_weights[crush_calc_tree_node(p)];
41 return ((struct crush_bucket_tree *)b)->node_weights[p];
42 return 0;
43 case CRUSH_BUCKET_STRAW: 41 case CRUSH_BUCKET_STRAW:
44 return ((struct crush_bucket_straw *)b)->item_weights[p]; 42 return ((struct crush_bucket_straw *)b)->item_weights[p];
45 } 43 }
46 return 0; 44 return 0;
47} 45}
48 46
49/**
50 * crush_calc_parents - Calculate parent vectors for the given crush map.
51 * @map: crush_map pointer
52 */
53void crush_calc_parents(struct crush_map *map)
54{
55 int i, b, c;
56
57 for (b = 0; b < map->max_buckets; b++) {
58 if (map->buckets[b] == NULL)
59 continue;
60 for (i = 0; i < map->buckets[b]->size; i++) {
61 c = map->buckets[b]->items[i];
62 BUG_ON(c >= map->max_devices ||
63 c < -map->max_buckets);
64 if (c >= 0)
65 map->device_parents[c] = map->buckets[b]->id;
66 else
67 map->bucket_parents[-1-c] = map->buckets[b]->id;
68 }
69 }
70}
71
72void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b) 47void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b)
73{ 48{
74 kfree(b->h.perm); 49 kfree(b->h.perm);
@@ -87,6 +62,8 @@ void crush_destroy_bucket_list(struct crush_bucket_list *b)
87 62
88void crush_destroy_bucket_tree(struct crush_bucket_tree *b) 63void crush_destroy_bucket_tree(struct crush_bucket_tree *b)
89{ 64{
65 kfree(b->h.perm);
66 kfree(b->h.items);
90 kfree(b->node_weights); 67 kfree(b->node_weights);
91 kfree(b); 68 kfree(b);
92} 69}
@@ -124,10 +101,9 @@ void crush_destroy_bucket(struct crush_bucket *b)
124 */ 101 */
125void crush_destroy(struct crush_map *map) 102void crush_destroy(struct crush_map *map)
126{ 103{
127 int b;
128
129 /* buckets */ 104 /* buckets */
130 if (map->buckets) { 105 if (map->buckets) {
106 __s32 b;
131 for (b = 0; b < map->max_buckets; b++) { 107 for (b = 0; b < map->max_buckets; b++) {
132 if (map->buckets[b] == NULL) 108 if (map->buckets[b] == NULL)
133 continue; 109 continue;
@@ -138,13 +114,12 @@ void crush_destroy(struct crush_map *map)
138 114
139 /* rules */ 115 /* rules */
140 if (map->rules) { 116 if (map->rules) {
117 __u32 b;
141 for (b = 0; b < map->max_rules; b++) 118 for (b = 0; b < map->max_rules; b++)
142 kfree(map->rules[b]); 119 kfree(map->rules[b]);
143 kfree(map->rules); 120 kfree(map->rules);
144 } 121 }
145 122
146 kfree(map->bucket_parents);
147 kfree(map->device_parents);
148 kfree(map); 123 kfree(map);
149} 124}
150 125
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 363f8f7e6c3c..d7edc24333b8 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -33,9 +33,9 @@
33 * @type: storage ruleset type (user defined) 33 * @type: storage ruleset type (user defined)
34 * @size: output set size 34 * @size: output set size
35 */ 35 */
36int crush_find_rule(struct crush_map *map, int ruleset, int type, int size) 36int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size)
37{ 37{
38 int i; 38 __u32 i;
39 39
40 for (i = 0; i < map->max_rules; i++) { 40 for (i = 0; i < map->max_rules; i++) {
41 if (map->rules[i] && 41 if (map->rules[i] &&
@@ -73,7 +73,7 @@ static int bucket_perm_choose(struct crush_bucket *bucket,
73 unsigned int i, s; 73 unsigned int i, s;
74 74
75 /* start a new permutation if @x has changed */ 75 /* start a new permutation if @x has changed */
76 if (bucket->perm_x != x || bucket->perm_n == 0) { 76 if (bucket->perm_x != (__u32)x || bucket->perm_n == 0) {
77 dprintk("bucket %d new x=%d\n", bucket->id, x); 77 dprintk("bucket %d new x=%d\n", bucket->id, x);
78 bucket->perm_x = x; 78 bucket->perm_x = x;
79 79
@@ -153,8 +153,8 @@ static int bucket_list_choose(struct crush_bucket_list *bucket,
153 return bucket->h.items[i]; 153 return bucket->h.items[i];
154 } 154 }
155 155
156 BUG_ON(1); 156 dprintk("bad list sums for bucket %d\n", bucket->h.id);
157 return 0; 157 return bucket->h.items[0];
158} 158}
159 159
160 160
@@ -220,7 +220,7 @@ static int bucket_tree_choose(struct crush_bucket_tree *bucket,
220static int bucket_straw_choose(struct crush_bucket_straw *bucket, 220static int bucket_straw_choose(struct crush_bucket_straw *bucket,
221 int x, int r) 221 int x, int r)
222{ 222{
223 int i; 223 __u32 i;
224 int high = 0; 224 int high = 0;
225 __u64 high_draw = 0; 225 __u64 high_draw = 0;
226 __u64 draw; 226 __u64 draw;
@@ -240,6 +240,7 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket,
240static int crush_bucket_choose(struct crush_bucket *in, int x, int r) 240static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
241{ 241{
242 dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r); 242 dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
243 BUG_ON(in->size == 0);
243 switch (in->alg) { 244 switch (in->alg) {
244 case CRUSH_BUCKET_UNIFORM: 245 case CRUSH_BUCKET_UNIFORM:
245 return bucket_uniform_choose((struct crush_bucket_uniform *)in, 246 return bucket_uniform_choose((struct crush_bucket_uniform *)in,
@@ -254,7 +255,7 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
254 return bucket_straw_choose((struct crush_bucket_straw *)in, 255 return bucket_straw_choose((struct crush_bucket_straw *)in,
255 x, r); 256 x, r);
256 default: 257 default:
257 BUG_ON(1); 258 dprintk("unknown bucket %d alg %d\n", in->id, in->alg);
258 return in->items[0]; 259 return in->items[0];
259 } 260 }
260} 261}
@@ -263,7 +264,7 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
263 * true if device is marked "out" (failed, fully offloaded) 264 * true if device is marked "out" (failed, fully offloaded)
264 * of the cluster 265 * of the cluster
265 */ 266 */
266static int is_out(struct crush_map *map, __u32 *weight, int item, int x) 267static int is_out(const struct crush_map *map, const __u32 *weight, int item, int x)
267{ 268{
268 if (weight[item] >= 0x10000) 269 if (weight[item] >= 0x10000)
269 return 0; 270 return 0;
@@ -288,16 +289,16 @@ static int is_out(struct crush_map *map, __u32 *weight, int item, int x)
288 * @recurse_to_leaf: true if we want one device under each item of given type 289 * @recurse_to_leaf: true if we want one device under each item of given type
289 * @out2: second output vector for leaf items (if @recurse_to_leaf) 290 * @out2: second output vector for leaf items (if @recurse_to_leaf)
290 */ 291 */
291static int crush_choose(struct crush_map *map, 292static int crush_choose(const struct crush_map *map,
292 struct crush_bucket *bucket, 293 struct crush_bucket *bucket,
293 __u32 *weight, 294 const __u32 *weight,
294 int x, int numrep, int type, 295 int x, int numrep, int type,
295 int *out, int outpos, 296 int *out, int outpos,
296 int firstn, int recurse_to_leaf, 297 int firstn, int recurse_to_leaf,
297 int *out2) 298 int *out2)
298{ 299{
299 int rep; 300 int rep;
300 int ftotal, flocal; 301 unsigned int ftotal, flocal;
301 int retry_descent, retry_bucket, skip_rep; 302 int retry_descent, retry_bucket, skip_rep;
302 struct crush_bucket *in = bucket; 303 struct crush_bucket *in = bucket;
303 int r; 304 int r;
@@ -305,7 +306,7 @@ static int crush_choose(struct crush_map *map,
305 int item = 0; 306 int item = 0;
306 int itemtype; 307 int itemtype;
307 int collide, reject; 308 int collide, reject;
308 const int orig_tries = 5; /* attempts before we fall back to search */ 309 const unsigned int orig_tries = 5; /* attempts before we fall back to search */
309 310
310 dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "", 311 dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "",
311 bucket->id, x, outpos, numrep); 312 bucket->id, x, outpos, numrep);
@@ -326,7 +327,7 @@ static int crush_choose(struct crush_map *map,
326 r = rep; 327 r = rep;
327 if (in->alg == CRUSH_BUCKET_UNIFORM) { 328 if (in->alg == CRUSH_BUCKET_UNIFORM) {
328 /* be careful */ 329 /* be careful */
329 if (firstn || numrep >= in->size) 330 if (firstn || (__u32)numrep >= in->size)
330 /* r' = r + f_total */ 331 /* r' = r + f_total */
331 r += ftotal; 332 r += ftotal;
332 else if (in->size % numrep == 0) 333 else if (in->size % numrep == 0)
@@ -355,7 +356,11 @@ static int crush_choose(struct crush_map *map,
355 item = bucket_perm_choose(in, x, r); 356 item = bucket_perm_choose(in, x, r);
356 else 357 else
357 item = crush_bucket_choose(in, x, r); 358 item = crush_bucket_choose(in, x, r);
358 BUG_ON(item >= map->max_devices); 359 if (item >= map->max_devices) {
360 dprintk(" bad item %d\n", item);
361 skip_rep = 1;
362 break;
363 }
359 364
360 /* desired type? */ 365 /* desired type? */
361 if (item < 0) 366 if (item < 0)
@@ -366,8 +371,12 @@ static int crush_choose(struct crush_map *map,
366 371
367 /* keep going? */ 372 /* keep going? */
368 if (itemtype != type) { 373 if (itemtype != type) {
369 BUG_ON(item >= 0 || 374 if (item >= 0 ||
370 (-1-item) >= map->max_buckets); 375 (-1-item) >= map->max_buckets) {
376 dprintk(" bad item type %d\n", type);
377 skip_rep = 1;
378 break;
379 }
371 in = map->buckets[-1-item]; 380 in = map->buckets[-1-item];
372 retry_bucket = 1; 381 retry_bucket = 1;
373 continue; 382 continue;
@@ -416,7 +425,7 @@ reject:
416 if (collide && flocal < 3) 425 if (collide && flocal < 3)
417 /* retry locally a few times */ 426 /* retry locally a few times */
418 retry_bucket = 1; 427 retry_bucket = 1;
419 else if (flocal < in->size + orig_tries) 428 else if (flocal <= in->size + orig_tries)
420 /* exhaustive bucket search */ 429 /* exhaustive bucket search */
421 retry_bucket = 1; 430 retry_bucket = 1;
422 else if (ftotal < 20) 431 else if (ftotal < 20)
@@ -426,7 +435,7 @@ reject:
426 /* else give up */ 435 /* else give up */
427 skip_rep = 1; 436 skip_rep = 1;
428 dprintk(" reject %d collide %d " 437 dprintk(" reject %d collide %d "
429 "ftotal %d flocal %d\n", 438 "ftotal %u flocal %u\n",
430 reject, collide, ftotal, 439 reject, collide, ftotal,
431 flocal); 440 flocal);
432 } 441 }
@@ -455,15 +464,12 @@ reject:
455 * @x: hash input 464 * @x: hash input
456 * @result: pointer to result vector 465 * @result: pointer to result vector
457 * @result_max: maximum result size 466 * @result_max: maximum result size
458 * @force: force initial replica choice; -1 for none
459 */ 467 */
460int crush_do_rule(struct crush_map *map, 468int crush_do_rule(const struct crush_map *map,
461 int ruleno, int x, int *result, int result_max, 469 int ruleno, int x, int *result, int result_max,
462 int force, __u32 *weight) 470 const __u32 *weight)
463{ 471{
464 int result_len; 472 int result_len;
465 int force_context[CRUSH_MAX_DEPTH];
466 int force_pos = -1;
467 int a[CRUSH_MAX_SET]; 473 int a[CRUSH_MAX_SET];
468 int b[CRUSH_MAX_SET]; 474 int b[CRUSH_MAX_SET];
469 int c[CRUSH_MAX_SET]; 475 int c[CRUSH_MAX_SET];
@@ -474,66 +480,44 @@ int crush_do_rule(struct crush_map *map,
474 int osize; 480 int osize;
475 int *tmp; 481 int *tmp;
476 struct crush_rule *rule; 482 struct crush_rule *rule;
477 int step; 483 __u32 step;
478 int i, j; 484 int i, j;
479 int numrep; 485 int numrep;
480 int firstn; 486 int firstn;
481 487
482 BUG_ON(ruleno >= map->max_rules); 488 if ((__u32)ruleno >= map->max_rules) {
489 dprintk(" bad ruleno %d\n", ruleno);
490 return 0;
491 }
483 492
484 rule = map->rules[ruleno]; 493 rule = map->rules[ruleno];
485 result_len = 0; 494 result_len = 0;
486 w = a; 495 w = a;
487 o = b; 496 o = b;
488 497
489 /*
490 * determine hierarchical context of force, if any. note
491 * that this may or may not correspond to the specific types
492 * referenced by the crush rule.
493 */
494 if (force >= 0 &&
495 force < map->max_devices &&
496 map->device_parents[force] != 0 &&
497 !is_out(map, weight, force, x)) {
498 while (1) {
499 force_context[++force_pos] = force;
500 if (force >= 0)
501 force = map->device_parents[force];
502 else
503 force = map->bucket_parents[-1-force];
504 if (force == 0)
505 break;
506 }
507 }
508
509 for (step = 0; step < rule->len; step++) { 498 for (step = 0; step < rule->len; step++) {
499 struct crush_rule_step *curstep = &rule->steps[step];
500
510 firstn = 0; 501 firstn = 0;
511 switch (rule->steps[step].op) { 502 switch (curstep->op) {
512 case CRUSH_RULE_TAKE: 503 case CRUSH_RULE_TAKE:
513 w[0] = rule->steps[step].arg1; 504 w[0] = curstep->arg1;
514
515 /* find position in force_context/hierarchy */
516 while (force_pos >= 0 &&
517 force_context[force_pos] != w[0])
518 force_pos--;
519 /* and move past it */
520 if (force_pos >= 0)
521 force_pos--;
522
523 wsize = 1; 505 wsize = 1;
524 break; 506 break;
525 507
526 case CRUSH_RULE_CHOOSE_LEAF_FIRSTN: 508 case CRUSH_RULE_CHOOSE_LEAF_FIRSTN:
527 case CRUSH_RULE_CHOOSE_FIRSTN: 509 case CRUSH_RULE_CHOOSE_FIRSTN:
528 firstn = 1; 510 firstn = 1;
511 /* fall through */
529 case CRUSH_RULE_CHOOSE_LEAF_INDEP: 512 case CRUSH_RULE_CHOOSE_LEAF_INDEP:
530 case CRUSH_RULE_CHOOSE_INDEP: 513 case CRUSH_RULE_CHOOSE_INDEP:
531 BUG_ON(wsize == 0); 514 if (wsize == 0)
515 break;
532 516
533 recurse_to_leaf = 517 recurse_to_leaf =
534 rule->steps[step].op == 518 curstep->op ==
535 CRUSH_RULE_CHOOSE_LEAF_FIRSTN || 519 CRUSH_RULE_CHOOSE_LEAF_FIRSTN ||
536 rule->steps[step].op == 520 curstep->op ==
537 CRUSH_RULE_CHOOSE_LEAF_INDEP; 521 CRUSH_RULE_CHOOSE_LEAF_INDEP;
538 522
539 /* reset output */ 523 /* reset output */
@@ -545,32 +529,18 @@ int crush_do_rule(struct crush_map *map,
545 * basically, numrep <= 0 means relative to 529 * basically, numrep <= 0 means relative to
546 * the provided result_max 530 * the provided result_max
547 */ 531 */
548 numrep = rule->steps[step].arg1; 532 numrep = curstep->arg1;
549 if (numrep <= 0) { 533 if (numrep <= 0) {
550 numrep += result_max; 534 numrep += result_max;
551 if (numrep <= 0) 535 if (numrep <= 0)
552 continue; 536 continue;
553 } 537 }
554 j = 0; 538 j = 0;
555 if (osize == 0 && force_pos >= 0) {
556 /* skip any intermediate types */
557 while (force_pos &&
558 force_context[force_pos] < 0 &&
559 rule->steps[step].arg2 !=
560 map->buckets[-1 -
561 force_context[force_pos]]->type)
562 force_pos--;
563 o[osize] = force_context[force_pos];
564 if (recurse_to_leaf)
565 c[osize] = force_context[0];
566 j++;
567 force_pos--;
568 }
569 osize += crush_choose(map, 539 osize += crush_choose(map,
570 map->buckets[-1-w[i]], 540 map->buckets[-1-w[i]],
571 weight, 541 weight,
572 x, numrep, 542 x, numrep,
573 rule->steps[step].arg2, 543 curstep->arg2,
574 o+osize, j, 544 o+osize, j,
575 firstn, 545 firstn,
576 recurse_to_leaf, c+osize); 546 recurse_to_leaf, c+osize);
@@ -597,7 +567,9 @@ int crush_do_rule(struct crush_map *map,
597 break; 567 break;
598 568
599 default: 569 default:
600 BUG_ON(1); 570 dprintk(" unknown op %d at step %d\n",
571 curstep->op, step);
572 break;
601 } 573 }
602 } 574 }
603 return result_len; 575 return result_len;