diff options
-rw-r--r-- | fs/ceph/debugfs.c | 7 | ||||
-rw-r--r-- | fs/ceph/osdmap.c | 136 | ||||
-rw-r--r-- | fs/ceph/osdmap.h | 7 | ||||
-rw-r--r-- | fs/ceph/rados.h | 4 |
4 files changed, 104 insertions, 50 deletions
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 1a47b5c25b5f..e159f1415110 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c | |||
@@ -78,6 +78,7 @@ static int osdmap_show(struct seq_file *s, void *p) | |||
78 | { | 78 | { |
79 | int i; | 79 | int i; |
80 | struct ceph_client *client = s->private; | 80 | struct ceph_client *client = s->private; |
81 | struct rb_node *n; | ||
81 | 82 | ||
82 | if (client->osdc.osdmap == NULL) | 83 | if (client->osdc.osdmap == NULL) |
83 | return 0; | 84 | return 0; |
@@ -87,11 +88,11 @@ static int osdmap_show(struct seq_file *s, void *p) | |||
87 | " NEARFULL" : "", | 88 | " NEARFULL" : "", |
88 | (client->osdc.osdmap->flags & CEPH_OSDMAP_FULL) ? | 89 | (client->osdc.osdmap->flags & CEPH_OSDMAP_FULL) ? |
89 | " FULL" : ""); | 90 | " FULL" : ""); |
90 | for (i = 0; i < client->osdc.osdmap->num_pools; i++) { | 91 | for (n = rb_first(&client->osdc.osdmap->pg_pools); n; n = rb_next(n)) { |
91 | struct ceph_pg_pool_info *pool = | 92 | struct ceph_pg_pool_info *pool = |
92 | &client->osdc.osdmap->pg_pool[i]; | 93 | rb_entry(n, struct ceph_pg_pool_info, node); |
93 | seq_printf(s, "pg_pool %d pg_num %d / %d, lpg_num %d / %d\n", | 94 | seq_printf(s, "pg_pool %d pg_num %d / %d, lpg_num %d / %d\n", |
94 | i, pool->v.pg_num, pool->pg_num_mask, | 95 | pool->id, pool->v.pg_num, pool->pg_num_mask, |
95 | pool->v.lpg_num, pool->lpg_num_mask); | 96 | pool->v.lpg_num, pool->lpg_num_mask); |
96 | } | 97 | } |
97 | for (i = 0; i < client->osdc.osdmap->max_osd; i++) { | 98 | for (i = 0; i < client->osdc.osdmap->max_osd; i++) { |
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c index 443fdcdb19c4..34b5696c84fd 100644 --- a/fs/ceph/osdmap.c +++ b/fs/ceph/osdmap.c | |||
@@ -328,9 +328,15 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map) | |||
328 | rb_erase(&pg->node, &map->pg_temp); | 328 | rb_erase(&pg->node, &map->pg_temp); |
329 | kfree(pg); | 329 | kfree(pg); |
330 | } | 330 | } |
331 | while (!RB_EMPTY_ROOT(&map->pg_pools)) { | ||
332 | struct ceph_pg_pool_info *pi = | ||
333 | rb_entry(rb_first(&map->pg_pools), | ||
334 | struct ceph_pg_pool_info, node); | ||
335 | rb_erase(&pi->node, &map->pg_pools); | ||
336 | kfree(pi); | ||
337 | } | ||
331 | kfree(map->osd_state); | 338 | kfree(map->osd_state); |
332 | kfree(map->osd_weight); | 339 | kfree(map->osd_weight); |
333 | kfree(map->pg_pool); | ||
334 | kfree(map->osd_addr); | 340 | kfree(map->osd_addr); |
335 | kfree(map); | 341 | kfree(map); |
336 | } | 342 | } |
@@ -433,6 +439,48 @@ static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root, | |||
433 | } | 439 | } |
434 | 440 | ||
435 | /* | 441 | /* |
442 | * rbtree of pg pool info | ||
443 | */ | ||
444 | static int __insert_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *new) | ||
445 | { | ||
446 | struct rb_node **p = &root->rb_node; | ||
447 | struct rb_node *parent = NULL; | ||
448 | struct ceph_pg_pool_info *pi = NULL; | ||
449 | |||
450 | while (*p) { | ||
451 | parent = *p; | ||
452 | pi = rb_entry(parent, struct ceph_pg_pool_info, node); | ||
453 | if (new->id < pi->id) | ||
454 | p = &(*p)->rb_left; | ||
455 | else if (new->id > pi->id) | ||
456 | p = &(*p)->rb_right; | ||
457 | else | ||
458 | return -EEXIST; | ||
459 | } | ||
460 | |||
461 | rb_link_node(&new->node, parent, p); | ||
462 | rb_insert_color(&new->node, root); | ||
463 | return 0; | ||
464 | } | ||
465 | |||
466 | static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id) | ||
467 | { | ||
468 | struct ceph_pg_pool_info *pi; | ||
469 | struct rb_node *n = root->rb_node; | ||
470 | |||
471 | while (n) { | ||
472 | pi = rb_entry(n, struct ceph_pg_pool_info, node); | ||
473 | if (id < pi->id) | ||
474 | n = n->rb_left; | ||
475 | else if (id > pi->id) | ||
476 | n = n->rb_right; | ||
477 | else | ||
478 | return pi; | ||
479 | } | ||
480 | return NULL; | ||
481 | } | ||
482 | |||
483 | /* | ||
436 | * decode a full map. | 484 | * decode a full map. |
437 | */ | 485 | */ |
438 | struct ceph_osdmap *osdmap_decode(void **p, void *end) | 486 | struct ceph_osdmap *osdmap_decode(void **p, void *end) |
@@ -443,6 +491,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) | |||
443 | u8 ev; | 491 | u8 ev; |
444 | int err = -EINVAL; | 492 | int err = -EINVAL; |
445 | void *start = *p; | 493 | void *start = *p; |
494 | struct ceph_pg_pool_info *pi; | ||
446 | 495 | ||
447 | dout("osdmap_decode %p to %p len %d\n", *p, end, (int)(end - *p)); | 496 | dout("osdmap_decode %p to %p len %d\n", *p, end, (int)(end - *p)); |
448 | 497 | ||
@@ -464,32 +513,27 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) | |||
464 | ceph_decode_copy(p, &map->created, sizeof(map->created)); | 513 | ceph_decode_copy(p, &map->created, sizeof(map->created)); |
465 | ceph_decode_copy(p, &map->modified, sizeof(map->modified)); | 514 | ceph_decode_copy(p, &map->modified, sizeof(map->modified)); |
466 | 515 | ||
467 | map->num_pools = ceph_decode_32(p); | ||
468 | map->pg_pool = kcalloc(map->num_pools, sizeof(*map->pg_pool), | ||
469 | GFP_NOFS); | ||
470 | if (!map->pg_pool) { | ||
471 | err = -ENOMEM; | ||
472 | goto bad; | ||
473 | } | ||
474 | ceph_decode_32_safe(p, end, max, bad); | 516 | ceph_decode_32_safe(p, end, max, bad); |
475 | while (max--) { | 517 | while (max--) { |
476 | ceph_decode_need(p, end, 4+1+sizeof(map->pg_pool->v), bad); | 518 | ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad); |
477 | i = ceph_decode_32(p); | 519 | pi = kmalloc(sizeof(*pi), GFP_NOFS); |
478 | if (i >= map->num_pools) | 520 | if (!pi) |
479 | goto bad; | 521 | goto bad; |
522 | pi->id = ceph_decode_32(p); | ||
480 | ev = ceph_decode_8(p); /* encoding version */ | 523 | ev = ceph_decode_8(p); /* encoding version */ |
481 | if (ev > CEPH_PG_POOL_VERSION) { | 524 | if (ev > CEPH_PG_POOL_VERSION) { |
482 | pr_warning("got unknown v %d > %d of ceph_pg_pool\n", | 525 | pr_warning("got unknown v %d > %d of ceph_pg_pool\n", |
483 | ev, CEPH_PG_POOL_VERSION); | 526 | ev, CEPH_PG_POOL_VERSION); |
484 | goto bad; | 527 | goto bad; |
485 | } | 528 | } |
486 | ceph_decode_copy(p, &map->pg_pool[i].v, | 529 | ceph_decode_copy(p, &pi->v, sizeof(pi->v)); |
487 | sizeof(map->pg_pool->v)); | 530 | __insert_pg_pool(&map->pg_pools, pi); |
488 | calc_pg_masks(&map->pg_pool[i]); | 531 | calc_pg_masks(pi); |
489 | p += le32_to_cpu(map->pg_pool[i].v.num_snaps) * sizeof(u64); | 532 | p += le32_to_cpu(pi->v.num_snaps) * sizeof(u64); |
490 | p += le32_to_cpu(map->pg_pool[i].v.num_removed_snap_intervals) | 533 | p += le32_to_cpu(pi->v.num_removed_snap_intervals) |
491 | * sizeof(u64) * 2; | 534 | * sizeof(u64) * 2; |
492 | } | 535 | } |
536 | ceph_decode_32_safe(p, end, map->pool_max, bad); | ||
493 | 537 | ||
494 | ceph_decode_32_safe(p, end, map->flags, bad); | 538 | ceph_decode_32_safe(p, end, map->flags, bad); |
495 | 539 | ||
@@ -581,7 +625,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
581 | u32 epoch = 0; | 625 | u32 epoch = 0; |
582 | struct ceph_timespec modified; | 626 | struct ceph_timespec modified; |
583 | u32 len, pool; | 627 | u32 len, pool; |
584 | __s32 new_flags, max; | 628 | __s32 new_pool_max, new_flags, max; |
585 | void *start = *p; | 629 | void *start = *p; |
586 | int err = -EINVAL; | 630 | int err = -EINVAL; |
587 | u16 version; | 631 | u16 version; |
@@ -600,6 +644,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
600 | epoch = ceph_decode_32(p); | 644 | epoch = ceph_decode_32(p); |
601 | BUG_ON(epoch != map->epoch+1); | 645 | BUG_ON(epoch != map->epoch+1); |
602 | ceph_decode_copy(p, &modified, sizeof(modified)); | 646 | ceph_decode_copy(p, &modified, sizeof(modified)); |
647 | new_pool_max = ceph_decode_32(p); | ||
603 | new_flags = ceph_decode_32(p); | 648 | new_flags = ceph_decode_32(p); |
604 | 649 | ||
605 | /* full map? */ | 650 | /* full map? */ |
@@ -623,6 +668,8 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
623 | /* new flags? */ | 668 | /* new flags? */ |
624 | if (new_flags >= 0) | 669 | if (new_flags >= 0) |
625 | map->flags = new_flags; | 670 | map->flags = new_flags; |
671 | if (new_pool_max >= 0) | ||
672 | map->pool_max = new_pool_max; | ||
626 | 673 | ||
627 | ceph_decode_need(p, end, 5*sizeof(u32), bad); | 674 | ceph_decode_need(p, end, 5*sizeof(u32), bad); |
628 | 675 | ||
@@ -647,37 +694,42 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
647 | ceph_decode_32_safe(p, end, len, bad); | 694 | ceph_decode_32_safe(p, end, len, bad); |
648 | while (len--) { | 695 | while (len--) { |
649 | __u8 ev; | 696 | __u8 ev; |
697 | struct ceph_pg_pool_info *pi; | ||
650 | 698 | ||
651 | ceph_decode_32_safe(p, end, pool, bad); | 699 | ceph_decode_32_safe(p, end, pool, bad); |
652 | if (pool >= map->num_pools) { | 700 | ceph_decode_need(p, end, 1 + sizeof(pi->v), bad); |
653 | void *pg_pool = kcalloc(pool + 1, | ||
654 | sizeof(*map->pg_pool), | ||
655 | GFP_NOFS); | ||
656 | if (!pg_pool) { | ||
657 | err = -ENOMEM; | ||
658 | goto bad; | ||
659 | } | ||
660 | memcpy(pg_pool, map->pg_pool, | ||
661 | map->num_pools * sizeof(*map->pg_pool)); | ||
662 | kfree(map->pg_pool); | ||
663 | map->pg_pool = pg_pool; | ||
664 | map->num_pools = pool+1; | ||
665 | } | ||
666 | ceph_decode_need(p, end, 1 + sizeof(map->pg_pool->v), bad); | ||
667 | ev = ceph_decode_8(p); /* encoding version */ | 701 | ev = ceph_decode_8(p); /* encoding version */ |
668 | if (ev > CEPH_PG_POOL_VERSION) { | 702 | if (ev > CEPH_PG_POOL_VERSION) { |
669 | pr_warning("got unknown v %d > %d of ceph_pg_pool\n", | 703 | pr_warning("got unknown v %d > %d of ceph_pg_pool\n", |
670 | ev, CEPH_PG_POOL_VERSION); | 704 | ev, CEPH_PG_POOL_VERSION); |
671 | goto bad; | 705 | goto bad; |
672 | } | 706 | } |
673 | ceph_decode_copy(p, &map->pg_pool[pool].v, | 707 | pi = __lookup_pg_pool(&map->pg_pools, pool); |
674 | sizeof(map->pg_pool->v)); | 708 | if (!pi) { |
675 | calc_pg_masks(&map->pg_pool[pool]); | 709 | pi = kmalloc(sizeof(*pi), GFP_NOFS); |
710 | if (!pi) { | ||
711 | err = -ENOMEM; | ||
712 | goto bad; | ||
713 | } | ||
714 | pi->id = pool; | ||
715 | __insert_pg_pool(&map->pg_pools, pi); | ||
716 | } | ||
717 | ceph_decode_copy(p, &pi->v, sizeof(pi->v)); | ||
718 | calc_pg_masks(pi); | ||
676 | } | 719 | } |
677 | 720 | ||
678 | /* old_pool (ignore) */ | 721 | /* old_pool */ |
679 | ceph_decode_32_safe(p, end, len, bad); | 722 | ceph_decode_32_safe(p, end, len, bad); |
680 | *p += len * sizeof(u32); | 723 | while (len--) { |
724 | struct ceph_pg_pool_info *pi; | ||
725 | |||
726 | ceph_decode_32_safe(p, end, pool, bad); | ||
727 | pi = __lookup_pg_pool(&map->pg_pools, pool); | ||
728 | if (pi) { | ||
729 | rb_erase(&pi->node, &map->pg_pools); | ||
730 | kfree(pi); | ||
731 | } | ||
732 | } | ||
681 | 733 | ||
682 | /* new_up */ | 734 | /* new_up */ |
683 | err = -EINVAL; | 735 | err = -EINVAL; |
@@ -861,10 +913,10 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol, | |||
861 | unsigned ps; | 913 | unsigned ps; |
862 | 914 | ||
863 | BUG_ON(!osdmap); | 915 | BUG_ON(!osdmap); |
864 | if (poolid >= osdmap->num_pools) | ||
865 | return -EIO; | ||
866 | 916 | ||
867 | pool = &osdmap->pg_pool[poolid]; | 917 | pool = __lookup_pg_pool(&osdmap->pg_pools, poolid); |
918 | if (!pool) | ||
919 | return -EIO; | ||
868 | ps = ceph_str_hash(pool->v.object_hash, oid, strlen(oid)); | 920 | ps = ceph_str_hash(pool->v.object_hash, oid, strlen(oid)); |
869 | if (preferred >= 0) { | 921 | if (preferred >= 0) { |
870 | ps += preferred; | 922 | ps += preferred; |
@@ -919,9 +971,9 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, | |||
919 | preferred >= osdmap->crush->max_devices) | 971 | preferred >= osdmap->crush->max_devices) |
920 | preferred = -1; | 972 | preferred = -1; |
921 | 973 | ||
922 | if (poolid >= osdmap->num_pools) | 974 | pool = __lookup_pg_pool(&osdmap->pg_pools, poolid); |
975 | if (!pool) | ||
923 | return NULL; | 976 | return NULL; |
924 | pool = &osdmap->pg_pool[poolid]; | ||
925 | ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset, | 977 | ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset, |
926 | pool->v.type, pool->v.size); | 978 | pool->v.type, pool->v.size); |
927 | if (ruleno < 0) { | 979 | if (ruleno < 0) { |
diff --git a/fs/ceph/osdmap.h b/fs/ceph/osdmap.h index c4af8418aa00..1fb55afb2642 100644 --- a/fs/ceph/osdmap.h +++ b/fs/ceph/osdmap.h | |||
@@ -19,6 +19,8 @@ | |||
19 | * the change between two successive epochs, or as a fully encoded map. | 19 | * the change between two successive epochs, or as a fully encoded map. |
20 | */ | 20 | */ |
21 | struct ceph_pg_pool_info { | 21 | struct ceph_pg_pool_info { |
22 | struct rb_node node; | ||
23 | int id; | ||
22 | struct ceph_pg_pool v; | 24 | struct ceph_pg_pool v; |
23 | int pg_num_mask, pgp_num_mask, lpg_num_mask, lpgp_num_mask; | 25 | int pg_num_mask, pgp_num_mask, lpg_num_mask, lpgp_num_mask; |
24 | }; | 26 | }; |
@@ -44,9 +46,8 @@ struct ceph_osdmap { | |||
44 | struct ceph_entity_addr *osd_addr; | 46 | struct ceph_entity_addr *osd_addr; |
45 | 47 | ||
46 | struct rb_root pg_temp; | 48 | struct rb_root pg_temp; |
47 | 49 | struct rb_root pg_pools; | |
48 | u32 num_pools; | 50 | u32 pool_max; |
49 | struct ceph_pg_pool_info *pg_pool; | ||
50 | 51 | ||
51 | /* the CRUSH map specifies the mapping of placement groups to | 52 | /* the CRUSH map specifies the mapping of placement groups to |
52 | * the list of osds that store+replicate them. */ | 53 | * the list of osds that store+replicate them. */ |
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h index 1f4c78640541..26ac8b89a676 100644 --- a/fs/ceph/rados.h +++ b/fs/ceph/rados.h | |||
@@ -11,8 +11,8 @@ | |||
11 | /* | 11 | /* |
12 | * osdmap encoding versions | 12 | * osdmap encoding versions |
13 | */ | 13 | */ |
14 | #define CEPH_OSDMAP_INC_VERSION 3 | 14 | #define CEPH_OSDMAP_INC_VERSION 4 |
15 | #define CEPH_OSDMAP_VERSION 3 | 15 | #define CEPH_OSDMAP_VERSION 4 |
16 | 16 | ||
17 | /* | 17 | /* |
18 | * fs id | 18 | * fs id |