about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- fs/ceph/debugfs.c 7
-rw-r--r-- fs/ceph/osdmap.c 136
-rw-r--r-- fs/ceph/osdmap.h 7
-rw-r--r-- fs/ceph/rados.h 4
4 files changed, 104 insertions, 50 deletions
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 1a47b5c25b5f..e159f1415110 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -78,6 +78,7 @@ static int osdmap_show(struct seq_file *s, void *p)
78{ 78{
79 int i; 79 int i;
80 struct ceph_client *client = s->private; 80 struct ceph_client *client = s->private;
81 struct rb_node *n;
81 82
82 if (client->osdc.osdmap == NULL) 83 if (client->osdc.osdmap == NULL)
83 return 0; 84 return 0;
@@ -87,11 +88,11 @@ static int osdmap_show(struct seq_file *s, void *p)
87 " NEARFULL" : "", 88 " NEARFULL" : "",
88 (client->osdc.osdmap->flags & CEPH_OSDMAP_FULL) ? 89 (client->osdc.osdmap->flags & CEPH_OSDMAP_FULL) ?
89 " FULL" : ""); 90 " FULL" : "");
90 for (i = 0; i < client->osdc.osdmap->num_pools; i++) { 91 for (n = rb_first(&client->osdc.osdmap->pg_pools); n; n = rb_next(n)) {
91 struct ceph_pg_pool_info *pool = 92 struct ceph_pg_pool_info *pool =
92 &client->osdc.osdmap->pg_pool[i]; 93 rb_entry(n, struct ceph_pg_pool_info, node);
93 seq_printf(s, "pg_pool %d pg_num %d / %d, lpg_num %d / %d\n", 94 seq_printf(s, "pg_pool %d pg_num %d / %d, lpg_num %d / %d\n",
94 i, pool->v.pg_num, pool->pg_num_mask, 95 pool->id, pool->v.pg_num, pool->pg_num_mask,
95 pool->v.lpg_num, pool->lpg_num_mask); 96 pool->v.lpg_num, pool->lpg_num_mask);
96 } 97 }
97 for (i = 0; i < client->osdc.osdmap->max_osd; i++) { 98 for (i = 0; i < client->osdc.osdmap->max_osd; i++) {
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index 443fdcdb19c4..34b5696c84fd 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -328,9 +328,15 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map)
328 rb_erase(&pg->node, &map->pg_temp); 328 rb_erase(&pg->node, &map->pg_temp);
329 kfree(pg); 329 kfree(pg);
330 } 330 }
331 while (!RB_EMPTY_ROOT(&map->pg_pools)) {
332 struct ceph_pg_pool_info *pi =
333 rb_entry(rb_first(&map->pg_pools),
334 struct ceph_pg_pool_info, node);
335 rb_erase(&pi->node, &map->pg_pools);
336 kfree(pi);
337 }
331 kfree(map->osd_state); 338 kfree(map->osd_state);
332 kfree(map->osd_weight); 339 kfree(map->osd_weight);
333 kfree(map->pg_pool);
334 kfree(map->osd_addr); 340 kfree(map->osd_addr);
335 kfree(map); 341 kfree(map);
336} 342}
@@ -433,6 +439,48 @@ static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root,
433} 439}
434 440
435/* 441/*
442 * rbtree of pg pool info
443 */
444static int __insert_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *new)
445{
446 struct rb_node **p = &root->rb_node;
447 struct rb_node *parent = NULL;
448 struct ceph_pg_pool_info *pi = NULL;
449
450 while (*p) {
451 parent = *p;
452 pi = rb_entry(parent, struct ceph_pg_pool_info, node);
453 if (new->id < pi->id)
454 p = &(*p)->rb_left;
455 else if (new->id > pi->id)
456 p = &(*p)->rb_right;
457 else
458 return -EEXIST;
459 }
460
461 rb_link_node(&new->node, parent, p);
462 rb_insert_color(&new->node, root);
463 return 0;
464}
465
466static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id)
467{
468 struct ceph_pg_pool_info *pi;
469 struct rb_node *n = root->rb_node;
470
471 while (n) {
472 pi = rb_entry(n, struct ceph_pg_pool_info, node);
473 if (id < pi->id)
474 n = n->rb_left;
475 else if (id > pi->id)
476 n = n->rb_right;
477 else
478 return pi;
479 }
480 return NULL;
481}
482
483/*
436 * decode a full map. 484 * decode a full map.
437 */ 485 */
438struct ceph_osdmap *osdmap_decode(void **p, void *end) 486struct ceph_osdmap *osdmap_decode(void **p, void *end)
@@ -443,6 +491,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
443 u8 ev; 491 u8 ev;
444 int err = -EINVAL; 492 int err = -EINVAL;
445 void *start = *p; 493 void *start = *p;
494 struct ceph_pg_pool_info *pi;
446 495
447 dout("osdmap_decode %p to %p len %d\n", *p, end, (int)(end - *p)); 496 dout("osdmap_decode %p to %p len %d\n", *p, end, (int)(end - *p));
448 497
@@ -464,32 +513,27 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
464 ceph_decode_copy(p, &map->created, sizeof(map->created)); 513 ceph_decode_copy(p, &map->created, sizeof(map->created));
465 ceph_decode_copy(p, &map->modified, sizeof(map->modified)); 514 ceph_decode_copy(p, &map->modified, sizeof(map->modified));
466 515
467 map->num_pools = ceph_decode_32(p);
468 map->pg_pool = kcalloc(map->num_pools, sizeof(*map->pg_pool),
469 GFP_NOFS);
470 if (!map->pg_pool) {
471 err = -ENOMEM;
472 goto bad;
473 }
474 ceph_decode_32_safe(p, end, max, bad); 516 ceph_decode_32_safe(p, end, max, bad);
475 while (max--) { 517 while (max--) {
476 ceph_decode_need(p, end, 4+1+sizeof(map->pg_pool->v), bad); 518 ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad);
477 i = ceph_decode_32(p); 519 pi = kmalloc(sizeof(*pi), GFP_NOFS);
478 if (i >= map->num_pools) 520 if (!pi)
479 goto bad; 521 goto bad;
522 pi->id = ceph_decode_32(p);
480 ev = ceph_decode_8(p); /* encoding version */ 523 ev = ceph_decode_8(p); /* encoding version */
481 if (ev > CEPH_PG_POOL_VERSION) { 524 if (ev > CEPH_PG_POOL_VERSION) {
482 pr_warning("got unknown v %d > %d of ceph_pg_pool\n", 525 pr_warning("got unknown v %d > %d of ceph_pg_pool\n",
483 ev, CEPH_PG_POOL_VERSION); 526 ev, CEPH_PG_POOL_VERSION);
484 goto bad; 527 goto bad;
485 } 528 }
486 ceph_decode_copy(p, &map->pg_pool[i].v, 529 ceph_decode_copy(p, &pi->v, sizeof(pi->v));
487 sizeof(map->pg_pool->v)); 530 __insert_pg_pool(&map->pg_pools, pi);
488 calc_pg_masks(&map->pg_pool[i]); 531 calc_pg_masks(pi);
489 p += le32_to_cpu(map->pg_pool[i].v.num_snaps) * sizeof(u64); 532 p += le32_to_cpu(pi->v.num_snaps) * sizeof(u64);
490 p += le32_to_cpu(map->pg_pool[i].v.num_removed_snap_intervals) 533 p += le32_to_cpu(pi->v.num_removed_snap_intervals)
491 * sizeof(u64) * 2; 534 * sizeof(u64) * 2;
492 } 535 }
536 ceph_decode_32_safe(p, end, map->pool_max, bad);
493 537
494 ceph_decode_32_safe(p, end, map->flags, bad); 538 ceph_decode_32_safe(p, end, map->flags, bad);
495 539
@@ -581,7 +625,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
581 u32 epoch = 0; 625 u32 epoch = 0;
582 struct ceph_timespec modified; 626 struct ceph_timespec modified;
583 u32 len, pool; 627 u32 len, pool;
584 __s32 new_flags, max; 628 __s32 new_pool_max, new_flags, max;
585 void *start = *p; 629 void *start = *p;
586 int err = -EINVAL; 630 int err = -EINVAL;
587 u16 version; 631 u16 version;
@@ -600,6 +644,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
600 epoch = ceph_decode_32(p); 644 epoch = ceph_decode_32(p);
601 BUG_ON(epoch != map->epoch+1); 645 BUG_ON(epoch != map->epoch+1);
602 ceph_decode_copy(p, &modified, sizeof(modified)); 646 ceph_decode_copy(p, &modified, sizeof(modified));
647 new_pool_max = ceph_decode_32(p);
603 new_flags = ceph_decode_32(p); 648 new_flags = ceph_decode_32(p);
604 649
605 /* full map? */ 650 /* full map? */
@@ -623,6 +668,8 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
623 /* new flags? */ 668 /* new flags? */
624 if (new_flags >= 0) 669 if (new_flags >= 0)
625 map->flags = new_flags; 670 map->flags = new_flags;
671 if (new_pool_max >= 0)
672 map->pool_max = new_pool_max;
626 673
627 ceph_decode_need(p, end, 5*sizeof(u32), bad); 674 ceph_decode_need(p, end, 5*sizeof(u32), bad);
628 675
@@ -647,37 +694,42 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
647 ceph_decode_32_safe(p, end, len, bad); 694 ceph_decode_32_safe(p, end, len, bad);
648 while (len--) { 695 while (len--) {
649 __u8 ev; 696 __u8 ev;
697 struct ceph_pg_pool_info *pi;
650 698
651 ceph_decode_32_safe(p, end, pool, bad); 699 ceph_decode_32_safe(p, end, pool, bad);
652 if (pool >= map->num_pools) { 700 ceph_decode_need(p, end, 1 + sizeof(pi->v), bad);
653 void *pg_pool = kcalloc(pool + 1,
654 sizeof(*map->pg_pool),
655 GFP_NOFS);
656 if (!pg_pool) {
657 err = -ENOMEM;
658 goto bad;
659 }
660 memcpy(pg_pool, map->pg_pool,
661 map->num_pools * sizeof(*map->pg_pool));
662 kfree(map->pg_pool);
663 map->pg_pool = pg_pool;
664 map->num_pools = pool+1;
665 }
666 ceph_decode_need(p, end, 1 + sizeof(map->pg_pool->v), bad);
667 ev = ceph_decode_8(p); /* encoding version */ 701 ev = ceph_decode_8(p); /* encoding version */
668 if (ev > CEPH_PG_POOL_VERSION) { 702 if (ev > CEPH_PG_POOL_VERSION) {
669 pr_warning("got unknown v %d > %d of ceph_pg_pool\n", 703 pr_warning("got unknown v %d > %d of ceph_pg_pool\n",
670 ev, CEPH_PG_POOL_VERSION); 704 ev, CEPH_PG_POOL_VERSION);
671 goto bad; 705 goto bad;
672 } 706 }
673 ceph_decode_copy(p, &map->pg_pool[pool].v, 707 pi = __lookup_pg_pool(&map->pg_pools, pool);
674 sizeof(map->pg_pool->v)); 708 if (!pi) {
675 calc_pg_masks(&map->pg_pool[pool]); 709 pi = kmalloc(sizeof(*pi), GFP_NOFS);
710 if (!pi) {
711 err = -ENOMEM;
712 goto bad;
713 }
714 pi->id = pool;
715 __insert_pg_pool(&map->pg_pools, pi);
716 }
717 ceph_decode_copy(p, &pi->v, sizeof(pi->v));
718 calc_pg_masks(pi);
676 } 719 }
677 720
678 /* old_pool (ignore) */ 721 /* old_pool */
679 ceph_decode_32_safe(p, end, len, bad); 722 ceph_decode_32_safe(p, end, len, bad);
680 *p += len * sizeof(u32); 723 while (len--) {
724 struct ceph_pg_pool_info *pi;
725
726 ceph_decode_32_safe(p, end, pool, bad);
727 pi = __lookup_pg_pool(&map->pg_pools, pool);
728 if (pi) {
729 rb_erase(&pi->node, &map->pg_pools);
730 kfree(pi);
731 }
732 }
681 733
682 /* new_up */ 734 /* new_up */
683 err = -EINVAL; 735 err = -EINVAL;
@@ -861,10 +913,10 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol,
861 unsigned ps; 913 unsigned ps;
862 914
863 BUG_ON(!osdmap); 915 BUG_ON(!osdmap);
864 if (poolid >= osdmap->num_pools)
865 return -EIO;
866 916
867 pool = &osdmap->pg_pool[poolid]; 917 pool = __lookup_pg_pool(&osdmap->pg_pools, poolid);
918 if (!pool)
919 return -EIO;
868 ps = ceph_str_hash(pool->v.object_hash, oid, strlen(oid)); 920 ps = ceph_str_hash(pool->v.object_hash, oid, strlen(oid));
869 if (preferred >= 0) { 921 if (preferred >= 0) {
870 ps += preferred; 922 ps += preferred;
@@ -919,9 +971,9 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
919 preferred >= osdmap->crush->max_devices) 971 preferred >= osdmap->crush->max_devices)
920 preferred = -1; 972 preferred = -1;
921 973
922 if (poolid >= osdmap->num_pools) 974 pool = __lookup_pg_pool(&osdmap->pg_pools, poolid);
975 if (!pool)
923 return NULL; 976 return NULL;
924 pool = &osdmap->pg_pool[poolid];
925 ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset, 977 ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset,
926 pool->v.type, pool->v.size); 978 pool->v.type, pool->v.size);
927 if (ruleno < 0) { 979 if (ruleno < 0) {
diff --git a/fs/ceph/osdmap.h b/fs/ceph/osdmap.h
index c4af8418aa00..1fb55afb2642 100644
--- a/fs/ceph/osdmap.h
+++ b/fs/ceph/osdmap.h
@@ -19,6 +19,8 @@
19 * the change between two successive epochs, or as a fully encoded map. 19 * the change between two successive epochs, or as a fully encoded map.
20 */ 20 */
21struct ceph_pg_pool_info { 21struct ceph_pg_pool_info {
22 struct rb_node node;
23 int id;
22 struct ceph_pg_pool v; 24 struct ceph_pg_pool v;
23 int pg_num_mask, pgp_num_mask, lpg_num_mask, lpgp_num_mask; 25 int pg_num_mask, pgp_num_mask, lpg_num_mask, lpgp_num_mask;
24}; 26};
@@ -44,9 +46,8 @@ struct ceph_osdmap {
44 struct ceph_entity_addr *osd_addr; 46 struct ceph_entity_addr *osd_addr;
45 47
46 struct rb_root pg_temp; 48 struct rb_root pg_temp;
47 49 struct rb_root pg_pools;
48 u32 num_pools; 50 u32 pool_max;
49 struct ceph_pg_pool_info *pg_pool;
50 51
51 /* the CRUSH map specifies the mapping of placement groups to 52 /* the CRUSH map specifies the mapping of placement groups to
52 * the list of osds that store+replicate them. */ 53 * the list of osds that store+replicate them. */
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h
index 1f4c78640541..26ac8b89a676 100644
--- a/fs/ceph/rados.h
+++ b/fs/ceph/rados.h
@@ -11,8 +11,8 @@
11/* 11/*
12 * osdmap encoding versions 12 * osdmap encoding versions
13 */ 13 */
14#define CEPH_OSDMAP_INC_VERSION 3 14#define CEPH_OSDMAP_INC_VERSION 4
15#define CEPH_OSDMAP_VERSION 3 15#define CEPH_OSDMAP_VERSION 4
16 16
17/* 17/*
18 * fs id 18 * fs id