aboutsummaryrefslogtreecommitdiffstats
path: root/net/ceph/osdmap.c
diff options
context:
space:
mode:
author: Sage Weil <sage@inktank.com> 2013-02-23 13:41:09 -0500
committer: Sage Weil <sage@inktank.com> 2013-02-26 18:02:25 -0500
commit: 4f6a7e5ee1393ec4b243b39dac9f36992d161540 (patch)
tree: 547684ad1dc0e1e3e376e958ab74e164d6972623 /net/ceph/osdmap.c
parent: ec73a754989c27628c9037887df919561280519c (diff)
ceph: update support for PGID64, PGPOOL3, OSDENC protocol features
Support (and require) the PGID64, PGPOOL3, and OSDENC protocol features. These have been present in ceph.git since v0.42, Feb 2012. Require these features to simplify support; nobody is running older userspace.

Note that the new request and reply encoding is still not in place, so the new code is not yet functional.

Signed-off-by: Sage Weil <sage@inktank.com>
Reviewed-by: Alex Elder <elder@inktank.com>
Diffstat (limited to 'net/ceph/osdmap.c')
-rw-r--r-- net/ceph/osdmap.c 162
1 file changed, 86 insertions, 76 deletions
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 81118db5bd11..911919320d2e 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -45,13 +45,8 @@ static int calc_bits_of(unsigned int t)
45 */ 45 */
46static void calc_pg_masks(struct ceph_pg_pool_info *pi) 46static void calc_pg_masks(struct ceph_pg_pool_info *pi)
47{ 47{
48 pi->pg_num_mask = (1 << calc_bits_of(le32_to_cpu(pi->v.pg_num)-1)) - 1; 48 pi->pg_num_mask = (1 << calc_bits_of(pi->pg_num-1)) - 1;
49 pi->pgp_num_mask = 49 pi->pgp_num_mask = (1 << calc_bits_of(pi->pgp_num-1)) - 1;
50 (1 << calc_bits_of(le32_to_cpu(pi->v.pgp_num)-1)) - 1;
51 pi->lpg_num_mask =
52 (1 << calc_bits_of(le32_to_cpu(pi->v.lpg_num)-1)) - 1;
53 pi->lpgp_num_mask =
54 (1 << calc_bits_of(le32_to_cpu(pi->v.lpgp_num)-1)) - 1;
55} 50}
56 51
57/* 52/*
@@ -452,7 +447,7 @@ static int __insert_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *new)
452 return 0; 447 return 0;
453} 448}
454 449
455static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id) 450static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, u64 id)
456{ 451{
457 struct ceph_pg_pool_info *pi; 452 struct ceph_pg_pool_info *pi;
458 struct rb_node *n = root->rb_node; 453 struct rb_node *n = root->rb_node;
@@ -508,24 +503,57 @@ static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
508 503
509static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi) 504static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi)
510{ 505{
511 unsigned int n, m; 506 u8 ev, cv;
507 unsigned len, num;
508 void *pool_end;
509
510 ceph_decode_need(p, end, 2 + 4, bad);
511 ev = ceph_decode_8(p); /* encoding version */
512 cv = ceph_decode_8(p); /* compat version */
513 if (ev < 5) {
514 pr_warning("got v %d < 5 cv %d of ceph_pg_pool\n", ev, cv);
515 return -EINVAL;
516 }
517 if (cv > 7) {
518 pr_warning("got v %d cv %d > 7 of ceph_pg_pool\n", ev, cv);
519 return -EINVAL;
520 }
521 len = ceph_decode_32(p);
522 ceph_decode_need(p, end, len, bad);
523 pool_end = *p + len;
512 524
513 ceph_decode_copy(p, &pi->v, sizeof(pi->v)); 525 pi->type = ceph_decode_8(p);
514 calc_pg_masks(pi); 526 pi->size = ceph_decode_8(p);
527 pi->crush_ruleset = ceph_decode_8(p);
528 pi->object_hash = ceph_decode_8(p);
515 529
516 /* num_snaps * snap_info_t */ 530 pi->pg_num = ceph_decode_32(p);
517 n = le32_to_cpu(pi->v.num_snaps); 531 pi->pgp_num = ceph_decode_32(p);
518 while (n--) { 532
519 ceph_decode_need(p, end, sizeof(u64) + 1 + sizeof(u64) + 533 *p += 4 + 4; /* skip lpg* */
520 sizeof(struct ceph_timespec), bad); 534 *p += 4; /* skip last_change */
521 *p += sizeof(u64) + /* key */ 535 *p += 8 + 4; /* skip snap_seq, snap_epoch */
522 1 + sizeof(u64) + /* u8, snapid */ 536
523 sizeof(struct ceph_timespec); 537 /* skip snaps */
524 m = ceph_decode_32(p); /* snap name */ 538 num = ceph_decode_32(p);
525 *p += m; 539 while (num--) {
540 *p += 8; /* snapid key */
541 *p += 1 + 1; /* versions */
542 len = ceph_decode_32(p);
543 *p += len;
526 } 544 }
527 545
528 *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2; 546 /* skip removed snaps */
547 num = ceph_decode_32(p);
548 *p += num * (8 + 8);
549
550 *p += 8; /* skip auid */
551 pi->flags = ceph_decode_64(p);
552
553 /* ignore the rest */
554
555 *p = pool_end;
556 calc_pg_masks(pi);
529 return 0; 557 return 0;
530 558
531bad: 559bad:
@@ -535,14 +563,15 @@ bad:
535static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map) 563static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map)
536{ 564{
537 struct ceph_pg_pool_info *pi; 565 struct ceph_pg_pool_info *pi;
538 u32 num, len, pool; 566 u32 num, len;
567 u64 pool;
539 568
540 ceph_decode_32_safe(p, end, num, bad); 569 ceph_decode_32_safe(p, end, num, bad);
541 dout(" %d pool names\n", num); 570 dout(" %d pool names\n", num);
542 while (num--) { 571 while (num--) {
543 ceph_decode_32_safe(p, end, pool, bad); 572 ceph_decode_64_safe(p, end, pool, bad);
544 ceph_decode_32_safe(p, end, len, bad); 573 ceph_decode_32_safe(p, end, len, bad);
545 dout(" pool %d len %d\n", pool, len); 574 dout(" pool %llu len %d\n", pool, len);
546 ceph_decode_need(p, end, len, bad); 575 ceph_decode_need(p, end, len, bad);
547 pi = __lookup_pg_pool(&map->pg_pools, pool); 576 pi = __lookup_pg_pool(&map->pg_pools, pool);
548 if (pi) { 577 if (pi) {
@@ -633,7 +662,6 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
633 struct ceph_osdmap *map; 662 struct ceph_osdmap *map;
634 u16 version; 663 u16 version;
635 u32 len, max, i; 664 u32 len, max, i;
636 u8 ev;
637 int err = -EINVAL; 665 int err = -EINVAL;
638 void *start = *p; 666 void *start = *p;
639 struct ceph_pg_pool_info *pi; 667 struct ceph_pg_pool_info *pi;
@@ -646,9 +674,12 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
646 map->pg_temp = RB_ROOT; 674 map->pg_temp = RB_ROOT;
647 675
648 ceph_decode_16_safe(p, end, version, bad); 676 ceph_decode_16_safe(p, end, version, bad);
649 if (version > CEPH_OSDMAP_VERSION) { 677 if (version > 6) {
650 pr_warning("got unknown v %d > %d of osdmap\n", version, 678 pr_warning("got unknown v %d > 6 of osdmap\n", version);
651 CEPH_OSDMAP_VERSION); 679 goto bad;
680 }
681 if (version < 6) {
682 pr_warning("got old v %d < 6 of osdmap\n", version);
652 goto bad; 683 goto bad;
653 } 684 }
654 685
@@ -660,20 +691,12 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
660 691
661 ceph_decode_32_safe(p, end, max, bad); 692 ceph_decode_32_safe(p, end, max, bad);
662 while (max--) { 693 while (max--) {
663 ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad); 694 ceph_decode_need(p, end, 8 + 2, bad);
664 err = -ENOMEM; 695 err = -ENOMEM;
665 pi = kzalloc(sizeof(*pi), GFP_NOFS); 696 pi = kzalloc(sizeof(*pi), GFP_NOFS);
666 if (!pi) 697 if (!pi)
667 goto bad; 698 goto bad;
668 pi->id = ceph_decode_32(p); 699 pi->id = ceph_decode_64(p);
669 err = -EINVAL;
670 ev = ceph_decode_8(p); /* encoding version */
671 if (ev > CEPH_PG_POOL_VERSION) {
672 pr_warning("got unknown v %d > %d of ceph_pg_pool\n",
673 ev, CEPH_PG_POOL_VERSION);
674 kfree(pi);
675 goto bad;
676 }
677 err = __decode_pool(p, end, pi); 700 err = __decode_pool(p, end, pi);
678 if (err < 0) { 701 if (err < 0) {
679 kfree(pi); 702 kfree(pi);
@@ -682,12 +705,10 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
682 __insert_pg_pool(&map->pg_pools, pi); 705 __insert_pg_pool(&map->pg_pools, pi);
683 } 706 }
684 707
685 if (version >= 5) { 708 err = __decode_pool_names(p, end, map);
686 err = __decode_pool_names(p, end, map); 709 if (err < 0) {
687 if (err < 0) { 710 dout("fail to decode pool names");
688 dout("fail to decode pool names"); 711 goto bad;
689 goto bad;
690 }
691 } 712 }
692 713
693 ceph_decode_32_safe(p, end, map->pool_max, bad); 714 ceph_decode_32_safe(p, end, map->pool_max, bad);
@@ -788,16 +809,17 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
788 struct ceph_fsid fsid; 809 struct ceph_fsid fsid;
789 u32 epoch = 0; 810 u32 epoch = 0;
790 struct ceph_timespec modified; 811 struct ceph_timespec modified;
791 u32 len, pool; 812 s32 len;
792 __s32 new_pool_max, new_flags, max; 813 u64 pool;
814 __s64 new_pool_max;
815 __s32 new_flags, max;
793 void *start = *p; 816 void *start = *p;
794 int err = -EINVAL; 817 int err = -EINVAL;
795 u16 version; 818 u16 version;
796 819
797 ceph_decode_16_safe(p, end, version, bad); 820 ceph_decode_16_safe(p, end, version, bad);
798 if (version > CEPH_OSDMAP_INC_VERSION) { 821 if (version > 6) {
799 pr_warning("got unknown v %d > %d of inc osdmap\n", version, 822 pr_warning("got unknown v %d > %d of inc osdmap\n", version, 6);
800 CEPH_OSDMAP_INC_VERSION);
801 goto bad; 823 goto bad;
802 } 824 }
803 825
@@ -807,7 +829,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
807 epoch = ceph_decode_32(p); 829 epoch = ceph_decode_32(p);
808 BUG_ON(epoch != map->epoch+1); 830 BUG_ON(epoch != map->epoch+1);
809 ceph_decode_copy(p, &modified, sizeof(modified)); 831 ceph_decode_copy(p, &modified, sizeof(modified));
810 new_pool_max = ceph_decode_32(p); 832 new_pool_max = ceph_decode_64(p);
811 new_flags = ceph_decode_32(p); 833 new_flags = ceph_decode_32(p);
812 834
813 /* full map? */ 835 /* full map? */
@@ -857,18 +879,9 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
857 /* new_pool */ 879 /* new_pool */
858 ceph_decode_32_safe(p, end, len, bad); 880 ceph_decode_32_safe(p, end, len, bad);
859 while (len--) { 881 while (len--) {
860 __u8 ev;
861 struct ceph_pg_pool_info *pi; 882 struct ceph_pg_pool_info *pi;
862 883
863 ceph_decode_32_safe(p, end, pool, bad); 884 ceph_decode_64_safe(p, end, pool, bad);
864 ceph_decode_need(p, end, 1 + sizeof(pi->v), bad);
865 ev = ceph_decode_8(p); /* encoding version */
866 if (ev > CEPH_PG_POOL_VERSION) {
867 pr_warning("got unknown v %d > %d of ceph_pg_pool\n",
868 ev, CEPH_PG_POOL_VERSION);
869 err = -EINVAL;
870 goto bad;
871 }
872 pi = __lookup_pg_pool(&map->pg_pools, pool); 885 pi = __lookup_pg_pool(&map->pg_pools, pool);
873 if (!pi) { 886 if (!pi) {
874 pi = kzalloc(sizeof(*pi), GFP_NOFS); 887 pi = kzalloc(sizeof(*pi), GFP_NOFS);
@@ -894,7 +907,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
894 while (len--) { 907 while (len--) {
895 struct ceph_pg_pool_info *pi; 908 struct ceph_pg_pool_info *pi;
896 909
897 ceph_decode_32_safe(p, end, pool, bad); 910 ceph_decode_64_safe(p, end, pool, bad);
898 pi = __lookup_pg_pool(&map->pg_pools, pool); 911 pi = __lookup_pg_pool(&map->pg_pools, pool);
899 if (pi) 912 if (pi)
900 __remove_pg_pool(&map->pg_pools, pi); 913 __remove_pg_pool(&map->pg_pools, pi);
@@ -1097,8 +1110,8 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol,
1097 pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool); 1110 pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool);
1098 if (!pool) 1111 if (!pool)
1099 return -EIO; 1112 return -EIO;
1100 pgid.seed = ceph_str_hash(pool->v.object_hash, oid, strlen(oid)); 1113 pgid.seed = ceph_str_hash(pool->object_hash, oid, strlen(oid));
1101 num = le32_to_cpu(pool->v.pg_num); 1114 num = pool->pg_num;
1102 num_mask = pool->pg_num_mask; 1115 num_mask = pool->pg_num_mask;
1103 1116
1104 dout("calc_object_layout '%s' pgid %lld.%x\n", oid, pgid.pool, 1117 dout("calc_object_layout '%s' pgid %lld.%x\n", oid, pgid.pool,
@@ -1132,8 +1145,7 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
1132 return NULL; 1145 return NULL;
1133 1146
1134 /* pg_temp? */ 1147 /* pg_temp? */
1135 t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num), 1148 t = ceph_stable_mod(ps, pool->pg_num, pool->pgp_num_mask);
1136 pool->pgp_num_mask);
1137 pgid.seed = t; 1149 pgid.seed = t;
1138 pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid); 1150 pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid);
1139 if (pg) { 1151 if (pg) {
@@ -1142,26 +1154,24 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
1142 } 1154 }
1143 1155
1144 /* crush */ 1156 /* crush */
1145 ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset, 1157 ruleno = crush_find_rule(osdmap->crush, pool->crush_ruleset,
1146 pool->v.type, pool->v.size); 1158 pool->type, pool->size);
1147 if (ruleno < 0) { 1159 if (ruleno < 0) {
1148 pr_err("no crush rule pool %d ruleset %d type %d size %d\n", 1160 pr_err("no crush rule pool %d ruleset %d type %d size %d\n",
1149 poolid, pool->v.crush_ruleset, pool->v.type, 1161 poolid, pool->crush_ruleset, pool->type,
1150 pool->v.size); 1162 pool->size);
1151 return NULL; 1163 return NULL;
1152 } 1164 }
1153 1165
1154 pps = ceph_stable_mod(ps, 1166 pps = ceph_stable_mod(ps, pool->pgp_num, pool->pgp_num_mask);
1155 le32_to_cpu(pool->v.pgp_num),
1156 pool->pgp_num_mask);
1157 pps += poolid; 1167 pps += poolid;
1158 r = crush_do_rule(osdmap->crush, ruleno, pps, osds, 1168 r = crush_do_rule(osdmap->crush, ruleno, pps, osds,
1159 min_t(int, pool->v.size, *num), 1169 min_t(int, pool->size, *num),
1160 osdmap->osd_weight); 1170 osdmap->osd_weight);
1161 if (r < 0) { 1171 if (r < 0) {
1162 pr_err("error %d from crush rule: pool %d ruleset %d type %d" 1172 pr_err("error %d from crush rule: pool %d ruleset %d type %d"
1163 " size %d\n", r, poolid, pool->v.crush_ruleset, 1173 " size %d\n", r, poolid, pool->crush_ruleset,
1164 pool->v.type, pool->v.size); 1174 pool->type, pool->size);
1165 return NULL; 1175 return NULL;
1166 } 1176 }
1167 *num = r; 1177 *num = r;