diff options
author | Sage Weil <sage@inktank.com> | 2013-02-23 13:41:09 -0500 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2013-02-26 18:02:25 -0500 |
commit | 4f6a7e5ee1393ec4b243b39dac9f36992d161540 (patch) | |
tree | 547684ad1dc0e1e3e376e958ab74e164d6972623 /net/ceph/osdmap.c | |
parent | ec73a754989c27628c9037887df919561280519c (diff) |
ceph: update support for PGID64, PGPOOL3, OSDENC protocol features
Support (and require) the PGID64, PGPOOL3, and OSDENC protocol features.
These have been present in ceph.git since v0.42, Feb 2012. Require these
features to simplify support; nobody is running older userspace.
Note that the new request and reply encoding is still not in place, so the new
code is not yet functional.
Signed-off-by: Sage Weil <sage@inktank.com>
Reviewed-by: Alex Elder <elder@inktank.com>
Diffstat (limited to 'net/ceph/osdmap.c')
-rw-r--r-- | net/ceph/osdmap.c | 162 |
1 file changed, 86 insertions, 76 deletions
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 81118db5bd11..911919320d2e 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c | |||
@@ -45,13 +45,8 @@ static int calc_bits_of(unsigned int t) | |||
45 | */ | 45 | */ |
46 | static void calc_pg_masks(struct ceph_pg_pool_info *pi) | 46 | static void calc_pg_masks(struct ceph_pg_pool_info *pi) |
47 | { | 47 | { |
48 | pi->pg_num_mask = (1 << calc_bits_of(le32_to_cpu(pi->v.pg_num)-1)) - 1; | 48 | pi->pg_num_mask = (1 << calc_bits_of(pi->pg_num-1)) - 1; |
49 | pi->pgp_num_mask = | 49 | pi->pgp_num_mask = (1 << calc_bits_of(pi->pgp_num-1)) - 1; |
50 | (1 << calc_bits_of(le32_to_cpu(pi->v.pgp_num)-1)) - 1; | ||
51 | pi->lpg_num_mask = | ||
52 | (1 << calc_bits_of(le32_to_cpu(pi->v.lpg_num)-1)) - 1; | ||
53 | pi->lpgp_num_mask = | ||
54 | (1 << calc_bits_of(le32_to_cpu(pi->v.lpgp_num)-1)) - 1; | ||
55 | } | 50 | } |
56 | 51 | ||
57 | /* | 52 | /* |
@@ -452,7 +447,7 @@ static int __insert_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *new) | |||
452 | return 0; | 447 | return 0; |
453 | } | 448 | } |
454 | 449 | ||
455 | static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id) | 450 | static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, u64 id) |
456 | { | 451 | { |
457 | struct ceph_pg_pool_info *pi; | 452 | struct ceph_pg_pool_info *pi; |
458 | struct rb_node *n = root->rb_node; | 453 | struct rb_node *n = root->rb_node; |
@@ -508,24 +503,57 @@ static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi) | |||
508 | 503 | ||
509 | static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi) | 504 | static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi) |
510 | { | 505 | { |
511 | unsigned int n, m; | 506 | u8 ev, cv; |
507 | unsigned len, num; | ||
508 | void *pool_end; | ||
509 | |||
510 | ceph_decode_need(p, end, 2 + 4, bad); | ||
511 | ev = ceph_decode_8(p); /* encoding version */ | ||
512 | cv = ceph_decode_8(p); /* compat version */ | ||
513 | if (ev < 5) { | ||
514 | pr_warning("got v %d < 5 cv %d of ceph_pg_pool\n", ev, cv); | ||
515 | return -EINVAL; | ||
516 | } | ||
517 | if (cv > 7) { | ||
518 | pr_warning("got v %d cv %d > 7 of ceph_pg_pool\n", ev, cv); | ||
519 | return -EINVAL; | ||
520 | } | ||
521 | len = ceph_decode_32(p); | ||
522 | ceph_decode_need(p, end, len, bad); | ||
523 | pool_end = *p + len; | ||
512 | 524 | ||
513 | ceph_decode_copy(p, &pi->v, sizeof(pi->v)); | 525 | pi->type = ceph_decode_8(p); |
514 | calc_pg_masks(pi); | 526 | pi->size = ceph_decode_8(p); |
527 | pi->crush_ruleset = ceph_decode_8(p); | ||
528 | pi->object_hash = ceph_decode_8(p); | ||
515 | 529 | ||
516 | /* num_snaps * snap_info_t */ | 530 | pi->pg_num = ceph_decode_32(p); |
517 | n = le32_to_cpu(pi->v.num_snaps); | 531 | pi->pgp_num = ceph_decode_32(p); |
518 | while (n--) { | 532 | |
519 | ceph_decode_need(p, end, sizeof(u64) + 1 + sizeof(u64) + | 533 | *p += 4 + 4; /* skip lpg* */ |
520 | sizeof(struct ceph_timespec), bad); | 534 | *p += 4; /* skip last_change */ |
521 | *p += sizeof(u64) + /* key */ | 535 | *p += 8 + 4; /* skip snap_seq, snap_epoch */ |
522 | 1 + sizeof(u64) + /* u8, snapid */ | 536 | |
523 | sizeof(struct ceph_timespec); | 537 | /* skip snaps */ |
524 | m = ceph_decode_32(p); /* snap name */ | 538 | num = ceph_decode_32(p); |
525 | *p += m; | 539 | while (num--) { |
540 | *p += 8; /* snapid key */ | ||
541 | *p += 1 + 1; /* versions */ | ||
542 | len = ceph_decode_32(p); | ||
543 | *p += len; | ||
526 | } | 544 | } |
527 | 545 | ||
528 | *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2; | 546 | /* skip removed snaps */ |
547 | num = ceph_decode_32(p); | ||
548 | *p += num * (8 + 8); | ||
549 | |||
550 | *p += 8; /* skip auid */ | ||
551 | pi->flags = ceph_decode_64(p); | ||
552 | |||
553 | /* ignore the rest */ | ||
554 | |||
555 | *p = pool_end; | ||
556 | calc_pg_masks(pi); | ||
529 | return 0; | 557 | return 0; |
530 | 558 | ||
531 | bad: | 559 | bad: |
@@ -535,14 +563,15 @@ bad: | |||
535 | static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map) | 563 | static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map) |
536 | { | 564 | { |
537 | struct ceph_pg_pool_info *pi; | 565 | struct ceph_pg_pool_info *pi; |
538 | u32 num, len, pool; | 566 | u32 num, len; |
567 | u64 pool; | ||
539 | 568 | ||
540 | ceph_decode_32_safe(p, end, num, bad); | 569 | ceph_decode_32_safe(p, end, num, bad); |
541 | dout(" %d pool names\n", num); | 570 | dout(" %d pool names\n", num); |
542 | while (num--) { | 571 | while (num--) { |
543 | ceph_decode_32_safe(p, end, pool, bad); | 572 | ceph_decode_64_safe(p, end, pool, bad); |
544 | ceph_decode_32_safe(p, end, len, bad); | 573 | ceph_decode_32_safe(p, end, len, bad); |
545 | dout(" pool %d len %d\n", pool, len); | 574 | dout(" pool %llu len %d\n", pool, len); |
546 | ceph_decode_need(p, end, len, bad); | 575 | ceph_decode_need(p, end, len, bad); |
547 | pi = __lookup_pg_pool(&map->pg_pools, pool); | 576 | pi = __lookup_pg_pool(&map->pg_pools, pool); |
548 | if (pi) { | 577 | if (pi) { |
@@ -633,7 +662,6 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) | |||
633 | struct ceph_osdmap *map; | 662 | struct ceph_osdmap *map; |
634 | u16 version; | 663 | u16 version; |
635 | u32 len, max, i; | 664 | u32 len, max, i; |
636 | u8 ev; | ||
637 | int err = -EINVAL; | 665 | int err = -EINVAL; |
638 | void *start = *p; | 666 | void *start = *p; |
639 | struct ceph_pg_pool_info *pi; | 667 | struct ceph_pg_pool_info *pi; |
@@ -646,9 +674,12 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) | |||
646 | map->pg_temp = RB_ROOT; | 674 | map->pg_temp = RB_ROOT; |
647 | 675 | ||
648 | ceph_decode_16_safe(p, end, version, bad); | 676 | ceph_decode_16_safe(p, end, version, bad); |
649 | if (version > CEPH_OSDMAP_VERSION) { | 677 | if (version > 6) { |
650 | pr_warning("got unknown v %d > %d of osdmap\n", version, | 678 | pr_warning("got unknown v %d > 6 of osdmap\n", version); |
651 | CEPH_OSDMAP_VERSION); | 679 | goto bad; |
680 | } | ||
681 | if (version < 6) { | ||
682 | pr_warning("got old v %d < 6 of osdmap\n", version); | ||
652 | goto bad; | 683 | goto bad; |
653 | } | 684 | } |
654 | 685 | ||
@@ -660,20 +691,12 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) | |||
660 | 691 | ||
661 | ceph_decode_32_safe(p, end, max, bad); | 692 | ceph_decode_32_safe(p, end, max, bad); |
662 | while (max--) { | 693 | while (max--) { |
663 | ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad); | 694 | ceph_decode_need(p, end, 8 + 2, bad); |
664 | err = -ENOMEM; | 695 | err = -ENOMEM; |
665 | pi = kzalloc(sizeof(*pi), GFP_NOFS); | 696 | pi = kzalloc(sizeof(*pi), GFP_NOFS); |
666 | if (!pi) | 697 | if (!pi) |
667 | goto bad; | 698 | goto bad; |
668 | pi->id = ceph_decode_32(p); | 699 | pi->id = ceph_decode_64(p); |
669 | err = -EINVAL; | ||
670 | ev = ceph_decode_8(p); /* encoding version */ | ||
671 | if (ev > CEPH_PG_POOL_VERSION) { | ||
672 | pr_warning("got unknown v %d > %d of ceph_pg_pool\n", | ||
673 | ev, CEPH_PG_POOL_VERSION); | ||
674 | kfree(pi); | ||
675 | goto bad; | ||
676 | } | ||
677 | err = __decode_pool(p, end, pi); | 700 | err = __decode_pool(p, end, pi); |
678 | if (err < 0) { | 701 | if (err < 0) { |
679 | kfree(pi); | 702 | kfree(pi); |
@@ -682,12 +705,10 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) | |||
682 | __insert_pg_pool(&map->pg_pools, pi); | 705 | __insert_pg_pool(&map->pg_pools, pi); |
683 | } | 706 | } |
684 | 707 | ||
685 | if (version >= 5) { | 708 | err = __decode_pool_names(p, end, map); |
686 | err = __decode_pool_names(p, end, map); | 709 | if (err < 0) { |
687 | if (err < 0) { | 710 | dout("fail to decode pool names"); |
688 | dout("fail to decode pool names"); | 711 | goto bad; |
689 | goto bad; | ||
690 | } | ||
691 | } | 712 | } |
692 | 713 | ||
693 | ceph_decode_32_safe(p, end, map->pool_max, bad); | 714 | ceph_decode_32_safe(p, end, map->pool_max, bad); |
@@ -788,16 +809,17 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
788 | struct ceph_fsid fsid; | 809 | struct ceph_fsid fsid; |
789 | u32 epoch = 0; | 810 | u32 epoch = 0; |
790 | struct ceph_timespec modified; | 811 | struct ceph_timespec modified; |
791 | u32 len, pool; | 812 | s32 len; |
792 | __s32 new_pool_max, new_flags, max; | 813 | u64 pool; |
814 | __s64 new_pool_max; | ||
815 | __s32 new_flags, max; | ||
793 | void *start = *p; | 816 | void *start = *p; |
794 | int err = -EINVAL; | 817 | int err = -EINVAL; |
795 | u16 version; | 818 | u16 version; |
796 | 819 | ||
797 | ceph_decode_16_safe(p, end, version, bad); | 820 | ceph_decode_16_safe(p, end, version, bad); |
798 | if (version > CEPH_OSDMAP_INC_VERSION) { | 821 | if (version > 6) { |
799 | pr_warning("got unknown v %d > %d of inc osdmap\n", version, | 822 | pr_warning("got unknown v %d > %d of inc osdmap\n", version, 6); |
800 | CEPH_OSDMAP_INC_VERSION); | ||
801 | goto bad; | 823 | goto bad; |
802 | } | 824 | } |
803 | 825 | ||
@@ -807,7 +829,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
807 | epoch = ceph_decode_32(p); | 829 | epoch = ceph_decode_32(p); |
808 | BUG_ON(epoch != map->epoch+1); | 830 | BUG_ON(epoch != map->epoch+1); |
809 | ceph_decode_copy(p, &modified, sizeof(modified)); | 831 | ceph_decode_copy(p, &modified, sizeof(modified)); |
810 | new_pool_max = ceph_decode_32(p); | 832 | new_pool_max = ceph_decode_64(p); |
811 | new_flags = ceph_decode_32(p); | 833 | new_flags = ceph_decode_32(p); |
812 | 834 | ||
813 | /* full map? */ | 835 | /* full map? */ |
@@ -857,18 +879,9 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
857 | /* new_pool */ | 879 | /* new_pool */ |
858 | ceph_decode_32_safe(p, end, len, bad); | 880 | ceph_decode_32_safe(p, end, len, bad); |
859 | while (len--) { | 881 | while (len--) { |
860 | __u8 ev; | ||
861 | struct ceph_pg_pool_info *pi; | 882 | struct ceph_pg_pool_info *pi; |
862 | 883 | ||
863 | ceph_decode_32_safe(p, end, pool, bad); | 884 | ceph_decode_64_safe(p, end, pool, bad); |
864 | ceph_decode_need(p, end, 1 + sizeof(pi->v), bad); | ||
865 | ev = ceph_decode_8(p); /* encoding version */ | ||
866 | if (ev > CEPH_PG_POOL_VERSION) { | ||
867 | pr_warning("got unknown v %d > %d of ceph_pg_pool\n", | ||
868 | ev, CEPH_PG_POOL_VERSION); | ||
869 | err = -EINVAL; | ||
870 | goto bad; | ||
871 | } | ||
872 | pi = __lookup_pg_pool(&map->pg_pools, pool); | 885 | pi = __lookup_pg_pool(&map->pg_pools, pool); |
873 | if (!pi) { | 886 | if (!pi) { |
874 | pi = kzalloc(sizeof(*pi), GFP_NOFS); | 887 | pi = kzalloc(sizeof(*pi), GFP_NOFS); |
@@ -894,7 +907,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
894 | while (len--) { | 907 | while (len--) { |
895 | struct ceph_pg_pool_info *pi; | 908 | struct ceph_pg_pool_info *pi; |
896 | 909 | ||
897 | ceph_decode_32_safe(p, end, pool, bad); | 910 | ceph_decode_64_safe(p, end, pool, bad); |
898 | pi = __lookup_pg_pool(&map->pg_pools, pool); | 911 | pi = __lookup_pg_pool(&map->pg_pools, pool); |
899 | if (pi) | 912 | if (pi) |
900 | __remove_pg_pool(&map->pg_pools, pi); | 913 | __remove_pg_pool(&map->pg_pools, pi); |
@@ -1097,8 +1110,8 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol, | |||
1097 | pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool); | 1110 | pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool); |
1098 | if (!pool) | 1111 | if (!pool) |
1099 | return -EIO; | 1112 | return -EIO; |
1100 | pgid.seed = ceph_str_hash(pool->v.object_hash, oid, strlen(oid)); | 1113 | pgid.seed = ceph_str_hash(pool->object_hash, oid, strlen(oid)); |
1101 | num = le32_to_cpu(pool->v.pg_num); | 1114 | num = pool->pg_num; |
1102 | num_mask = pool->pg_num_mask; | 1115 | num_mask = pool->pg_num_mask; |
1103 | 1116 | ||
1104 | dout("calc_object_layout '%s' pgid %lld.%x\n", oid, pgid.pool, | 1117 | dout("calc_object_layout '%s' pgid %lld.%x\n", oid, pgid.pool, |
@@ -1132,8 +1145,7 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, | |||
1132 | return NULL; | 1145 | return NULL; |
1133 | 1146 | ||
1134 | /* pg_temp? */ | 1147 | /* pg_temp? */ |
1135 | t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num), | 1148 | t = ceph_stable_mod(ps, pool->pg_num, pool->pgp_num_mask); |
1136 | pool->pgp_num_mask); | ||
1137 | pgid.seed = t; | 1149 | pgid.seed = t; |
1138 | pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid); | 1150 | pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid); |
1139 | if (pg) { | 1151 | if (pg) { |
@@ -1142,26 +1154,24 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, | |||
1142 | } | 1154 | } |
1143 | 1155 | ||
1144 | /* crush */ | 1156 | /* crush */ |
1145 | ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset, | 1157 | ruleno = crush_find_rule(osdmap->crush, pool->crush_ruleset, |
1146 | pool->v.type, pool->v.size); | 1158 | pool->type, pool->size); |
1147 | if (ruleno < 0) { | 1159 | if (ruleno < 0) { |
1148 | pr_err("no crush rule pool %d ruleset %d type %d size %d\n", | 1160 | pr_err("no crush rule pool %d ruleset %d type %d size %d\n", |
1149 | poolid, pool->v.crush_ruleset, pool->v.type, | 1161 | poolid, pool->crush_ruleset, pool->type, |
1150 | pool->v.size); | 1162 | pool->size); |
1151 | return NULL; | 1163 | return NULL; |
1152 | } | 1164 | } |
1153 | 1165 | ||
1154 | pps = ceph_stable_mod(ps, | 1166 | pps = ceph_stable_mod(ps, pool->pgp_num, pool->pgp_num_mask); |
1155 | le32_to_cpu(pool->v.pgp_num), | ||
1156 | pool->pgp_num_mask); | ||
1157 | pps += poolid; | 1167 | pps += poolid; |
1158 | r = crush_do_rule(osdmap->crush, ruleno, pps, osds, | 1168 | r = crush_do_rule(osdmap->crush, ruleno, pps, osds, |
1159 | min_t(int, pool->v.size, *num), | 1169 | min_t(int, pool->size, *num), |
1160 | osdmap->osd_weight); | 1170 | osdmap->osd_weight); |
1161 | if (r < 0) { | 1171 | if (r < 0) { |
1162 | pr_err("error %d from crush rule: pool %d ruleset %d type %d" | 1172 | pr_err("error %d from crush rule: pool %d ruleset %d type %d" |
1163 | " size %d\n", r, poolid, pool->v.crush_ruleset, | 1173 | " size %d\n", r, poolid, pool->crush_ruleset, |
1164 | pool->v.type, pool->v.size); | 1174 | pool->type, pool->size); |
1165 | return NULL; | 1175 | return NULL; |
1166 | } | 1176 | } |
1167 | *num = r; | 1177 | *num = r; |