diff options
Diffstat (limited to 'net/ceph/osdmap.c')
-rw-r--r-- | net/ceph/osdmap.c | 162 |
1 files changed, 86 insertions, 76 deletions
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 81118db5bd11..911919320d2e 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c | |||
@@ -45,13 +45,8 @@ static int calc_bits_of(unsigned int t) | |||
45 | */ | 45 | */ |
46 | static void calc_pg_masks(struct ceph_pg_pool_info *pi) | 46 | static void calc_pg_masks(struct ceph_pg_pool_info *pi) |
47 | { | 47 | { |
48 | pi->pg_num_mask = (1 << calc_bits_of(le32_to_cpu(pi->v.pg_num)-1)) - 1; | 48 | pi->pg_num_mask = (1 << calc_bits_of(pi->pg_num-1)) - 1; |
49 | pi->pgp_num_mask = | 49 | pi->pgp_num_mask = (1 << calc_bits_of(pi->pgp_num-1)) - 1; |
50 | (1 << calc_bits_of(le32_to_cpu(pi->v.pgp_num)-1)) - 1; | ||
51 | pi->lpg_num_mask = | ||
52 | (1 << calc_bits_of(le32_to_cpu(pi->v.lpg_num)-1)) - 1; | ||
53 | pi->lpgp_num_mask = | ||
54 | (1 << calc_bits_of(le32_to_cpu(pi->v.lpgp_num)-1)) - 1; | ||
55 | } | 50 | } |
56 | 51 | ||
57 | /* | 52 | /* |
@@ -452,7 +447,7 @@ static int __insert_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *new) | |||
452 | return 0; | 447 | return 0; |
453 | } | 448 | } |
454 | 449 | ||
455 | static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id) | 450 | static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, u64 id) |
456 | { | 451 | { |
457 | struct ceph_pg_pool_info *pi; | 452 | struct ceph_pg_pool_info *pi; |
458 | struct rb_node *n = root->rb_node; | 453 | struct rb_node *n = root->rb_node; |
@@ -508,24 +503,57 @@ static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi) | |||
508 | 503 | ||
509 | static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi) | 504 | static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi) |
510 | { | 505 | { |
511 | unsigned int n, m; | 506 | u8 ev, cv; |
507 | unsigned len, num; | ||
508 | void *pool_end; | ||
509 | |||
510 | ceph_decode_need(p, end, 2 + 4, bad); | ||
511 | ev = ceph_decode_8(p); /* encoding version */ | ||
512 | cv = ceph_decode_8(p); /* compat version */ | ||
513 | if (ev < 5) { | ||
514 | pr_warning("got v %d < 5 cv %d of ceph_pg_pool\n", ev, cv); | ||
515 | return -EINVAL; | ||
516 | } | ||
517 | if (cv > 7) { | ||
518 | pr_warning("got v %d cv %d > 7 of ceph_pg_pool\n", ev, cv); | ||
519 | return -EINVAL; | ||
520 | } | ||
521 | len = ceph_decode_32(p); | ||
522 | ceph_decode_need(p, end, len, bad); | ||
523 | pool_end = *p + len; | ||
512 | 524 | ||
513 | ceph_decode_copy(p, &pi->v, sizeof(pi->v)); | 525 | pi->type = ceph_decode_8(p); |
514 | calc_pg_masks(pi); | 526 | pi->size = ceph_decode_8(p); |
527 | pi->crush_ruleset = ceph_decode_8(p); | ||
528 | pi->object_hash = ceph_decode_8(p); | ||
515 | 529 | ||
516 | /* num_snaps * snap_info_t */ | 530 | pi->pg_num = ceph_decode_32(p); |
517 | n = le32_to_cpu(pi->v.num_snaps); | 531 | pi->pgp_num = ceph_decode_32(p); |
518 | while (n--) { | 532 | |
519 | ceph_decode_need(p, end, sizeof(u64) + 1 + sizeof(u64) + | 533 | *p += 4 + 4; /* skip lpg* */ |
520 | sizeof(struct ceph_timespec), bad); | 534 | *p += 4; /* skip last_change */ |
521 | *p += sizeof(u64) + /* key */ | 535 | *p += 8 + 4; /* skip snap_seq, snap_epoch */ |
522 | 1 + sizeof(u64) + /* u8, snapid */ | 536 | |
523 | sizeof(struct ceph_timespec); | 537 | /* skip snaps */ |
524 | m = ceph_decode_32(p); /* snap name */ | 538 | num = ceph_decode_32(p); |
525 | *p += m; | 539 | while (num--) { |
540 | *p += 8; /* snapid key */ | ||
541 | *p += 1 + 1; /* versions */ | ||
542 | len = ceph_decode_32(p); | ||
543 | *p += len; | ||
526 | } | 544 | } |
527 | 545 | ||
528 | *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2; | 546 | /* skip removed snaps */ |
547 | num = ceph_decode_32(p); | ||
548 | *p += num * (8 + 8); | ||
549 | |||
550 | *p += 8; /* skip auid */ | ||
551 | pi->flags = ceph_decode_64(p); | ||
552 | |||
553 | /* ignore the rest */ | ||
554 | |||
555 | *p = pool_end; | ||
556 | calc_pg_masks(pi); | ||
529 | return 0; | 557 | return 0; |
530 | 558 | ||
531 | bad: | 559 | bad: |
@@ -535,14 +563,15 @@ bad: | |||
535 | static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map) | 563 | static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map) |
536 | { | 564 | { |
537 | struct ceph_pg_pool_info *pi; | 565 | struct ceph_pg_pool_info *pi; |
538 | u32 num, len, pool; | 566 | u32 num, len; |
567 | u64 pool; | ||
539 | 568 | ||
540 | ceph_decode_32_safe(p, end, num, bad); | 569 | ceph_decode_32_safe(p, end, num, bad); |
541 | dout(" %d pool names\n", num); | 570 | dout(" %d pool names\n", num); |
542 | while (num--) { | 571 | while (num--) { |
543 | ceph_decode_32_safe(p, end, pool, bad); | 572 | ceph_decode_64_safe(p, end, pool, bad); |
544 | ceph_decode_32_safe(p, end, len, bad); | 573 | ceph_decode_32_safe(p, end, len, bad); |
545 | dout(" pool %d len %d\n", pool, len); | 574 | dout(" pool %llu len %d\n", pool, len); |
546 | ceph_decode_need(p, end, len, bad); | 575 | ceph_decode_need(p, end, len, bad); |
547 | pi = __lookup_pg_pool(&map->pg_pools, pool); | 576 | pi = __lookup_pg_pool(&map->pg_pools, pool); |
548 | if (pi) { | 577 | if (pi) { |
@@ -633,7 +662,6 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) | |||
633 | struct ceph_osdmap *map; | 662 | struct ceph_osdmap *map; |
634 | u16 version; | 663 | u16 version; |
635 | u32 len, max, i; | 664 | u32 len, max, i; |
636 | u8 ev; | ||
637 | int err = -EINVAL; | 665 | int err = -EINVAL; |
638 | void *start = *p; | 666 | void *start = *p; |
639 | struct ceph_pg_pool_info *pi; | 667 | struct ceph_pg_pool_info *pi; |
@@ -646,9 +674,12 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) | |||
646 | map->pg_temp = RB_ROOT; | 674 | map->pg_temp = RB_ROOT; |
647 | 675 | ||
648 | ceph_decode_16_safe(p, end, version, bad); | 676 | ceph_decode_16_safe(p, end, version, bad); |
649 | if (version > CEPH_OSDMAP_VERSION) { | 677 | if (version > 6) { |
650 | pr_warning("got unknown v %d > %d of osdmap\n", version, | 678 | pr_warning("got unknown v %d > 6 of osdmap\n", version); |
651 | CEPH_OSDMAP_VERSION); | 679 | goto bad; |
680 | } | ||
681 | if (version < 6) { | ||
682 | pr_warning("got old v %d < 6 of osdmap\n", version); | ||
652 | goto bad; | 683 | goto bad; |
653 | } | 684 | } |
654 | 685 | ||
@@ -660,20 +691,12 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) | |||
660 | 691 | ||
661 | ceph_decode_32_safe(p, end, max, bad); | 692 | ceph_decode_32_safe(p, end, max, bad); |
662 | while (max--) { | 693 | while (max--) { |
663 | ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad); | 694 | ceph_decode_need(p, end, 8 + 2, bad); |
664 | err = -ENOMEM; | 695 | err = -ENOMEM; |
665 | pi = kzalloc(sizeof(*pi), GFP_NOFS); | 696 | pi = kzalloc(sizeof(*pi), GFP_NOFS); |
666 | if (!pi) | 697 | if (!pi) |
667 | goto bad; | 698 | goto bad; |
668 | pi->id = ceph_decode_32(p); | 699 | pi->id = ceph_decode_64(p); |
669 | err = -EINVAL; | ||
670 | ev = ceph_decode_8(p); /* encoding version */ | ||
671 | if (ev > CEPH_PG_POOL_VERSION) { | ||
672 | pr_warning("got unknown v %d > %d of ceph_pg_pool\n", | ||
673 | ev, CEPH_PG_POOL_VERSION); | ||
674 | kfree(pi); | ||
675 | goto bad; | ||
676 | } | ||
677 | err = __decode_pool(p, end, pi); | 700 | err = __decode_pool(p, end, pi); |
678 | if (err < 0) { | 701 | if (err < 0) { |
679 | kfree(pi); | 702 | kfree(pi); |
@@ -682,12 +705,10 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) | |||
682 | __insert_pg_pool(&map->pg_pools, pi); | 705 | __insert_pg_pool(&map->pg_pools, pi); |
683 | } | 706 | } |
684 | 707 | ||
685 | if (version >= 5) { | 708 | err = __decode_pool_names(p, end, map); |
686 | err = __decode_pool_names(p, end, map); | 709 | if (err < 0) { |
687 | if (err < 0) { | 710 | dout("fail to decode pool names"); |
688 | dout("fail to decode pool names"); | 711 | goto bad; |
689 | goto bad; | ||
690 | } | ||
691 | } | 712 | } |
692 | 713 | ||
693 | ceph_decode_32_safe(p, end, map->pool_max, bad); | 714 | ceph_decode_32_safe(p, end, map->pool_max, bad); |
@@ -788,16 +809,17 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
788 | struct ceph_fsid fsid; | 809 | struct ceph_fsid fsid; |
789 | u32 epoch = 0; | 810 | u32 epoch = 0; |
790 | struct ceph_timespec modified; | 811 | struct ceph_timespec modified; |
791 | u32 len, pool; | 812 | s32 len; |
792 | __s32 new_pool_max, new_flags, max; | 813 | u64 pool; |
814 | __s64 new_pool_max; | ||
815 | __s32 new_flags, max; | ||
793 | void *start = *p; | 816 | void *start = *p; |
794 | int err = -EINVAL; | 817 | int err = -EINVAL; |
795 | u16 version; | 818 | u16 version; |
796 | 819 | ||
797 | ceph_decode_16_safe(p, end, version, bad); | 820 | ceph_decode_16_safe(p, end, version, bad); |
798 | if (version > CEPH_OSDMAP_INC_VERSION) { | 821 | if (version > 6) { |
799 | pr_warning("got unknown v %d > %d of inc osdmap\n", version, | 822 | pr_warning("got unknown v %d > %d of inc osdmap\n", version, 6); |
800 | CEPH_OSDMAP_INC_VERSION); | ||
801 | goto bad; | 823 | goto bad; |
802 | } | 824 | } |
803 | 825 | ||
@@ -807,7 +829,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
807 | epoch = ceph_decode_32(p); | 829 | epoch = ceph_decode_32(p); |
808 | BUG_ON(epoch != map->epoch+1); | 830 | BUG_ON(epoch != map->epoch+1); |
809 | ceph_decode_copy(p, &modified, sizeof(modified)); | 831 | ceph_decode_copy(p, &modified, sizeof(modified)); |
810 | new_pool_max = ceph_decode_32(p); | 832 | new_pool_max = ceph_decode_64(p); |
811 | new_flags = ceph_decode_32(p); | 833 | new_flags = ceph_decode_32(p); |
812 | 834 | ||
813 | /* full map? */ | 835 | /* full map? */ |
@@ -857,18 +879,9 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
857 | /* new_pool */ | 879 | /* new_pool */ |
858 | ceph_decode_32_safe(p, end, len, bad); | 880 | ceph_decode_32_safe(p, end, len, bad); |
859 | while (len--) { | 881 | while (len--) { |
860 | __u8 ev; | ||
861 | struct ceph_pg_pool_info *pi; | 882 | struct ceph_pg_pool_info *pi; |
862 | 883 | ||
863 | ceph_decode_32_safe(p, end, pool, bad); | 884 | ceph_decode_64_safe(p, end, pool, bad); |
864 | ceph_decode_need(p, end, 1 + sizeof(pi->v), bad); | ||
865 | ev = ceph_decode_8(p); /* encoding version */ | ||
866 | if (ev > CEPH_PG_POOL_VERSION) { | ||
867 | pr_warning("got unknown v %d > %d of ceph_pg_pool\n", | ||
868 | ev, CEPH_PG_POOL_VERSION); | ||
869 | err = -EINVAL; | ||
870 | goto bad; | ||
871 | } | ||
872 | pi = __lookup_pg_pool(&map->pg_pools, pool); | 885 | pi = __lookup_pg_pool(&map->pg_pools, pool); |
873 | if (!pi) { | 886 | if (!pi) { |
874 | pi = kzalloc(sizeof(*pi), GFP_NOFS); | 887 | pi = kzalloc(sizeof(*pi), GFP_NOFS); |
@@ -894,7 +907,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
894 | while (len--) { | 907 | while (len--) { |
895 | struct ceph_pg_pool_info *pi; | 908 | struct ceph_pg_pool_info *pi; |
896 | 909 | ||
897 | ceph_decode_32_safe(p, end, pool, bad); | 910 | ceph_decode_64_safe(p, end, pool, bad); |
898 | pi = __lookup_pg_pool(&map->pg_pools, pool); | 911 | pi = __lookup_pg_pool(&map->pg_pools, pool); |
899 | if (pi) | 912 | if (pi) |
900 | __remove_pg_pool(&map->pg_pools, pi); | 913 | __remove_pg_pool(&map->pg_pools, pi); |
@@ -1097,8 +1110,8 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol, | |||
1097 | pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool); | 1110 | pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool); |
1098 | if (!pool) | 1111 | if (!pool) |
1099 | return -EIO; | 1112 | return -EIO; |
1100 | pgid.seed = ceph_str_hash(pool->v.object_hash, oid, strlen(oid)); | 1113 | pgid.seed = ceph_str_hash(pool->object_hash, oid, strlen(oid)); |
1101 | num = le32_to_cpu(pool->v.pg_num); | 1114 | num = pool->pg_num; |
1102 | num_mask = pool->pg_num_mask; | 1115 | num_mask = pool->pg_num_mask; |
1103 | 1116 | ||
1104 | dout("calc_object_layout '%s' pgid %lld.%x\n", oid, pgid.pool, | 1117 | dout("calc_object_layout '%s' pgid %lld.%x\n", oid, pgid.pool, |
@@ -1132,8 +1145,7 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, | |||
1132 | return NULL; | 1145 | return NULL; |
1133 | 1146 | ||
1134 | /* pg_temp? */ | 1147 | /* pg_temp? */ |
1135 | t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num), | 1148 | t = ceph_stable_mod(ps, pool->pg_num, pool->pgp_num_mask); |
1136 | pool->pgp_num_mask); | ||
1137 | pgid.seed = t; | 1149 | pgid.seed = t; |
1138 | pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid); | 1150 | pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid); |
1139 | if (pg) { | 1151 | if (pg) { |
@@ -1142,26 +1154,24 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, | |||
1142 | } | 1154 | } |
1143 | 1155 | ||
1144 | /* crush */ | 1156 | /* crush */ |
1145 | ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset, | 1157 | ruleno = crush_find_rule(osdmap->crush, pool->crush_ruleset, |
1146 | pool->v.type, pool->v.size); | 1158 | pool->type, pool->size); |
1147 | if (ruleno < 0) { | 1159 | if (ruleno < 0) { |
1148 | pr_err("no crush rule pool %d ruleset %d type %d size %d\n", | 1160 | pr_err("no crush rule pool %d ruleset %d type %d size %d\n", |
1149 | poolid, pool->v.crush_ruleset, pool->v.type, | 1161 | poolid, pool->crush_ruleset, pool->type, |
1150 | pool->v.size); | 1162 | pool->size); |
1151 | return NULL; | 1163 | return NULL; |
1152 | } | 1164 | } |
1153 | 1165 | ||
1154 | pps = ceph_stable_mod(ps, | 1166 | pps = ceph_stable_mod(ps, pool->pgp_num, pool->pgp_num_mask); |
1155 | le32_to_cpu(pool->v.pgp_num), | ||
1156 | pool->pgp_num_mask); | ||
1157 | pps += poolid; | 1167 | pps += poolid; |
1158 | r = crush_do_rule(osdmap->crush, ruleno, pps, osds, | 1168 | r = crush_do_rule(osdmap->crush, ruleno, pps, osds, |
1159 | min_t(int, pool->v.size, *num), | 1169 | min_t(int, pool->size, *num), |
1160 | osdmap->osd_weight); | 1170 | osdmap->osd_weight); |
1161 | if (r < 0) { | 1171 | if (r < 0) { |
1162 | pr_err("error %d from crush rule: pool %d ruleset %d type %d" | 1172 | pr_err("error %d from crush rule: pool %d ruleset %d type %d" |
1163 | " size %d\n", r, poolid, pool->v.crush_ruleset, | 1173 | " size %d\n", r, poolid, pool->crush_ruleset, |
1164 | pool->v.type, pool->v.size); | 1174 | pool->type, pool->size); |
1165 | return NULL; | 1175 | return NULL; |
1166 | } | 1176 | } |
1167 | *num = r; | 1177 | *num = r; |