about summary refs log tree commit diff stats
path: root/net/ceph/osdmap.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ceph/osdmap.c')
-rw-r--r-- net/ceph/osdmap.c 162
1 files changed, 86 insertions, 76 deletions
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 81118db5bd11..911919320d2e 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -45,13 +45,8 @@ static int calc_bits_of(unsigned int t)
45 */ 45 */
46static void calc_pg_masks(struct ceph_pg_pool_info *pi) 46static void calc_pg_masks(struct ceph_pg_pool_info *pi)
47{ 47{
48 pi->pg_num_mask = (1 << calc_bits_of(le32_to_cpu(pi->v.pg_num)-1)) - 1; 48 pi->pg_num_mask = (1 << calc_bits_of(pi->pg_num-1)) - 1;
49 pi->pgp_num_mask = 49 pi->pgp_num_mask = (1 << calc_bits_of(pi->pgp_num-1)) - 1;
50 (1 << calc_bits_of(le32_to_cpu(pi->v.pgp_num)-1)) - 1;
51 pi->lpg_num_mask =
52 (1 << calc_bits_of(le32_to_cpu(pi->v.lpg_num)-1)) - 1;
53 pi->lpgp_num_mask =
54 (1 << calc_bits_of(le32_to_cpu(pi->v.lpgp_num)-1)) - 1;
55} 50}
56 51
57/* 52/*
@@ -452,7 +447,7 @@ static int __insert_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *new)
452 return 0; 447 return 0;
453} 448}
454 449
455static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id) 450static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, u64 id)
456{ 451{
457 struct ceph_pg_pool_info *pi; 452 struct ceph_pg_pool_info *pi;
458 struct rb_node *n = root->rb_node; 453 struct rb_node *n = root->rb_node;
@@ -508,24 +503,57 @@ static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
508 503
509static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi) 504static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi)
510{ 505{
511 unsigned int n, m; 506 u8 ev, cv;
507 unsigned len, num;
508 void *pool_end;
509
510 ceph_decode_need(p, end, 2 + 4, bad);
511 ev = ceph_decode_8(p); /* encoding version */
512 cv = ceph_decode_8(p); /* compat version */
513 if (ev < 5) {
514 pr_warning("got v %d < 5 cv %d of ceph_pg_pool\n", ev, cv);
515 return -EINVAL;
516 }
517 if (cv > 7) {
518 pr_warning("got v %d cv %d > 7 of ceph_pg_pool\n", ev, cv);
519 return -EINVAL;
520 }
521 len = ceph_decode_32(p);
522 ceph_decode_need(p, end, len, bad);
523 pool_end = *p + len;
512 524
513 ceph_decode_copy(p, &pi->v, sizeof(pi->v)); 525 pi->type = ceph_decode_8(p);
514 calc_pg_masks(pi); 526 pi->size = ceph_decode_8(p);
527 pi->crush_ruleset = ceph_decode_8(p);
528 pi->object_hash = ceph_decode_8(p);
515 529
516 /* num_snaps * snap_info_t */ 530 pi->pg_num = ceph_decode_32(p);
517 n = le32_to_cpu(pi->v.num_snaps); 531 pi->pgp_num = ceph_decode_32(p);
518 while (n--) { 532
519 ceph_decode_need(p, end, sizeof(u64) + 1 + sizeof(u64) + 533 *p += 4 + 4; /* skip lpg* */
520 sizeof(struct ceph_timespec), bad); 534 *p += 4; /* skip last_change */
521 *p += sizeof(u64) + /* key */ 535 *p += 8 + 4; /* skip snap_seq, snap_epoch */
522 1 + sizeof(u64) + /* u8, snapid */ 536
523 sizeof(struct ceph_timespec); 537 /* skip snaps */
524 m = ceph_decode_32(p); /* snap name */ 538 num = ceph_decode_32(p);
525 *p += m; 539 while (num--) {
540 *p += 8; /* snapid key */
541 *p += 1 + 1; /* versions */
542 len = ceph_decode_32(p);
543 *p += len;
526 } 544 }
527 545
528 *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2; 546 /* skip removed snaps */
547 num = ceph_decode_32(p);
548 *p += num * (8 + 8);
549
550 *p += 8; /* skip auid */
551 pi->flags = ceph_decode_64(p);
552
553 /* ignore the rest */
554
555 *p = pool_end;
556 calc_pg_masks(pi);
529 return 0; 557 return 0;
530 558
531bad: 559bad:
@@ -535,14 +563,15 @@ bad:
535static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map) 563static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map)
536{ 564{
537 struct ceph_pg_pool_info *pi; 565 struct ceph_pg_pool_info *pi;
538 u32 num, len, pool; 566 u32 num, len;
567 u64 pool;
539 568
540 ceph_decode_32_safe(p, end, num, bad); 569 ceph_decode_32_safe(p, end, num, bad);
541 dout(" %d pool names\n", num); 570 dout(" %d pool names\n", num);
542 while (num--) { 571 while (num--) {
543 ceph_decode_32_safe(p, end, pool, bad); 572 ceph_decode_64_safe(p, end, pool, bad);
544 ceph_decode_32_safe(p, end, len, bad); 573 ceph_decode_32_safe(p, end, len, bad);
545 dout(" pool %d len %d\n", pool, len); 574 dout(" pool %llu len %d\n", pool, len);
546 ceph_decode_need(p, end, len, bad); 575 ceph_decode_need(p, end, len, bad);
547 pi = __lookup_pg_pool(&map->pg_pools, pool); 576 pi = __lookup_pg_pool(&map->pg_pools, pool);
548 if (pi) { 577 if (pi) {
@@ -633,7 +662,6 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
633 struct ceph_osdmap *map; 662 struct ceph_osdmap *map;
634 u16 version; 663 u16 version;
635 u32 len, max, i; 664 u32 len, max, i;
636 u8 ev;
637 int err = -EINVAL; 665 int err = -EINVAL;
638 void *start = *p; 666 void *start = *p;
639 struct ceph_pg_pool_info *pi; 667 struct ceph_pg_pool_info *pi;
@@ -646,9 +674,12 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
646 map->pg_temp = RB_ROOT; 674 map->pg_temp = RB_ROOT;
647 675
648 ceph_decode_16_safe(p, end, version, bad); 676 ceph_decode_16_safe(p, end, version, bad);
649 if (version > CEPH_OSDMAP_VERSION) { 677 if (version > 6) {
650 pr_warning("got unknown v %d > %d of osdmap\n", version, 678 pr_warning("got unknown v %d > 6 of osdmap\n", version);
651 CEPH_OSDMAP_VERSION); 679 goto bad;
680 }
681 if (version < 6) {
682 pr_warning("got old v %d < 6 of osdmap\n", version);
652 goto bad; 683 goto bad;
653 } 684 }
654 685
@@ -660,20 +691,12 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
660 691
661 ceph_decode_32_safe(p, end, max, bad); 692 ceph_decode_32_safe(p, end, max, bad);
662 while (max--) { 693 while (max--) {
663 ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad); 694 ceph_decode_need(p, end, 8 + 2, bad);
664 err = -ENOMEM; 695 err = -ENOMEM;
665 pi = kzalloc(sizeof(*pi), GFP_NOFS); 696 pi = kzalloc(sizeof(*pi), GFP_NOFS);
666 if (!pi) 697 if (!pi)
667 goto bad; 698 goto bad;
668 pi->id = ceph_decode_32(p); 699 pi->id = ceph_decode_64(p);
669 err = -EINVAL;
670 ev = ceph_decode_8(p); /* encoding version */
671 if (ev > CEPH_PG_POOL_VERSION) {
672 pr_warning("got unknown v %d > %d of ceph_pg_pool\n",
673 ev, CEPH_PG_POOL_VERSION);
674 kfree(pi);
675 goto bad;
676 }
677 err = __decode_pool(p, end, pi); 700 err = __decode_pool(p, end, pi);
678 if (err < 0) { 701 if (err < 0) {
679 kfree(pi); 702 kfree(pi);
@@ -682,12 +705,10 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
682 __insert_pg_pool(&map->pg_pools, pi); 705 __insert_pg_pool(&map->pg_pools, pi);
683 } 706 }
684 707
685 if (version >= 5) { 708 err = __decode_pool_names(p, end, map);
686 err = __decode_pool_names(p, end, map); 709 if (err < 0) {
687 if (err < 0) { 710 dout("fail to decode pool names");
688 dout("fail to decode pool names"); 711 goto bad;
689 goto bad;
690 }
691 } 712 }
692 713
693 ceph_decode_32_safe(p, end, map->pool_max, bad); 714 ceph_decode_32_safe(p, end, map->pool_max, bad);
@@ -788,16 +809,17 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
788 struct ceph_fsid fsid; 809 struct ceph_fsid fsid;
789 u32 epoch = 0; 810 u32 epoch = 0;
790 struct ceph_timespec modified; 811 struct ceph_timespec modified;
791 u32 len, pool; 812 s32 len;
792 __s32 new_pool_max, new_flags, max; 813 u64 pool;
814 __s64 new_pool_max;
815 __s32 new_flags, max;
793 void *start = *p; 816 void *start = *p;
794 int err = -EINVAL; 817 int err = -EINVAL;
795 u16 version; 818 u16 version;
796 819
797 ceph_decode_16_safe(p, end, version, bad); 820 ceph_decode_16_safe(p, end, version, bad);
798 if (version > CEPH_OSDMAP_INC_VERSION) { 821 if (version > 6) {
799 pr_warning("got unknown v %d > %d of inc osdmap\n", version, 822 pr_warning("got unknown v %d > %d of inc osdmap\n", version, 6);
800 CEPH_OSDMAP_INC_VERSION);
801 goto bad; 823 goto bad;
802 } 824 }
803 825
@@ -807,7 +829,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
807 epoch = ceph_decode_32(p); 829 epoch = ceph_decode_32(p);
808 BUG_ON(epoch != map->epoch+1); 830 BUG_ON(epoch != map->epoch+1);
809 ceph_decode_copy(p, &modified, sizeof(modified)); 831 ceph_decode_copy(p, &modified, sizeof(modified));
810 new_pool_max = ceph_decode_32(p); 832 new_pool_max = ceph_decode_64(p);
811 new_flags = ceph_decode_32(p); 833 new_flags = ceph_decode_32(p);
812 834
813 /* full map? */ 835 /* full map? */
@@ -857,18 +879,9 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
857 /* new_pool */ 879 /* new_pool */
858 ceph_decode_32_safe(p, end, len, bad); 880 ceph_decode_32_safe(p, end, len, bad);
859 while (len--) { 881 while (len--) {
860 __u8 ev;
861 struct ceph_pg_pool_info *pi; 882 struct ceph_pg_pool_info *pi;
862 883
863 ceph_decode_32_safe(p, end, pool, bad); 884 ceph_decode_64_safe(p, end, pool, bad);
864 ceph_decode_need(p, end, 1 + sizeof(pi->v), bad);
865 ev = ceph_decode_8(p); /* encoding version */
866 if (ev > CEPH_PG_POOL_VERSION) {
867 pr_warning("got unknown v %d > %d of ceph_pg_pool\n",
868 ev, CEPH_PG_POOL_VERSION);
869 err = -EINVAL;
870 goto bad;
871 }
872 pi = __lookup_pg_pool(&map->pg_pools, pool); 885 pi = __lookup_pg_pool(&map->pg_pools, pool);
873 if (!pi) { 886 if (!pi) {
874 pi = kzalloc(sizeof(*pi), GFP_NOFS); 887 pi = kzalloc(sizeof(*pi), GFP_NOFS);
@@ -894,7 +907,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
894 while (len--) { 907 while (len--) {
895 struct ceph_pg_pool_info *pi; 908 struct ceph_pg_pool_info *pi;
896 909
897 ceph_decode_32_safe(p, end, pool, bad); 910 ceph_decode_64_safe(p, end, pool, bad);
898 pi = __lookup_pg_pool(&map->pg_pools, pool); 911 pi = __lookup_pg_pool(&map->pg_pools, pool);
899 if (pi) 912 if (pi)
900 __remove_pg_pool(&map->pg_pools, pi); 913 __remove_pg_pool(&map->pg_pools, pi);
@@ -1097,8 +1110,8 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol,
1097 pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool); 1110 pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool);
1098 if (!pool) 1111 if (!pool)
1099 return -EIO; 1112 return -EIO;
1100 pgid.seed = ceph_str_hash(pool->v.object_hash, oid, strlen(oid)); 1113 pgid.seed = ceph_str_hash(pool->object_hash, oid, strlen(oid));
1101 num = le32_to_cpu(pool->v.pg_num); 1114 num = pool->pg_num;
1102 num_mask = pool->pg_num_mask; 1115 num_mask = pool->pg_num_mask;
1103 1116
1104 dout("calc_object_layout '%s' pgid %lld.%x\n", oid, pgid.pool, 1117 dout("calc_object_layout '%s' pgid %lld.%x\n", oid, pgid.pool,
@@ -1132,8 +1145,7 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
1132 return NULL; 1145 return NULL;
1133 1146
1134 /* pg_temp? */ 1147 /* pg_temp? */
1135 t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num), 1148 t = ceph_stable_mod(ps, pool->pg_num, pool->pgp_num_mask);
1136 pool->pgp_num_mask);
1137 pgid.seed = t; 1149 pgid.seed = t;
1138 pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid); 1150 pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid);
1139 if (pg) { 1151 if (pg) {
@@ -1142,26 +1154,24 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
1142 } 1154 }
1143 1155
1144 /* crush */ 1156 /* crush */
1145 ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset, 1157 ruleno = crush_find_rule(osdmap->crush, pool->crush_ruleset,
1146 pool->v.type, pool->v.size); 1158 pool->type, pool->size);
1147 if (ruleno < 0) { 1159 if (ruleno < 0) {
1148 pr_err("no crush rule pool %d ruleset %d type %d size %d\n", 1160 pr_err("no crush rule pool %d ruleset %d type %d size %d\n",
1149 poolid, pool->v.crush_ruleset, pool->v.type, 1161 poolid, pool->crush_ruleset, pool->type,
1150 pool->v.size); 1162 pool->size);
1151 return NULL; 1163 return NULL;
1152 } 1164 }
1153 1165
1154 pps = ceph_stable_mod(ps, 1166 pps = ceph_stable_mod(ps, pool->pgp_num, pool->pgp_num_mask);
1155 le32_to_cpu(pool->v.pgp_num),
1156 pool->pgp_num_mask);
1157 pps += poolid; 1167 pps += poolid;
1158 r = crush_do_rule(osdmap->crush, ruleno, pps, osds, 1168 r = crush_do_rule(osdmap->crush, ruleno, pps, osds,
1159 min_t(int, pool->v.size, *num), 1169 min_t(int, pool->size, *num),
1160 osdmap->osd_weight); 1170 osdmap->osd_weight);
1161 if (r < 0) { 1171 if (r < 0) {
1162 pr_err("error %d from crush rule: pool %d ruleset %d type %d" 1172 pr_err("error %d from crush rule: pool %d ruleset %d type %d"
1163 " size %d\n", r, poolid, pool->v.crush_ruleset, 1173 " size %d\n", r, poolid, pool->crush_ruleset,
1164 pool->v.type, pool->v.size); 1174 pool->type, pool->size);
1165 return NULL; 1175 return NULL;
1166 } 1176 }
1167 *num = r; 1177 *num = r;