aboutsummaryrefslogtreecommitdiffstats
path: root/net/ceph/osdmap.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ceph/osdmap.c')
-rw-r--r--net/ceph/osdmap.c290
1 files changed, 155 insertions, 135 deletions
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index de73214b5d26..69bc4bf89e3e 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -13,26 +13,18 @@
13 13
14char *ceph_osdmap_state_str(char *str, int len, int state) 14char *ceph_osdmap_state_str(char *str, int len, int state)
15{ 15{
16 int flag = 0;
17
18 if (!len) 16 if (!len)
19 goto done; 17 return str;
20 18
21 *str = '\0'; 19 if ((state & CEPH_OSD_EXISTS) && (state & CEPH_OSD_UP))
22 if (state) { 20 snprintf(str, len, "exists, up");
23 if (state & CEPH_OSD_EXISTS) { 21 else if (state & CEPH_OSD_EXISTS)
24 snprintf(str, len, "exists"); 22 snprintf(str, len, "exists");
25 flag = 1; 23 else if (state & CEPH_OSD_UP)
26 } 24 snprintf(str, len, "up");
27 if (state & CEPH_OSD_UP) { 25 else
28 snprintf(str, len, "%s%s%s", str, (flag ? ", " : ""),
29 "up");
30 flag = 1;
31 }
32 } else {
33 snprintf(str, len, "doesn't exist"); 26 snprintf(str, len, "doesn't exist");
34 } 27
35done:
36 return str; 28 return str;
37} 29}
38 30
@@ -53,13 +45,8 @@ static int calc_bits_of(unsigned int t)
53 */ 45 */
54static void calc_pg_masks(struct ceph_pg_pool_info *pi) 46static void calc_pg_masks(struct ceph_pg_pool_info *pi)
55{ 47{
56 pi->pg_num_mask = (1 << calc_bits_of(le32_to_cpu(pi->v.pg_num)-1)) - 1; 48 pi->pg_num_mask = (1 << calc_bits_of(pi->pg_num-1)) - 1;
57 pi->pgp_num_mask = 49 pi->pgp_num_mask = (1 << calc_bits_of(pi->pgp_num-1)) - 1;
58 (1 << calc_bits_of(le32_to_cpu(pi->v.pgp_num)-1)) - 1;
59 pi->lpg_num_mask =
60 (1 << calc_bits_of(le32_to_cpu(pi->v.lpg_num)-1)) - 1;
61 pi->lpgp_num_mask =
62 (1 << calc_bits_of(le32_to_cpu(pi->v.lpgp_num)-1)) - 1;
63} 50}
64 51
65/* 52/*
@@ -170,6 +157,7 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
170 c->choose_local_tries = 2; 157 c->choose_local_tries = 2;
171 c->choose_local_fallback_tries = 5; 158 c->choose_local_fallback_tries = 5;
172 c->choose_total_tries = 19; 159 c->choose_total_tries = 19;
160 c->chooseleaf_descend_once = 0;
173 161
174 ceph_decode_need(p, end, 4*sizeof(u32), bad); 162 ceph_decode_need(p, end, 4*sizeof(u32), bad);
175 magic = ceph_decode_32(p); 163 magic = ceph_decode_32(p);
@@ -336,6 +324,11 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
336 dout("crush decode tunable choose_total_tries = %d", 324 dout("crush decode tunable choose_total_tries = %d",
337 c->choose_total_tries); 325 c->choose_total_tries);
338 326
327 ceph_decode_need(p, end, sizeof(u32), done);
328 c->chooseleaf_descend_once = ceph_decode_32(p);
329 dout("crush decode tunable chooseleaf_descend_once = %d",
330 c->chooseleaf_descend_once);
331
339done: 332done:
340 dout("crush_decode success\n"); 333 dout("crush_decode success\n");
341 return c; 334 return c;
@@ -354,12 +347,13 @@ bad:
354 */ 347 */
355static int pgid_cmp(struct ceph_pg l, struct ceph_pg r) 348static int pgid_cmp(struct ceph_pg l, struct ceph_pg r)
356{ 349{
357 u64 a = *(u64 *)&l; 350 if (l.pool < r.pool)
358 u64 b = *(u64 *)&r; 351 return -1;
359 352 if (l.pool > r.pool)
360 if (a < b) 353 return 1;
354 if (l.seed < r.seed)
361 return -1; 355 return -1;
362 if (a > b) 356 if (l.seed > r.seed)
363 return 1; 357 return 1;
364 return 0; 358 return 0;
365} 359}
@@ -405,8 +399,8 @@ static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root,
405 } else if (c > 0) { 399 } else if (c > 0) {
406 n = n->rb_right; 400 n = n->rb_right;
407 } else { 401 } else {
408 dout("__lookup_pg_mapping %llx got %p\n", 402 dout("__lookup_pg_mapping %lld.%x got %p\n",
409 *(u64 *)&pgid, pg); 403 pgid.pool, pgid.seed, pg);
410 return pg; 404 return pg;
411 } 405 }
412 } 406 }
@@ -418,12 +412,13 @@ static int __remove_pg_mapping(struct rb_root *root, struct ceph_pg pgid)
418 struct ceph_pg_mapping *pg = __lookup_pg_mapping(root, pgid); 412 struct ceph_pg_mapping *pg = __lookup_pg_mapping(root, pgid);
419 413
420 if (pg) { 414 if (pg) {
421 dout("__remove_pg_mapping %llx %p\n", *(u64 *)&pgid, pg); 415 dout("__remove_pg_mapping %lld.%x %p\n", pgid.pool, pgid.seed,
416 pg);
422 rb_erase(&pg->node, root); 417 rb_erase(&pg->node, root);
423 kfree(pg); 418 kfree(pg);
424 return 0; 419 return 0;
425 } 420 }
426 dout("__remove_pg_mapping %llx dne\n", *(u64 *)&pgid); 421 dout("__remove_pg_mapping %lld.%x dne\n", pgid.pool, pgid.seed);
427 return -ENOENT; 422 return -ENOENT;
428} 423}
429 424
@@ -452,7 +447,7 @@ static int __insert_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *new)
452 return 0; 447 return 0;
453} 448}
454 449
455static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id) 450static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, u64 id)
456{ 451{
457 struct ceph_pg_pool_info *pi; 452 struct ceph_pg_pool_info *pi;
458 struct rb_node *n = root->rb_node; 453 struct rb_node *n = root->rb_node;
@@ -508,24 +503,57 @@ static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
508 503
509static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi) 504static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi)
510{ 505{
511 unsigned int n, m; 506 u8 ev, cv;
507 unsigned len, num;
508 void *pool_end;
509
510 ceph_decode_need(p, end, 2 + 4, bad);
511 ev = ceph_decode_8(p); /* encoding version */
512 cv = ceph_decode_8(p); /* compat version */
513 if (ev < 5) {
514 pr_warning("got v %d < 5 cv %d of ceph_pg_pool\n", ev, cv);
515 return -EINVAL;
516 }
517 if (cv > 7) {
518 pr_warning("got v %d cv %d > 7 of ceph_pg_pool\n", ev, cv);
519 return -EINVAL;
520 }
521 len = ceph_decode_32(p);
522 ceph_decode_need(p, end, len, bad);
523 pool_end = *p + len;
512 524
513 ceph_decode_copy(p, &pi->v, sizeof(pi->v)); 525 pi->type = ceph_decode_8(p);
514 calc_pg_masks(pi); 526 pi->size = ceph_decode_8(p);
527 pi->crush_ruleset = ceph_decode_8(p);
528 pi->object_hash = ceph_decode_8(p);
529
530 pi->pg_num = ceph_decode_32(p);
531 pi->pgp_num = ceph_decode_32(p);
532
533 *p += 4 + 4; /* skip lpg* */
534 *p += 4; /* skip last_change */
535 *p += 8 + 4; /* skip snap_seq, snap_epoch */
515 536
516 /* num_snaps * snap_info_t */ 537 /* skip snaps */
517 n = le32_to_cpu(pi->v.num_snaps); 538 num = ceph_decode_32(p);
518 while (n--) { 539 while (num--) {
519 ceph_decode_need(p, end, sizeof(u64) + 1 + sizeof(u64) + 540 *p += 8; /* snapid key */
520 sizeof(struct ceph_timespec), bad); 541 *p += 1 + 1; /* versions */
521 *p += sizeof(u64) + /* key */ 542 len = ceph_decode_32(p);
522 1 + sizeof(u64) + /* u8, snapid */ 543 *p += len;
523 sizeof(struct ceph_timespec);
524 m = ceph_decode_32(p); /* snap name */
525 *p += m;
526 } 544 }
527 545
528 *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2; 546 /* skip removed snaps */
547 num = ceph_decode_32(p);
548 *p += num * (8 + 8);
549
550 *p += 8; /* skip auid */
551 pi->flags = ceph_decode_64(p);
552
553 /* ignore the rest */
554
555 *p = pool_end;
556 calc_pg_masks(pi);
529 return 0; 557 return 0;
530 558
531bad: 559bad:
@@ -535,14 +563,15 @@ bad:
535static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map) 563static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map)
536{ 564{
537 struct ceph_pg_pool_info *pi; 565 struct ceph_pg_pool_info *pi;
538 u32 num, len, pool; 566 u32 num, len;
567 u64 pool;
539 568
540 ceph_decode_32_safe(p, end, num, bad); 569 ceph_decode_32_safe(p, end, num, bad);
541 dout(" %d pool names\n", num); 570 dout(" %d pool names\n", num);
542 while (num--) { 571 while (num--) {
543 ceph_decode_32_safe(p, end, pool, bad); 572 ceph_decode_64_safe(p, end, pool, bad);
544 ceph_decode_32_safe(p, end, len, bad); 573 ceph_decode_32_safe(p, end, len, bad);
545 dout(" pool %d len %d\n", pool, len); 574 dout(" pool %llu len %d\n", pool, len);
546 ceph_decode_need(p, end, len, bad); 575 ceph_decode_need(p, end, len, bad);
547 pi = __lookup_pg_pool(&map->pg_pools, pool); 576 pi = __lookup_pg_pool(&map->pg_pools, pool);
548 if (pi) { 577 if (pi) {
@@ -633,7 +662,6 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
633 struct ceph_osdmap *map; 662 struct ceph_osdmap *map;
634 u16 version; 663 u16 version;
635 u32 len, max, i; 664 u32 len, max, i;
636 u8 ev;
637 int err = -EINVAL; 665 int err = -EINVAL;
638 void *start = *p; 666 void *start = *p;
639 struct ceph_pg_pool_info *pi; 667 struct ceph_pg_pool_info *pi;
@@ -646,9 +674,12 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
646 map->pg_temp = RB_ROOT; 674 map->pg_temp = RB_ROOT;
647 675
648 ceph_decode_16_safe(p, end, version, bad); 676 ceph_decode_16_safe(p, end, version, bad);
649 if (version > CEPH_OSDMAP_VERSION) { 677 if (version > 6) {
650 pr_warning("got unknown v %d > %d of osdmap\n", version, 678 pr_warning("got unknown v %d > 6 of osdmap\n", version);
651 CEPH_OSDMAP_VERSION); 679 goto bad;
680 }
681 if (version < 6) {
682 pr_warning("got old v %d < 6 of osdmap\n", version);
652 goto bad; 683 goto bad;
653 } 684 }
654 685
@@ -660,20 +691,12 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
660 691
661 ceph_decode_32_safe(p, end, max, bad); 692 ceph_decode_32_safe(p, end, max, bad);
662 while (max--) { 693 while (max--) {
663 ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad); 694 ceph_decode_need(p, end, 8 + 2, bad);
664 err = -ENOMEM; 695 err = -ENOMEM;
665 pi = kzalloc(sizeof(*pi), GFP_NOFS); 696 pi = kzalloc(sizeof(*pi), GFP_NOFS);
666 if (!pi) 697 if (!pi)
667 goto bad; 698 goto bad;
668 pi->id = ceph_decode_32(p); 699 pi->id = ceph_decode_64(p);
669 err = -EINVAL;
670 ev = ceph_decode_8(p); /* encoding version */
671 if (ev > CEPH_PG_POOL_VERSION) {
672 pr_warning("got unknown v %d > %d of ceph_pg_pool\n",
673 ev, CEPH_PG_POOL_VERSION);
674 kfree(pi);
675 goto bad;
676 }
677 err = __decode_pool(p, end, pi); 700 err = __decode_pool(p, end, pi);
678 if (err < 0) { 701 if (err < 0) {
679 kfree(pi); 702 kfree(pi);
@@ -682,12 +705,10 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
682 __insert_pg_pool(&map->pg_pools, pi); 705 __insert_pg_pool(&map->pg_pools, pi);
683 } 706 }
684 707
685 if (version >= 5) { 708 err = __decode_pool_names(p, end, map);
686 err = __decode_pool_names(p, end, map); 709 if (err < 0) {
687 if (err < 0) { 710 dout("fail to decode pool names");
688 dout("fail to decode pool names"); 711 goto bad;
689 goto bad;
690 }
691 } 712 }
692 713
693 ceph_decode_32_safe(p, end, map->pool_max, bad); 714 ceph_decode_32_safe(p, end, map->pool_max, bad);
@@ -724,10 +745,13 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
724 for (i = 0; i < len; i++) { 745 for (i = 0; i < len; i++) {
725 int n, j; 746 int n, j;
726 struct ceph_pg pgid; 747 struct ceph_pg pgid;
748 struct ceph_pg_v1 pgid_v1;
727 struct ceph_pg_mapping *pg; 749 struct ceph_pg_mapping *pg;
728 750
729 ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad); 751 ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad);
730 ceph_decode_copy(p, &pgid, sizeof(pgid)); 752 ceph_decode_copy(p, &pgid_v1, sizeof(pgid_v1));
753 pgid.pool = le32_to_cpu(pgid_v1.pool);
754 pgid.seed = le16_to_cpu(pgid_v1.ps);
731 n = ceph_decode_32(p); 755 n = ceph_decode_32(p);
732 err = -EINVAL; 756 err = -EINVAL;
733 if (n > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) 757 if (n > (UINT_MAX - sizeof(*pg)) / sizeof(u32))
@@ -745,7 +769,8 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
745 err = __insert_pg_mapping(pg, &map->pg_temp); 769 err = __insert_pg_mapping(pg, &map->pg_temp);
746 if (err) 770 if (err)
747 goto bad; 771 goto bad;
748 dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid, len); 772 dout(" added pg_temp %lld.%x len %d\n", pgid.pool, pgid.seed,
773 len);
749 } 774 }
750 775
751 /* crush */ 776 /* crush */
@@ -784,16 +809,17 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
784 struct ceph_fsid fsid; 809 struct ceph_fsid fsid;
785 u32 epoch = 0; 810 u32 epoch = 0;
786 struct ceph_timespec modified; 811 struct ceph_timespec modified;
787 u32 len, pool; 812 s32 len;
788 __s32 new_pool_max, new_flags, max; 813 u64 pool;
814 __s64 new_pool_max;
815 __s32 new_flags, max;
789 void *start = *p; 816 void *start = *p;
790 int err = -EINVAL; 817 int err = -EINVAL;
791 u16 version; 818 u16 version;
792 819
793 ceph_decode_16_safe(p, end, version, bad); 820 ceph_decode_16_safe(p, end, version, bad);
794 if (version > CEPH_OSDMAP_INC_VERSION) { 821 if (version > 6) {
795 pr_warning("got unknown v %d > %d of inc osdmap\n", version, 822 pr_warning("got unknown v %d > %d of inc osdmap\n", version, 6);
796 CEPH_OSDMAP_INC_VERSION);
797 goto bad; 823 goto bad;
798 } 824 }
799 825
@@ -803,7 +829,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
803 epoch = ceph_decode_32(p); 829 epoch = ceph_decode_32(p);
804 BUG_ON(epoch != map->epoch+1); 830 BUG_ON(epoch != map->epoch+1);
805 ceph_decode_copy(p, &modified, sizeof(modified)); 831 ceph_decode_copy(p, &modified, sizeof(modified));
806 new_pool_max = ceph_decode_32(p); 832 new_pool_max = ceph_decode_64(p);
807 new_flags = ceph_decode_32(p); 833 new_flags = ceph_decode_32(p);
808 834
809 /* full map? */ 835 /* full map? */
@@ -853,18 +879,9 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
853 /* new_pool */ 879 /* new_pool */
854 ceph_decode_32_safe(p, end, len, bad); 880 ceph_decode_32_safe(p, end, len, bad);
855 while (len--) { 881 while (len--) {
856 __u8 ev;
857 struct ceph_pg_pool_info *pi; 882 struct ceph_pg_pool_info *pi;
858 883
859 ceph_decode_32_safe(p, end, pool, bad); 884 ceph_decode_64_safe(p, end, pool, bad);
860 ceph_decode_need(p, end, 1 + sizeof(pi->v), bad);
861 ev = ceph_decode_8(p); /* encoding version */
862 if (ev > CEPH_PG_POOL_VERSION) {
863 pr_warning("got unknown v %d > %d of ceph_pg_pool\n",
864 ev, CEPH_PG_POOL_VERSION);
865 err = -EINVAL;
866 goto bad;
867 }
868 pi = __lookup_pg_pool(&map->pg_pools, pool); 885 pi = __lookup_pg_pool(&map->pg_pools, pool);
869 if (!pi) { 886 if (!pi) {
870 pi = kzalloc(sizeof(*pi), GFP_NOFS); 887 pi = kzalloc(sizeof(*pi), GFP_NOFS);
@@ -890,7 +907,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
890 while (len--) { 907 while (len--) {
891 struct ceph_pg_pool_info *pi; 908 struct ceph_pg_pool_info *pi;
892 909
893 ceph_decode_32_safe(p, end, pool, bad); 910 ceph_decode_64_safe(p, end, pool, bad);
894 pi = __lookup_pg_pool(&map->pg_pools, pool); 911 pi = __lookup_pg_pool(&map->pg_pools, pool);
895 if (pi) 912 if (pi)
896 __remove_pg_pool(&map->pg_pools, pi); 913 __remove_pg_pool(&map->pg_pools, pi);
@@ -946,10 +963,13 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
946 while (len--) { 963 while (len--) {
947 struct ceph_pg_mapping *pg; 964 struct ceph_pg_mapping *pg;
948 int j; 965 int j;
966 struct ceph_pg_v1 pgid_v1;
949 struct ceph_pg pgid; 967 struct ceph_pg pgid;
950 u32 pglen; 968 u32 pglen;
951 ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad); 969 ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad);
952 ceph_decode_copy(p, &pgid, sizeof(pgid)); 970 ceph_decode_copy(p, &pgid_v1, sizeof(pgid_v1));
971 pgid.pool = le32_to_cpu(pgid_v1.pool);
972 pgid.seed = le16_to_cpu(pgid_v1.ps);
953 pglen = ceph_decode_32(p); 973 pglen = ceph_decode_32(p);
954 974
955 if (pglen) { 975 if (pglen) {
@@ -975,8 +995,8 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
975 kfree(pg); 995 kfree(pg);
976 goto bad; 996 goto bad;
977 } 997 }
978 dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid, 998 dout(" added pg_temp %lld.%x len %d\n", pgid.pool,
979 pglen); 999 pgid.seed, pglen);
980 } else { 1000 } else {
981 /* remove */ 1001 /* remove */
982 __remove_pg_mapping(&map->pg_temp, pgid); 1002 __remove_pg_mapping(&map->pg_temp, pgid);
@@ -1010,7 +1030,7 @@ bad:
1010 * pass a stride back to the caller. 1030 * pass a stride back to the caller.
1011 */ 1031 */
1012int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, 1032int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
1013 u64 off, u64 *plen, 1033 u64 off, u64 len,
1014 u64 *ono, 1034 u64 *ono,
1015 u64 *oxoff, u64 *oxlen) 1035 u64 *oxoff, u64 *oxlen)
1016{ 1036{
@@ -1021,7 +1041,7 @@ int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
1021 u32 su_per_object; 1041 u32 su_per_object;
1022 u64 t, su_offset; 1042 u64 t, su_offset;
1023 1043
1024 dout("mapping %llu~%llu osize %u fl_su %u\n", off, *plen, 1044 dout("mapping %llu~%llu osize %u fl_su %u\n", off, len,
1025 osize, su); 1045 osize, su);
1026 if (su == 0 || sc == 0) 1046 if (su == 0 || sc == 0)
1027 goto invalid; 1047 goto invalid;
@@ -1054,11 +1074,10 @@ int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
1054 1074
1055 /* 1075 /*
1056 * Calculate the length of the extent being written to the selected 1076 * Calculate the length of the extent being written to the selected
1057 * object. This is the minimum of the full length requested (plen) or 1077 * object. This is the minimum of the full length requested (len) or
1058 * the remainder of the current stripe being written to. 1078 * the remainder of the current stripe being written to.
1059 */ 1079 */
1060 *oxlen = min_t(u64, *plen, su - su_offset); 1080 *oxlen = min_t(u64, len, su - su_offset);
1061 *plen = *oxlen;
1062 1081
1063 dout(" obj extent %llu~%llu\n", *oxoff, *oxlen); 1082 dout(" obj extent %llu~%llu\n", *oxoff, *oxlen);
1064 return 0; 1083 return 0;
@@ -1076,33 +1095,24 @@ EXPORT_SYMBOL(ceph_calc_file_object_mapping);
1076 * calculate an object layout (i.e. pgid) from an oid, 1095 * calculate an object layout (i.e. pgid) from an oid,
1077 * file_layout, and osdmap 1096 * file_layout, and osdmap
1078 */ 1097 */
1079int ceph_calc_object_layout(struct ceph_object_layout *ol, 1098int ceph_calc_object_layout(struct ceph_pg *pg,
1080 const char *oid, 1099 const char *oid,
1081 struct ceph_file_layout *fl, 1100 struct ceph_file_layout *fl,
1082 struct ceph_osdmap *osdmap) 1101 struct ceph_osdmap *osdmap)
1083{ 1102{
1084 unsigned int num, num_mask; 1103 unsigned int num, num_mask;
1085 struct ceph_pg pgid;
1086 int poolid = le32_to_cpu(fl->fl_pg_pool);
1087 struct ceph_pg_pool_info *pool; 1104 struct ceph_pg_pool_info *pool;
1088 unsigned int ps;
1089 1105
1090 BUG_ON(!osdmap); 1106 BUG_ON(!osdmap);
1091 1107 pg->pool = le32_to_cpu(fl->fl_pg_pool);
1092 pool = __lookup_pg_pool(&osdmap->pg_pools, poolid); 1108 pool = __lookup_pg_pool(&osdmap->pg_pools, pg->pool);
1093 if (!pool) 1109 if (!pool)
1094 return -EIO; 1110 return -EIO;
1095 ps = ceph_str_hash(pool->v.object_hash, oid, strlen(oid)); 1111 pg->seed = ceph_str_hash(pool->object_hash, oid, strlen(oid));
1096 num = le32_to_cpu(pool->v.pg_num); 1112 num = pool->pg_num;
1097 num_mask = pool->pg_num_mask; 1113 num_mask = pool->pg_num_mask;
1098 1114
1099 pgid.ps = cpu_to_le16(ps); 1115 dout("calc_object_layout '%s' pgid %lld.%x\n", oid, pg->pool, pg->seed);
1100 pgid.preferred = cpu_to_le16(-1);
1101 pgid.pool = fl->fl_pg_pool;
1102 dout("calc_object_layout '%s' pgid %d.%x\n", oid, poolid, ps);
1103
1104 ol->ol_pgid = pgid;
1105 ol->ol_stripe_unit = fl->fl_object_stripe_unit;
1106 return 0; 1116 return 0;
1107} 1117}
1108EXPORT_SYMBOL(ceph_calc_object_layout); 1118EXPORT_SYMBOL(ceph_calc_object_layout);
@@ -1117,19 +1127,16 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
1117 struct ceph_pg_mapping *pg; 1127 struct ceph_pg_mapping *pg;
1118 struct ceph_pg_pool_info *pool; 1128 struct ceph_pg_pool_info *pool;
1119 int ruleno; 1129 int ruleno;
1120 unsigned int poolid, ps, pps, t, r; 1130 int r;
1121 1131 u32 pps;
1122 poolid = le32_to_cpu(pgid.pool);
1123 ps = le16_to_cpu(pgid.ps);
1124 1132
1125 pool = __lookup_pg_pool(&osdmap->pg_pools, poolid); 1133 pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool);
1126 if (!pool) 1134 if (!pool)
1127 return NULL; 1135 return NULL;
1128 1136
1129 /* pg_temp? */ 1137 /* pg_temp? */
1130 t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num), 1138 pgid.seed = ceph_stable_mod(pgid.seed, pool->pg_num,
1131 pool->pgp_num_mask); 1139 pool->pgp_num_mask);
1132 pgid.ps = cpu_to_le16(t);
1133 pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid); 1140 pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid);
1134 if (pg) { 1141 if (pg) {
1135 *num = pg->len; 1142 *num = pg->len;
@@ -1137,26 +1144,39 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
1137 } 1144 }
1138 1145
1139 /* crush */ 1146 /* crush */
1140 ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset, 1147 ruleno = crush_find_rule(osdmap->crush, pool->crush_ruleset,
1141 pool->v.type, pool->v.size); 1148 pool->type, pool->size);
1142 if (ruleno < 0) { 1149 if (ruleno < 0) {
1143 pr_err("no crush rule pool %d ruleset %d type %d size %d\n", 1150 pr_err("no crush rule pool %lld ruleset %d type %d size %d\n",
1144 poolid, pool->v.crush_ruleset, pool->v.type, 1151 pgid.pool, pool->crush_ruleset, pool->type,
1145 pool->v.size); 1152 pool->size);
1146 return NULL; 1153 return NULL;
1147 } 1154 }
1148 1155
1149 pps = ceph_stable_mod(ps, 1156 if (pool->flags & CEPH_POOL_FLAG_HASHPSPOOL) {
1150 le32_to_cpu(pool->v.pgp_num), 1157 /* hash pool id and seed sothat pool PGs do not overlap */
1151 pool->pgp_num_mask); 1158 pps = crush_hash32_2(CRUSH_HASH_RJENKINS1,
1152 pps += poolid; 1159 ceph_stable_mod(pgid.seed, pool->pgp_num,
1160 pool->pgp_num_mask),
1161 pgid.pool);
1162 } else {
1163 /*
1164 * legacy ehavior: add ps and pool together. this is
1165 * not a great approach because the PGs from each pool
1166 * will overlap on top of each other: 0.5 == 1.4 ==
1167 * 2.3 == ...
1168 */
1169 pps = ceph_stable_mod(pgid.seed, pool->pgp_num,
1170 pool->pgp_num_mask) +
1171 (unsigned)pgid.pool;
1172 }
1153 r = crush_do_rule(osdmap->crush, ruleno, pps, osds, 1173 r = crush_do_rule(osdmap->crush, ruleno, pps, osds,
1154 min_t(int, pool->v.size, *num), 1174 min_t(int, pool->size, *num),
1155 osdmap->osd_weight); 1175 osdmap->osd_weight);
1156 if (r < 0) { 1176 if (r < 0) {
1157 pr_err("error %d from crush rule: pool %d ruleset %d type %d" 1177 pr_err("error %d from crush rule: pool %lld ruleset %d type %d"
1158 " size %d\n", r, poolid, pool->v.crush_ruleset, 1178 " size %d\n", r, pgid.pool, pool->crush_ruleset,
1159 pool->v.type, pool->v.size); 1179 pool->type, pool->size);
1160 return NULL; 1180 return NULL;
1161 } 1181 }
1162 *num = r; 1182 *num = r;