about summary refs log tree commit diff stats
path: root/fs/ceph
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph')
-rw-r--r-- fs/ceph/ioctl.c 4
-rw-r--r-- fs/ceph/osd_client.c 8
-rw-r--r-- fs/ceph/osdmap.c 100
-rw-r--r-- fs/ceph/osdmap.h 5
-rw-r--r-- fs/ceph/rados.h 13
5 files changed, 75 insertions, 55 deletions
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index e4f99eff5d93..4c33e19fc241 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -99,7 +99,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
99 u64 len = 1, olen; 99 u64 len = 1, olen;
100 u64 tmp; 100 u64 tmp;
101 struct ceph_object_layout ol; 101 struct ceph_object_layout ol;
102 union ceph_pg pgid; 102 struct ceph_pg pgid;
103 103
104 /* copy and validate */ 104 /* copy and validate */
105 if (copy_from_user(&dl, arg, sizeof(dl))) 105 if (copy_from_user(&dl, arg, sizeof(dl)))
@@ -121,7 +121,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
121 ceph_calc_object_layout(&ol, dl.object_name, &ci->i_layout, 121 ceph_calc_object_layout(&ol, dl.object_name, &ci->i_layout,
122 osdc->osdmap); 122 osdc->osdmap);
123 123
124 pgid.pg64 = le64_to_cpu(ol.ol_pgid); 124 pgid = ol.ol_pgid;
125 dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid); 125 dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid);
126 if (dl.osd >= 0) { 126 if (dl.osd >= 0) {
127 struct ceph_entity_addr *a = 127 struct ceph_entity_addr *a =
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index 7dc0f6299a52..7db14ba6261c 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -520,7 +520,7 @@ static int __map_osds(struct ceph_osd_client *osdc,
520 struct ceph_osd_request *req) 520 struct ceph_osd_request *req)
521{ 521{
522 struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; 522 struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
523 union ceph_pg pgid; 523 struct ceph_pg pgid;
524 int o = -1; 524 int o = -1;
525 int err; 525 int err;
526 struct ceph_osd *newosd = NULL; 526 struct ceph_osd *newosd = NULL;
@@ -530,7 +530,7 @@ static int __map_osds(struct ceph_osd_client *osdc,
530 &req->r_file_layout, osdc->osdmap); 530 &req->r_file_layout, osdc->osdmap);
531 if (err) 531 if (err)
532 return err; 532 return err;
533 pgid.pg64 = le64_to_cpu(reqhead->layout.ol_pgid); 533 pgid = reqhead->layout.ol_pgid;
534 o = ceph_calc_pg_primary(osdc->osdmap, pgid); 534 o = ceph_calc_pg_primary(osdc->osdmap, pgid);
535 535
536 if ((req->r_osd && req->r_osd->o_osd == o && 536 if ((req->r_osd && req->r_osd->o_osd == o &&
@@ -538,8 +538,8 @@ static int __map_osds(struct ceph_osd_client *osdc,
538 (req->r_osd == NULL && o == -1)) 538 (req->r_osd == NULL && o == -1))
539 return 0; /* no change */ 539 return 0; /* no change */
540 540
541 dout("map_osds tid %llu pgid %llx pool %d osd%d (was osd%d)\n", 541 dout("map_osds tid %llu pgid %d.%x osd%d (was osd%d)\n",
542 req->r_tid, pgid.pg64, pgid.pg.pool, o, 542 req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o,
543 req->r_osd ? req->r_osd->o_osd : -1); 543 req->r_osd ? req->r_osd->o_osd : -1);
544 544
545 if (req->r_osd) { 545 if (req->r_osd) {
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index cd7bb265d789..8b0cd1107507 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -366,19 +366,33 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
366/* 366/*
367 * Insert a new pg_temp mapping 367 * Insert a new pg_temp mapping
368 */ 368 */
/*
 * Three-way compare of two pg ids: returns -1, 0 or 1.  Used to order
 * and look up entries in the pg_temp rb-tree.  Each struct ceph_pg is
 * read back as one raw u64, so the ordering follows the host's view of
 * those bytes rather than a field-by-field comparison.
 * NOTE(review): the u64 casts assume struct ceph_pg is exactly 8 bytes
 * and that type-punned reads are acceptable in this build - confirm.
 */
369static int pgid_cmp(struct ceph_pg l, struct ceph_pg r)
370{
371 u64 a = *(u64 *)&l;
372 u64 b = *(u64 *)&r;
373
374 if (a < b)
375 return -1;
376 if (a > b)
377 return 1;
378 return 0;
379}
380
369static int __insert_pg_mapping(struct ceph_pg_mapping *new, 381static int __insert_pg_mapping(struct ceph_pg_mapping *new,
370 struct rb_root *root) 382 struct rb_root *root)
371{ 383{
372 struct rb_node **p = &root->rb_node; 384 struct rb_node **p = &root->rb_node;
373 struct rb_node *parent = NULL; 385 struct rb_node *parent = NULL;
374 struct ceph_pg_mapping *pg = NULL; 386 struct ceph_pg_mapping *pg = NULL;
387 int c;
375 388
376 while (*p) { 389 while (*p) {
377 parent = *p; 390 parent = *p;
378 pg = rb_entry(parent, struct ceph_pg_mapping, node); 391 pg = rb_entry(parent, struct ceph_pg_mapping, node);
379 if (new->pgid < pg->pgid) 392 c = pgid_cmp(new->pgid, pg->pgid);
393 if (c < 0)
380 p = &(*p)->rb_left; 394 p = &(*p)->rb_left;
381 else if (new->pgid > pg->pgid) 395 else if (c > 0)
382 p = &(*p)->rb_right; 396 p = &(*p)->rb_right;
383 else 397 else
384 return -EEXIST; 398 return -EEXIST;
@@ -467,11 +481,11 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
467 ceph_decode_32_safe(p, end, len, bad); 481 ceph_decode_32_safe(p, end, len, bad);
468 for (i = 0; i < len; i++) { 482 for (i = 0; i < len; i++) {
469 int n, j; 483 int n, j;
470 u64 pgid; 484 struct ceph_pg pgid;
471 struct ceph_pg_mapping *pg; 485 struct ceph_pg_mapping *pg;
472 486
473 ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad); 487 ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad);
474 pgid = ceph_decode_64(p); 488 ceph_decode_copy(p, &pgid, sizeof(pgid));
475 n = ceph_decode_32(p); 489 n = ceph_decode_32(p);
476 ceph_decode_need(p, end, n * sizeof(u32), bad); 490 ceph_decode_need(p, end, n * sizeof(u32), bad);
477 pg = kmalloc(sizeof(*pg) + n*sizeof(u32), GFP_NOFS); 491 pg = kmalloc(sizeof(*pg) + n*sizeof(u32), GFP_NOFS);
@@ -487,7 +501,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
487 err = __insert_pg_mapping(pg, &map->pg_temp); 501 err = __insert_pg_mapping(pg, &map->pg_temp);
488 if (err) 502 if (err)
489 goto bad; 503 goto bad;
490 dout(" added pg_temp %llx len %d\n", pgid, len); 504 dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid, len);
491 } 505 }
492 506
493 /* crush */ 507 /* crush */
@@ -659,19 +673,20 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
659 while (len--) { 673 while (len--) {
660 struct ceph_pg_mapping *pg; 674 struct ceph_pg_mapping *pg;
661 int j; 675 int j;
662 u64 pgid; 676 struct ceph_pg pgid;
663 u32 pglen; 677 u32 pglen;
664 ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad); 678 ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad);
665 pgid = ceph_decode_64(p); 679 ceph_decode_copy(p, &pgid, sizeof(pgid));
666 pglen = ceph_decode_32(p); 680 pglen = ceph_decode_32(p);
667 681
668 /* remove any? */ 682 /* remove any? */
669 while (rbp && rb_entry(rbp, struct ceph_pg_mapping, 683 while (rbp && pgid_cmp(rb_entry(rbp, struct ceph_pg_mapping,
670 node)->pgid <= pgid) { 684 node)->pgid, pgid) <= 0) {
671 struct rb_node *cur = rbp; 685 struct rb_node *cur = rbp;
672 rbp = rb_next(rbp); 686 rbp = rb_next(rbp);
673 dout(" removed pg_temp %llx\n", 687 dout(" removed pg_temp %llx\n",
674 rb_entry(cur, struct ceph_pg_mapping, node)->pgid); 688 *(u64 *)&rb_entry(cur, struct ceph_pg_mapping,
689 node)->pgid);
675 rb_erase(cur, &map->pg_temp); 690 rb_erase(cur, &map->pg_temp);
676 } 691 }
677 692
@@ -690,14 +705,16 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
690 err = __insert_pg_mapping(pg, &map->pg_temp); 705 err = __insert_pg_mapping(pg, &map->pg_temp);
691 if (err) 706 if (err)
692 goto bad; 707 goto bad;
693 dout(" added pg_temp %llx len %d\n", pgid, pglen); 708 dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid,
709 pglen);
694 } 710 }
695 } 711 }
696 while (rbp) { 712 while (rbp) {
697 struct rb_node *cur = rbp; 713 struct rb_node *cur = rbp;
698 rbp = rb_next(rbp); 714 rbp = rb_next(rbp);
699 dout(" removed pg_temp %llx\n", 715 dout(" removed pg_temp %llx\n",
700 rb_entry(cur, struct ceph_pg_mapping, node)->pgid); 716 *(u64 *)&rb_entry(cur, struct ceph_pg_mapping,
717 node)->pgid);
701 rb_erase(cur, &map->pg_temp); 718 rb_erase(cur, &map->pg_temp);
702 } 719 }
703 720
@@ -782,16 +799,19 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol,
782 struct ceph_osdmap *osdmap) 799 struct ceph_osdmap *osdmap)
783{ 800{
784 unsigned num, num_mask; 801 unsigned num, num_mask;
785 union ceph_pg pgid; 802 struct ceph_pg pgid;
786 s32 preferred = (s32)le32_to_cpu(fl->fl_pg_preferred); 803 s32 preferred = (s32)le32_to_cpu(fl->fl_pg_preferred);
787 int poolid = le32_to_cpu(fl->fl_pg_pool); 804 int poolid = le32_to_cpu(fl->fl_pg_pool);
788 struct ceph_pg_pool_info *pool; 805 struct ceph_pg_pool_info *pool;
806 unsigned ps;
789 807
790 if (poolid >= osdmap->num_pools) 808 if (poolid >= osdmap->num_pools)
791 return -EIO; 809 return -EIO;
792 pool = &osdmap->pg_pool[poolid];
793 810
811 pool = &osdmap->pg_pool[poolid];
812 ps = ceph_full_name_hash(oid, strlen(oid));
794 if (preferred >= 0) { 813 if (preferred >= 0) {
814 ps += preferred;
795 num = le32_to_cpu(pool->v.lpg_num); 815 num = le32_to_cpu(pool->v.lpg_num);
796 num_mask = pool->lpg_num_mask; 816 num_mask = pool->lpg_num_mask;
797 } else { 817 } else {
@@ -799,22 +819,17 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol,
799 num_mask = pool->pg_num_mask; 819 num_mask = pool->pg_num_mask;
800 } 820 }
801 821
802 pgid.pg64 = 0; /* start with it zeroed out */ 822 pgid.ps = cpu_to_le16(ps);
803 pgid.pg.ps = ceph_full_name_hash(oid, strlen(oid)); 823 pgid.preferred = cpu_to_le16(preferred);
804 pgid.pg.preferred = preferred; 824 pgid.pool = fl->fl_pg_pool;
805 if (preferred >= 0)
806 pgid.pg.ps += preferred;
807 pgid.pg.pool = le32_to_cpu(fl->fl_pg_pool);
808 if (preferred >= 0) 825 if (preferred >= 0)
809 dout("calc_object_layout '%s' pgid %d.%xp%d (%llx)\n", oid, 826 dout("calc_object_layout '%s' pgid %d.%xp%d\n", oid, poolid, ps,
810 pgid.pg.pool, pgid.pg.ps, (int)preferred, pgid.pg64); 827 (int)preferred);
811 else 828 else
812 dout("calc_object_layout '%s' pgid %d.%x (%llx)\n", oid, 829 dout("calc_object_layout '%s' pgid %d.%x\n", oid, poolid, ps);
813 pgid.pg.pool, pgid.pg.ps, pgid.pg64);
814 830
815 ol->ol_pgid = cpu_to_le64(pgid.pg64); 831 ol->ol_pgid = pgid;
816 ol->ol_stripe_unit = fl->fl_object_stripe_unit; 832 ol->ol_stripe_unit = fl->fl_object_stripe_unit;
817
818 return 0; 833 return 0;
819} 834}
820 835
@@ -822,21 +837,24 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol,
822 * Calculate raw osd vector for the given pgid. Return pointer to osd 837 * Calculate raw osd vector for the given pgid. Return pointer to osd
823 * array, or NULL on failure. 838 * array, or NULL on failure.
824 */ 839 */
825static int *calc_pg_raw(struct ceph_osdmap *osdmap, union ceph_pg pgid, 840static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
826 int *osds, int *num) 841 int *osds, int *num)
827{ 842{
828 struct rb_node *n = osdmap->pg_temp.rb_node; 843 struct rb_node *n = osdmap->pg_temp.rb_node;
829 struct ceph_pg_mapping *pg; 844 struct ceph_pg_mapping *pg;
830 struct ceph_pg_pool_info *pool; 845 struct ceph_pg_pool_info *pool;
831 int ruleno; 846 int ruleno;
832 unsigned pps; /* placement ps */ 847 unsigned poolid, ps, pps;
848 int preferred;
849 int c;
833 850
834 /* pg_temp? */ 851 /* pg_temp? */
835 while (n) { 852 while (n) {
836 pg = rb_entry(n, struct ceph_pg_mapping, node); 853 pg = rb_entry(n, struct ceph_pg_mapping, node);
837 if (pgid.pg64 < pg->pgid) 854 c = pgid_cmp(pgid, pg->pgid);
855 if (c < 0)
838 n = n->rb_left; 856 n = n->rb_left;
839 else if (pgid.pg64 > pg->pgid) 857 else if (c > 0)
840 n = n->rb_right; 858 n = n->rb_right;
841 else { 859 else {
842 *num = pg->len; 860 *num = pg->len;
@@ -845,36 +863,40 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, union ceph_pg pgid,
845 } 863 }
846 864
847 /* crush */ 865 /* crush */
848 if (pgid.pg.pool >= osdmap->num_pools) 866 poolid = le32_to_cpu(pgid.pool);
867 ps = le16_to_cpu(pgid.ps);
868 preferred = (s16)le16_to_cpu(pgid.preferred);
869
870 if (poolid >= osdmap->num_pools)
849 return NULL; 871 return NULL;
850 pool = &osdmap->pg_pool[pgid.pg.pool]; 872 pool = &osdmap->pg_pool[poolid];
851 ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset, 873 ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset,
852 pool->v.type, pool->v.size); 874 pool->v.type, pool->v.size);
853 if (ruleno < 0) { 875 if (ruleno < 0) {
854 pr_err("no crush rule pool %d type %d size %d\n", 876 pr_err("no crush rule pool %d type %d size %d\n",
855 pgid.pg.pool, pool->v.type, pool->v.size); 877 poolid, pool->v.type, pool->v.size);
856 return NULL; 878 return NULL;
857 } 879 }
858 880
859 if (pgid.pg.preferred >= 0) 881 if (preferred >= 0)
860 pps = ceph_stable_mod(pgid.pg.ps, 882 pps = ceph_stable_mod(ps,
861 le32_to_cpu(pool->v.lpgp_num), 883 le32_to_cpu(pool->v.lpgp_num),
862 pool->lpgp_num_mask); 884 pool->lpgp_num_mask);
863 else 885 else
864 pps = ceph_stable_mod(pgid.pg.ps, 886 pps = ceph_stable_mod(ps,
865 le32_to_cpu(pool->v.pgp_num), 887 le32_to_cpu(pool->v.pgp_num),
866 pool->pgp_num_mask); 888 pool->pgp_num_mask);
867 pps += pgid.pg.pool; 889 pps += poolid;
868 *num = crush_do_rule(osdmap->crush, ruleno, pps, osds, 890 *num = crush_do_rule(osdmap->crush, ruleno, pps, osds,
869 min_t(int, pool->v.size, *num), 891 min_t(int, pool->v.size, *num),
870 pgid.pg.preferred, osdmap->osd_weight); 892 preferred, osdmap->osd_weight);
871 return osds; 893 return osds;
872} 894}
873 895
874/* 896/*
875 * Return primary osd for given pgid, or -1 if none. 897 * Return primary osd for given pgid, or -1 if none.
876 */ 898 */
877int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, union ceph_pg pgid) 899int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid)
878{ 900{
879 int rawosds[10], *osds; 901 int rawosds[10], *osds;
880 int i, num = ARRAY_SIZE(rawosds); 902 int i, num = ARRAY_SIZE(rawosds);
diff --git a/fs/ceph/osdmap.h b/fs/ceph/osdmap.h
index 07127c6fb134..c4af8418aa00 100644
--- a/fs/ceph/osdmap.h
+++ b/fs/ceph/osdmap.h
@@ -25,7 +25,7 @@ struct ceph_pg_pool_info {
25 25
26struct ceph_pg_mapping { 26struct ceph_pg_mapping {
27 struct rb_node node; 27 struct rb_node node;
28 u64 pgid; 28 struct ceph_pg pgid;
29 int len; 29 int len;
30 int osds[]; 30 int osds[];
31}; 31};
@@ -118,6 +118,7 @@ extern int ceph_calc_object_layout(struct ceph_object_layout *ol,
118 const char *oid, 118 const char *oid,
119 struct ceph_file_layout *fl, 119 struct ceph_file_layout *fl,
120 struct ceph_osdmap *osdmap); 120 struct ceph_osdmap *osdmap);
121extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, union ceph_pg pgid); 121extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
122 struct ceph_pg pgid);
122 123
123#endif 124#endif
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h
index a48cf4ae391e..85bdef78d142 100644
--- a/fs/ceph/rados.h
+++ b/fs/ceph/rados.h
@@ -55,13 +55,10 @@ struct ceph_timespec {
55 * placement group. 55 * placement group.
56 * we encode this into one __le64. 56 * we encode this into one __le64.
57 */ 57 */
58union ceph_pg { 58struct ceph_pg {
59 __u64 pg64; 59 __le16 preferred; /* preferred primary osd */
60 struct { 60 __le16 ps; /* placement seed */
61 __s16 preferred; /* preferred primary osd */ 61 __le32 pool; /* object pool */
62 __u16 ps; /* placement seed */
63 __u32 pool; /* object pool */
64 } __attribute__ ((packed)) pg;
65} __attribute__ ((packed)); 62} __attribute__ ((packed));
66 63
67/* 64/*
@@ -117,7 +114,7 @@ static inline int ceph_stable_mod(int x, int b, int bmask)
117 * object layout - how a given object should be stored. 114 * object layout - how a given object should be stored.
118 */ 115 */
119struct ceph_object_layout { 116struct ceph_object_layout {
120 __le64 ol_pgid; /* raw pg, with _full_ ps precision. */ 117 struct ceph_pg ol_pgid; /* raw pg, with _full_ ps precision. */
121 __le32 ol_stripe_unit; /* for per-object parity, if any */ 118 __le32 ol_stripe_unit; /* for per-object parity, if any */
122} __attribute__ ((packed)); 119} __attribute__ ((packed));
123 120