diff options
Diffstat (limited to 'fs/ceph')
-rw-r--r-- | fs/ceph/ioctl.c | 4 | ||||
-rw-r--r-- | fs/ceph/osd_client.c | 8 | ||||
-rw-r--r-- | fs/ceph/osdmap.c | 100 | ||||
-rw-r--r-- | fs/ceph/osdmap.h | 5 | ||||
-rw-r--r-- | fs/ceph/rados.h | 13 |
5 files changed, 75 insertions, 55 deletions
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index e4f99eff5d93..4c33e19fc241 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c | |||
@@ -99,7 +99,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) | |||
99 | u64 len = 1, olen; | 99 | u64 len = 1, olen; |
100 | u64 tmp; | 100 | u64 tmp; |
101 | struct ceph_object_layout ol; | 101 | struct ceph_object_layout ol; |
102 | union ceph_pg pgid; | 102 | struct ceph_pg pgid; |
103 | 103 | ||
104 | /* copy and validate */ | 104 | /* copy and validate */ |
105 | if (copy_from_user(&dl, arg, sizeof(dl))) | 105 | if (copy_from_user(&dl, arg, sizeof(dl))) |
@@ -121,7 +121,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) | |||
121 | ceph_calc_object_layout(&ol, dl.object_name, &ci->i_layout, | 121 | ceph_calc_object_layout(&ol, dl.object_name, &ci->i_layout, |
122 | osdc->osdmap); | 122 | osdc->osdmap); |
123 | 123 | ||
124 | pgid.pg64 = le64_to_cpu(ol.ol_pgid); | 124 | pgid = ol.ol_pgid; |
125 | dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid); | 125 | dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid); |
126 | if (dl.osd >= 0) { | 126 | if (dl.osd >= 0) { |
127 | struct ceph_entity_addr *a = | 127 | struct ceph_entity_addr *a = |
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index 7dc0f6299a52..7db14ba6261c 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c | |||
@@ -520,7 +520,7 @@ static int __map_osds(struct ceph_osd_client *osdc, | |||
520 | struct ceph_osd_request *req) | 520 | struct ceph_osd_request *req) |
521 | { | 521 | { |
522 | struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; | 522 | struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; |
523 | union ceph_pg pgid; | 523 | struct ceph_pg pgid; |
524 | int o = -1; | 524 | int o = -1; |
525 | int err; | 525 | int err; |
526 | struct ceph_osd *newosd = NULL; | 526 | struct ceph_osd *newosd = NULL; |
@@ -530,7 +530,7 @@ static int __map_osds(struct ceph_osd_client *osdc, | |||
530 | &req->r_file_layout, osdc->osdmap); | 530 | &req->r_file_layout, osdc->osdmap); |
531 | if (err) | 531 | if (err) |
532 | return err; | 532 | return err; |
533 | pgid.pg64 = le64_to_cpu(reqhead->layout.ol_pgid); | 533 | pgid = reqhead->layout.ol_pgid; |
534 | o = ceph_calc_pg_primary(osdc->osdmap, pgid); | 534 | o = ceph_calc_pg_primary(osdc->osdmap, pgid); |
535 | 535 | ||
536 | if ((req->r_osd && req->r_osd->o_osd == o && | 536 | if ((req->r_osd && req->r_osd->o_osd == o && |
@@ -538,8 +538,8 @@ static int __map_osds(struct ceph_osd_client *osdc, | |||
538 | (req->r_osd == NULL && o == -1)) | 538 | (req->r_osd == NULL && o == -1)) |
539 | return 0; /* no change */ | 539 | return 0; /* no change */ |
540 | 540 | ||
541 | dout("map_osds tid %llu pgid %llx pool %d osd%d (was osd%d)\n", | 541 | dout("map_osds tid %llu pgid %d.%x osd%d (was osd%d)\n", |
542 | req->r_tid, pgid.pg64, pgid.pg.pool, o, | 542 | req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o, |
543 | req->r_osd ? req->r_osd->o_osd : -1); | 543 | req->r_osd ? req->r_osd->o_osd : -1); |
544 | 544 | ||
545 | if (req->r_osd) { | 545 | if (req->r_osd) { |
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c index cd7bb265d789..8b0cd1107507 100644 --- a/fs/ceph/osdmap.c +++ b/fs/ceph/osdmap.c | |||
@@ -366,19 +366,33 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, int max) | |||
366 | /* | 366 | /* |
367 | * Insert a new pg_temp mapping | 367 | * Insert a new pg_temp mapping |
368 | */ | 368 | */ |
369 | static int pgid_cmp(struct ceph_pg l, struct ceph_pg r) | ||
370 | { | ||
371 | u64 a = *(u64 *)&l; | ||
372 | u64 b = *(u64 *)&r; | ||
373 | |||
374 | if (a < b) | ||
375 | return -1; | ||
376 | if (a > b) | ||
377 | return 1; | ||
378 | return 0; | ||
379 | } | ||
380 | |||
369 | static int __insert_pg_mapping(struct ceph_pg_mapping *new, | 381 | static int __insert_pg_mapping(struct ceph_pg_mapping *new, |
370 | struct rb_root *root) | 382 | struct rb_root *root) |
371 | { | 383 | { |
372 | struct rb_node **p = &root->rb_node; | 384 | struct rb_node **p = &root->rb_node; |
373 | struct rb_node *parent = NULL; | 385 | struct rb_node *parent = NULL; |
374 | struct ceph_pg_mapping *pg = NULL; | 386 | struct ceph_pg_mapping *pg = NULL; |
387 | int c; | ||
375 | 388 | ||
376 | while (*p) { | 389 | while (*p) { |
377 | parent = *p; | 390 | parent = *p; |
378 | pg = rb_entry(parent, struct ceph_pg_mapping, node); | 391 | pg = rb_entry(parent, struct ceph_pg_mapping, node); |
379 | if (new->pgid < pg->pgid) | 392 | c = pgid_cmp(new->pgid, pg->pgid); |
393 | if (c < 0) | ||
380 | p = &(*p)->rb_left; | 394 | p = &(*p)->rb_left; |
381 | else if (new->pgid > pg->pgid) | 395 | else if (c > 0) |
382 | p = &(*p)->rb_right; | 396 | p = &(*p)->rb_right; |
383 | else | 397 | else |
384 | return -EEXIST; | 398 | return -EEXIST; |
@@ -467,11 +481,11 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) | |||
467 | ceph_decode_32_safe(p, end, len, bad); | 481 | ceph_decode_32_safe(p, end, len, bad); |
468 | for (i = 0; i < len; i++) { | 482 | for (i = 0; i < len; i++) { |
469 | int n, j; | 483 | int n, j; |
470 | u64 pgid; | 484 | struct ceph_pg pgid; |
471 | struct ceph_pg_mapping *pg; | 485 | struct ceph_pg_mapping *pg; |
472 | 486 | ||
473 | ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad); | 487 | ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad); |
474 | pgid = ceph_decode_64(p); | 488 | ceph_decode_copy(p, &pgid, sizeof(pgid)); |
475 | n = ceph_decode_32(p); | 489 | n = ceph_decode_32(p); |
476 | ceph_decode_need(p, end, n * sizeof(u32), bad); | 490 | ceph_decode_need(p, end, n * sizeof(u32), bad); |
477 | pg = kmalloc(sizeof(*pg) + n*sizeof(u32), GFP_NOFS); | 491 | pg = kmalloc(sizeof(*pg) + n*sizeof(u32), GFP_NOFS); |
@@ -487,7 +501,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) | |||
487 | err = __insert_pg_mapping(pg, &map->pg_temp); | 501 | err = __insert_pg_mapping(pg, &map->pg_temp); |
488 | if (err) | 502 | if (err) |
489 | goto bad; | 503 | goto bad; |
490 | dout(" added pg_temp %llx len %d\n", pgid, len); | 504 | dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid, len); |
491 | } | 505 | } |
492 | 506 | ||
493 | /* crush */ | 507 | /* crush */ |
@@ -659,19 +673,20 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
659 | while (len--) { | 673 | while (len--) { |
660 | struct ceph_pg_mapping *pg; | 674 | struct ceph_pg_mapping *pg; |
661 | int j; | 675 | int j; |
662 | u64 pgid; | 676 | struct ceph_pg pgid; |
663 | u32 pglen; | 677 | u32 pglen; |
664 | ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad); | 678 | ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad); |
665 | pgid = ceph_decode_64(p); | 679 | ceph_decode_copy(p, &pgid, sizeof(pgid)); |
666 | pglen = ceph_decode_32(p); | 680 | pglen = ceph_decode_32(p); |
667 | 681 | ||
668 | /* remove any? */ | 682 | /* remove any? */ |
669 | while (rbp && rb_entry(rbp, struct ceph_pg_mapping, | 683 | while (rbp && pgid_cmp(rb_entry(rbp, struct ceph_pg_mapping, |
670 | node)->pgid <= pgid) { | 684 | node)->pgid, pgid) <= 0) { |
671 | struct rb_node *cur = rbp; | 685 | struct rb_node *cur = rbp; |
672 | rbp = rb_next(rbp); | 686 | rbp = rb_next(rbp); |
673 | dout(" removed pg_temp %llx\n", | 687 | dout(" removed pg_temp %llx\n", |
674 | rb_entry(cur, struct ceph_pg_mapping, node)->pgid); | 688 | *(u64 *)&rb_entry(cur, struct ceph_pg_mapping, |
689 | node)->pgid); | ||
675 | rb_erase(cur, &map->pg_temp); | 690 | rb_erase(cur, &map->pg_temp); |
676 | } | 691 | } |
677 | 692 | ||
@@ -690,14 +705,16 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
690 | err = __insert_pg_mapping(pg, &map->pg_temp); | 705 | err = __insert_pg_mapping(pg, &map->pg_temp); |
691 | if (err) | 706 | if (err) |
692 | goto bad; | 707 | goto bad; |
693 | dout(" added pg_temp %llx len %d\n", pgid, pglen); | 708 | dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid, |
709 | pglen); | ||
694 | } | 710 | } |
695 | } | 711 | } |
696 | while (rbp) { | 712 | while (rbp) { |
697 | struct rb_node *cur = rbp; | 713 | struct rb_node *cur = rbp; |
698 | rbp = rb_next(rbp); | 714 | rbp = rb_next(rbp); |
699 | dout(" removed pg_temp %llx\n", | 715 | dout(" removed pg_temp %llx\n", |
700 | rb_entry(cur, struct ceph_pg_mapping, node)->pgid); | 716 | *(u64 *)&rb_entry(cur, struct ceph_pg_mapping, |
717 | node)->pgid); | ||
701 | rb_erase(cur, &map->pg_temp); | 718 | rb_erase(cur, &map->pg_temp); |
702 | } | 719 | } |
703 | 720 | ||
@@ -782,16 +799,19 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol, | |||
782 | struct ceph_osdmap *osdmap) | 799 | struct ceph_osdmap *osdmap) |
783 | { | 800 | { |
784 | unsigned num, num_mask; | 801 | unsigned num, num_mask; |
785 | union ceph_pg pgid; | 802 | struct ceph_pg pgid; |
786 | s32 preferred = (s32)le32_to_cpu(fl->fl_pg_preferred); | 803 | s32 preferred = (s32)le32_to_cpu(fl->fl_pg_preferred); |
787 | int poolid = le32_to_cpu(fl->fl_pg_pool); | 804 | int poolid = le32_to_cpu(fl->fl_pg_pool); |
788 | struct ceph_pg_pool_info *pool; | 805 | struct ceph_pg_pool_info *pool; |
806 | unsigned ps; | ||
789 | 807 | ||
790 | if (poolid >= osdmap->num_pools) | 808 | if (poolid >= osdmap->num_pools) |
791 | return -EIO; | 809 | return -EIO; |
792 | pool = &osdmap->pg_pool[poolid]; | ||
793 | 810 | ||
811 | pool = &osdmap->pg_pool[poolid]; | ||
812 | ps = ceph_full_name_hash(oid, strlen(oid)); | ||
794 | if (preferred >= 0) { | 813 | if (preferred >= 0) { |
814 | ps += preferred; | ||
795 | num = le32_to_cpu(pool->v.lpg_num); | 815 | num = le32_to_cpu(pool->v.lpg_num); |
796 | num_mask = pool->lpg_num_mask; | 816 | num_mask = pool->lpg_num_mask; |
797 | } else { | 817 | } else { |
@@ -799,22 +819,17 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol, | |||
799 | num_mask = pool->pg_num_mask; | 819 | num_mask = pool->pg_num_mask; |
800 | } | 820 | } |
801 | 821 | ||
802 | pgid.pg64 = 0; /* start with it zeroed out */ | 822 | pgid.ps = cpu_to_le16(ps); |
803 | pgid.pg.ps = ceph_full_name_hash(oid, strlen(oid)); | 823 | pgid.preferred = cpu_to_le16(preferred); |
804 | pgid.pg.preferred = preferred; | 824 | pgid.pool = fl->fl_pg_pool; |
805 | if (preferred >= 0) | ||
806 | pgid.pg.ps += preferred; | ||
807 | pgid.pg.pool = le32_to_cpu(fl->fl_pg_pool); | ||
808 | if (preferred >= 0) | 825 | if (preferred >= 0) |
809 | dout("calc_object_layout '%s' pgid %d.%xp%d (%llx)\n", oid, | 826 | dout("calc_object_layout '%s' pgid %d.%xp%d\n", oid, poolid, ps, |
810 | pgid.pg.pool, pgid.pg.ps, (int)preferred, pgid.pg64); | 827 | (int)preferred); |
811 | else | 828 | else |
812 | dout("calc_object_layout '%s' pgid %d.%x (%llx)\n", oid, | 829 | dout("calc_object_layout '%s' pgid %d.%x\n", oid, poolid, ps); |
813 | pgid.pg.pool, pgid.pg.ps, pgid.pg64); | ||
814 | 830 | ||
815 | ol->ol_pgid = cpu_to_le64(pgid.pg64); | 831 | ol->ol_pgid = pgid; |
816 | ol->ol_stripe_unit = fl->fl_object_stripe_unit; | 832 | ol->ol_stripe_unit = fl->fl_object_stripe_unit; |
817 | |||
818 | return 0; | 833 | return 0; |
819 | } | 834 | } |
820 | 835 | ||
@@ -822,21 +837,24 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol, | |||
822 | * Calculate raw osd vector for the given pgid. Return pointer to osd | 837 | * Calculate raw osd vector for the given pgid. Return pointer to osd |
823 | * array, or NULL on failure. | 838 | * array, or NULL on failure. |
824 | */ | 839 | */ |
825 | static int *calc_pg_raw(struct ceph_osdmap *osdmap, union ceph_pg pgid, | 840 | static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, |
826 | int *osds, int *num) | 841 | int *osds, int *num) |
827 | { | 842 | { |
828 | struct rb_node *n = osdmap->pg_temp.rb_node; | 843 | struct rb_node *n = osdmap->pg_temp.rb_node; |
829 | struct ceph_pg_mapping *pg; | 844 | struct ceph_pg_mapping *pg; |
830 | struct ceph_pg_pool_info *pool; | 845 | struct ceph_pg_pool_info *pool; |
831 | int ruleno; | 846 | int ruleno; |
832 | unsigned pps; /* placement ps */ | 847 | unsigned poolid, ps, pps; |
848 | int preferred; | ||
849 | int c; | ||
833 | 850 | ||
834 | /* pg_temp? */ | 851 | /* pg_temp? */ |
835 | while (n) { | 852 | while (n) { |
836 | pg = rb_entry(n, struct ceph_pg_mapping, node); | 853 | pg = rb_entry(n, struct ceph_pg_mapping, node); |
837 | if (pgid.pg64 < pg->pgid) | 854 | c = pgid_cmp(pgid, pg->pgid); |
855 | if (c < 0) | ||
838 | n = n->rb_left; | 856 | n = n->rb_left; |
839 | else if (pgid.pg64 > pg->pgid) | 857 | else if (c > 0) |
840 | n = n->rb_right; | 858 | n = n->rb_right; |
841 | else { | 859 | else { |
842 | *num = pg->len; | 860 | *num = pg->len; |
@@ -845,36 +863,40 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, union ceph_pg pgid, | |||
845 | } | 863 | } |
846 | 864 | ||
847 | /* crush */ | 865 | /* crush */ |
848 | if (pgid.pg.pool >= osdmap->num_pools) | 866 | poolid = le32_to_cpu(pgid.pool); |
867 | ps = le16_to_cpu(pgid.ps); | ||
868 | preferred = (s16)le16_to_cpu(pgid.preferred); | ||
869 | |||
870 | if (poolid >= osdmap->num_pools) | ||
849 | return NULL; | 871 | return NULL; |
850 | pool = &osdmap->pg_pool[pgid.pg.pool]; | 872 | pool = &osdmap->pg_pool[poolid]; |
851 | ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset, | 873 | ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset, |
852 | pool->v.type, pool->v.size); | 874 | pool->v.type, pool->v.size); |
853 | if (ruleno < 0) { | 875 | if (ruleno < 0) { |
854 | pr_err("no crush rule pool %d type %d size %d\n", | 876 | pr_err("no crush rule pool %d type %d size %d\n", |
855 | pgid.pg.pool, pool->v.type, pool->v.size); | 877 | poolid, pool->v.type, pool->v.size); |
856 | return NULL; | 878 | return NULL; |
857 | } | 879 | } |
858 | 880 | ||
859 | if (pgid.pg.preferred >= 0) | 881 | if (preferred >= 0) |
860 | pps = ceph_stable_mod(pgid.pg.ps, | 882 | pps = ceph_stable_mod(ps, |
861 | le32_to_cpu(pool->v.lpgp_num), | 883 | le32_to_cpu(pool->v.lpgp_num), |
862 | pool->lpgp_num_mask); | 884 | pool->lpgp_num_mask); |
863 | else | 885 | else |
864 | pps = ceph_stable_mod(pgid.pg.ps, | 886 | pps = ceph_stable_mod(ps, |
865 | le32_to_cpu(pool->v.pgp_num), | 887 | le32_to_cpu(pool->v.pgp_num), |
866 | pool->pgp_num_mask); | 888 | pool->pgp_num_mask); |
867 | pps += pgid.pg.pool; | 889 | pps += poolid; |
868 | *num = crush_do_rule(osdmap->crush, ruleno, pps, osds, | 890 | *num = crush_do_rule(osdmap->crush, ruleno, pps, osds, |
869 | min_t(int, pool->v.size, *num), | 891 | min_t(int, pool->v.size, *num), |
870 | pgid.pg.preferred, osdmap->osd_weight); | 892 | preferred, osdmap->osd_weight); |
871 | return osds; | 893 | return osds; |
872 | } | 894 | } |
873 | 895 | ||
874 | /* | 896 | /* |
875 | * Return primary osd for given pgid, or -1 if none. | 897 | * Return primary osd for given pgid, or -1 if none. |
876 | */ | 898 | */ |
877 | int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, union ceph_pg pgid) | 899 | int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) |
878 | { | 900 | { |
879 | int rawosds[10], *osds; | 901 | int rawosds[10], *osds; |
880 | int i, num = ARRAY_SIZE(rawosds); | 902 | int i, num = ARRAY_SIZE(rawosds); |
diff --git a/fs/ceph/osdmap.h b/fs/ceph/osdmap.h index 07127c6fb134..c4af8418aa00 100644 --- a/fs/ceph/osdmap.h +++ b/fs/ceph/osdmap.h | |||
@@ -25,7 +25,7 @@ struct ceph_pg_pool_info { | |||
25 | 25 | ||
26 | struct ceph_pg_mapping { | 26 | struct ceph_pg_mapping { |
27 | struct rb_node node; | 27 | struct rb_node node; |
28 | u64 pgid; | 28 | struct ceph_pg pgid; |
29 | int len; | 29 | int len; |
30 | int osds[]; | 30 | int osds[]; |
31 | }; | 31 | }; |
@@ -118,6 +118,7 @@ extern int ceph_calc_object_layout(struct ceph_object_layout *ol, | |||
118 | const char *oid, | 118 | const char *oid, |
119 | struct ceph_file_layout *fl, | 119 | struct ceph_file_layout *fl, |
120 | struct ceph_osdmap *osdmap); | 120 | struct ceph_osdmap *osdmap); |
121 | extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, union ceph_pg pgid); | 121 | extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, |
122 | struct ceph_pg pgid); | ||
122 | 123 | ||
123 | #endif | 124 | #endif |
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h index a48cf4ae391e..85bdef78d142 100644 --- a/fs/ceph/rados.h +++ b/fs/ceph/rados.h | |||
@@ -55,13 +55,10 @@ struct ceph_timespec { | |||
55 | * placement group. | 55 | * placement group. |
56 | * we encode this into one __le64. | 56 | * we encode this into one __le64. |
57 | */ | 57 | */ |
58 | union ceph_pg { | 58 | struct ceph_pg { |
59 | __u64 pg64; | 59 | __le16 preferred; /* preferred primary osd */ |
60 | struct { | 60 | __le16 ps; /* placement seed */ |
61 | __s16 preferred; /* preferred primary osd */ | 61 | __le32 pool; /* object pool */ |
62 | __u16 ps; /* placement seed */ | ||
63 | __u32 pool; /* object pool */ | ||
64 | } __attribute__ ((packed)) pg; | ||
65 | } __attribute__ ((packed)); | 62 | } __attribute__ ((packed)); |
66 | 63 | ||
67 | /* | 64 | /* |
@@ -117,7 +114,7 @@ static inline int ceph_stable_mod(int x, int b, int bmask) | |||
117 | * object layout - how a given object should be stored. | 114 | * object layout - how a given object should be stored. |
118 | */ | 115 | */ |
119 | struct ceph_object_layout { | 116 | struct ceph_object_layout { |
120 | __le64 ol_pgid; /* raw pg, with _full_ ps precision. */ | 117 | struct ceph_pg ol_pgid; /* raw pg, with _full_ ps precision. */ |
121 | __le32 ol_stripe_unit; /* for per-object parity, if any */ | 118 | __le32 ol_stripe_unit; /* for per-object parity, if any */ |
122 | } __attribute__ ((packed)); | 119 | } __attribute__ ((packed)); |
123 | 120 | ||