diff options
author | Alex Elder <elder@inktank.com> | 2013-01-30 08:54:34 -0500 |
---|---|---|
committer | Alex Elder <elder@inktank.com> | 2013-01-30 08:54:34 -0500 |
commit | 969e5aa3b0162a02c4f287d48ff58ca2145acf1b (patch) | |
tree | 1af8e8e47e7352c6d3b4abfdb4aea6bd9458666f /drivers/block | |
parent | 949db153b6466c6f7cad5a427ecea94985927311 (diff) | |
parent | 1ec3911dbd19076bcdfe5540096ff67f91a6ec02 (diff) |
Merge branch 'testing' of github.com:ceph/ceph-client into v3.8-rc5-testing
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/rbd.c | 855 |
1 files changed, 444 insertions, 411 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 89576a0b3f2e..668936381ab0 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c | |||
@@ -52,9 +52,12 @@ | |||
52 | #define SECTOR_SHIFT 9 | 52 | #define SECTOR_SHIFT 9 |
53 | #define SECTOR_SIZE (1ULL << SECTOR_SHIFT) | 53 | #define SECTOR_SIZE (1ULL << SECTOR_SHIFT) |
54 | 54 | ||
55 | /* It might be useful to have this defined elsewhere too */ | 55 | /* It might be useful to have these defined elsewhere */ |
56 | 56 | ||
57 | #define U64_MAX ((u64) (~0ULL)) | 57 | #define U8_MAX ((u8) (~0U)) |
58 | #define U16_MAX ((u16) (~0U)) | ||
59 | #define U32_MAX ((u32) (~0U)) | ||
60 | #define U64_MAX ((u64) (~0ULL)) | ||
58 | 61 | ||
59 | #define RBD_DRV_NAME "rbd" | 62 | #define RBD_DRV_NAME "rbd" |
60 | #define RBD_DRV_NAME_LONG "rbd (rados block device)" | 63 | #define RBD_DRV_NAME_LONG "rbd (rados block device)" |
@@ -66,7 +69,6 @@ | |||
66 | (NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1)) | 69 | (NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1)) |
67 | 70 | ||
68 | #define RBD_MAX_SNAP_COUNT 510 /* allows max snapc to fit in 4KB */ | 71 | #define RBD_MAX_SNAP_COUNT 510 /* allows max snapc to fit in 4KB */ |
69 | #define RBD_MAX_OPT_LEN 1024 | ||
70 | 72 | ||
71 | #define RBD_SNAP_HEAD_NAME "-" | 73 | #define RBD_SNAP_HEAD_NAME "-" |
72 | 74 | ||
@@ -93,8 +95,6 @@ | |||
93 | #define DEV_NAME_LEN 32 | 95 | #define DEV_NAME_LEN 32 |
94 | #define MAX_INT_FORMAT_WIDTH ((5 * sizeof (int)) / 2 + 1) | 96 | #define MAX_INT_FORMAT_WIDTH ((5 * sizeof (int)) / 2 + 1) |
95 | 97 | ||
96 | #define RBD_READ_ONLY_DEFAULT false | ||
97 | |||
98 | /* | 98 | /* |
99 | * block device image metadata (in-memory version) | 99 | * block device image metadata (in-memory version) |
100 | */ | 100 | */ |
@@ -119,16 +119,33 @@ struct rbd_image_header { | |||
119 | * An rbd image specification. | 119 | * An rbd image specification. |
120 | * | 120 | * |
121 | * The tuple (pool_id, image_id, snap_id) is sufficient to uniquely | 121 | * The tuple (pool_id, image_id, snap_id) is sufficient to uniquely |
122 | * identify an image. | 122 | * identify an image. Each rbd_dev structure includes a pointer to |
123 | * an rbd_spec structure that encapsulates this identity. | ||
124 | * | ||
125 | * Each of the id's in an rbd_spec has an associated name. For a | ||
126 | * user-mapped image, the names are supplied and the id's associated | ||
127 | * with them are looked up. For a layered image, a parent image is | ||
128 | * defined by the tuple, and the names are looked up. | ||
129 | * | ||
130 | * An rbd_dev structure contains a parent_spec pointer which is | ||
131 | * non-null if the image it represents is a child in a layered | ||
132 | * image. This pointer will refer to the rbd_spec structure used | ||
133 | * by the parent rbd_dev for its own identity (i.e., the structure | ||
134 | * is shared between the parent and child). | ||
135 | * | ||
136 | * Since these structures are populated once, during the discovery | ||
137 | * phase of image construction, they are effectively immutable so | ||
138 | * we make no effort to synchronize access to them. | ||
139 | * | ||
140 | * Note that code herein does not assume the image name is known (it | ||
141 | * could be a null pointer). | ||
123 | */ | 142 | */ |
124 | struct rbd_spec { | 143 | struct rbd_spec { |
125 | u64 pool_id; | 144 | u64 pool_id; |
126 | char *pool_name; | 145 | char *pool_name; |
127 | 146 | ||
128 | char *image_id; | 147 | char *image_id; |
129 | size_t image_id_len; | ||
130 | char *image_name; | 148 | char *image_name; |
131 | size_t image_name_len; | ||
132 | 149 | ||
133 | u64 snap_id; | 150 | u64 snap_id; |
134 | char *snap_name; | 151 | char *snap_name; |
@@ -136,10 +153,6 @@ struct rbd_spec { | |||
136 | struct kref kref; | 153 | struct kref kref; |
137 | }; | 154 | }; |
138 | 155 | ||
139 | struct rbd_options { | ||
140 | bool read_only; | ||
141 | }; | ||
142 | |||
143 | /* | 156 | /* |
144 | * an instance of the client. multiple devices may share an rbd client. | 157 | * an instance of the client. multiple devices may share an rbd client. |
145 | */ | 158 | */ |
@@ -154,7 +167,7 @@ struct rbd_client { | |||
154 | */ | 167 | */ |
155 | struct rbd_req_status { | 168 | struct rbd_req_status { |
156 | int done; | 169 | int done; |
157 | int rc; | 170 | s32 rc; |
158 | u64 bytes; | 171 | u64 bytes; |
159 | }; | 172 | }; |
160 | 173 | ||
@@ -212,11 +225,13 @@ struct rbd_device { | |||
212 | spinlock_t lock; /* queue lock */ | 225 | spinlock_t lock; /* queue lock */ |
213 | 226 | ||
214 | struct rbd_image_header header; | 227 | struct rbd_image_header header; |
215 | bool exists; | 228 | atomic_t exists; |
216 | struct rbd_spec *spec; | 229 | struct rbd_spec *spec; |
217 | 230 | ||
218 | char *header_name; | 231 | char *header_name; |
219 | 232 | ||
233 | struct ceph_file_layout layout; | ||
234 | |||
220 | struct ceph_osd_event *watch_event; | 235 | struct ceph_osd_event *watch_event; |
221 | struct ceph_osd_request *watch_request; | 236 | struct ceph_osd_request *watch_request; |
222 | 237 | ||
@@ -277,6 +292,33 @@ static struct device rbd_root_dev = { | |||
277 | .release = rbd_root_dev_release, | 292 | .release = rbd_root_dev_release, |
278 | }; | 293 | }; |
279 | 294 | ||
295 | static __printf(2, 3) | ||
296 | void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...) | ||
297 | { | ||
298 | struct va_format vaf; | ||
299 | va_list args; | ||
300 | |||
301 | va_start(args, fmt); | ||
302 | vaf.fmt = fmt; | ||
303 | vaf.va = &args; | ||
304 | |||
305 | if (!rbd_dev) | ||
306 | printk(KERN_WARNING "%s: %pV\n", RBD_DRV_NAME, &vaf); | ||
307 | else if (rbd_dev->disk) | ||
308 | printk(KERN_WARNING "%s: %s: %pV\n", | ||
309 | RBD_DRV_NAME, rbd_dev->disk->disk_name, &vaf); | ||
310 | else if (rbd_dev->spec && rbd_dev->spec->image_name) | ||
311 | printk(KERN_WARNING "%s: image %s: %pV\n", | ||
312 | RBD_DRV_NAME, rbd_dev->spec->image_name, &vaf); | ||
313 | else if (rbd_dev->spec && rbd_dev->spec->image_id) | ||
314 | printk(KERN_WARNING "%s: id %s: %pV\n", | ||
315 | RBD_DRV_NAME, rbd_dev->spec->image_id, &vaf); | ||
316 | else /* punt */ | ||
317 | printk(KERN_WARNING "%s: rbd_dev %p: %pV\n", | ||
318 | RBD_DRV_NAME, rbd_dev, &vaf); | ||
319 | va_end(args); | ||
320 | } | ||
321 | |||
280 | #ifdef RBD_DEBUG | 322 | #ifdef RBD_DEBUG |
281 | #define rbd_assert(expr) \ | 323 | #define rbd_assert(expr) \ |
282 | if (unlikely(!(expr))) { \ | 324 | if (unlikely(!(expr))) { \ |
@@ -426,6 +468,12 @@ static match_table_t rbd_opts_tokens = { | |||
426 | {-1, NULL} | 468 | {-1, NULL} |
427 | }; | 469 | }; |
428 | 470 | ||
471 | struct rbd_options { | ||
472 | bool read_only; | ||
473 | }; | ||
474 | |||
475 | #define RBD_READ_ONLY_DEFAULT false | ||
476 | |||
429 | static int parse_rbd_opts_token(char *c, void *private) | 477 | static int parse_rbd_opts_token(char *c, void *private) |
430 | { | 478 | { |
431 | struct rbd_options *rbd_opts = private; | 479 | struct rbd_options *rbd_opts = private; |
@@ -707,7 +755,7 @@ static int rbd_dev_set_mapping(struct rbd_device *rbd_dev) | |||
707 | goto done; | 755 | goto done; |
708 | rbd_dev->mapping.read_only = true; | 756 | rbd_dev->mapping.read_only = true; |
709 | } | 757 | } |
710 | rbd_dev->exists = true; | 758 | atomic_set(&rbd_dev->exists, 1); |
711 | done: | 759 | done: |
712 | return ret; | 760 | return ret; |
713 | } | 761 | } |
@@ -724,7 +772,7 @@ static void rbd_header_free(struct rbd_image_header *header) | |||
724 | header->snapc = NULL; | 772 | header->snapc = NULL; |
725 | } | 773 | } |
726 | 774 | ||
727 | static char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) | 775 | static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) |
728 | { | 776 | { |
729 | char *name; | 777 | char *name; |
730 | u64 segment; | 778 | u64 segment; |
@@ -772,6 +820,7 @@ static int rbd_get_num_segments(struct rbd_image_header *header, | |||
772 | { | 820 | { |
773 | u64 start_seg; | 821 | u64 start_seg; |
774 | u64 end_seg; | 822 | u64 end_seg; |
823 | u64 result; | ||
775 | 824 | ||
776 | if (!len) | 825 | if (!len) |
777 | return 0; | 826 | return 0; |
@@ -781,7 +830,11 @@ static int rbd_get_num_segments(struct rbd_image_header *header, | |||
781 | start_seg = ofs >> header->obj_order; | 830 | start_seg = ofs >> header->obj_order; |
782 | end_seg = (ofs + len - 1) >> header->obj_order; | 831 | end_seg = (ofs + len - 1) >> header->obj_order; |
783 | 832 | ||
784 | return end_seg - start_seg + 1; | 833 | result = end_seg - start_seg + 1; |
834 | if (result > (u64) INT_MAX) | ||
835 | return -ERANGE; | ||
836 | |||
837 | return (int) result; | ||
785 | } | 838 | } |
786 | 839 | ||
787 | /* | 840 | /* |
@@ -949,8 +1002,10 @@ static struct bio *bio_chain_clone_range(struct bio **bio_src, | |||
949 | unsigned int bi_size; | 1002 | unsigned int bi_size; |
950 | struct bio *bio; | 1003 | struct bio *bio; |
951 | 1004 | ||
952 | if (!bi) | 1005 | if (!bi) { |
1006 | rbd_warn(NULL, "bio_chain exhausted with %u left", len); | ||
953 | goto out_err; /* EINVAL; ran out of bio's */ | 1007 | goto out_err; /* EINVAL; ran out of bio's */ |
1008 | } | ||
954 | bi_size = min_t(unsigned int, bi->bi_size - off, len); | 1009 | bi_size = min_t(unsigned int, bi->bi_size - off, len); |
955 | bio = bio_clone_range(bi, off, bi_size, gfpmask); | 1010 | bio = bio_clone_range(bi, off, bi_size, gfpmask); |
956 | if (!bio) | 1011 | if (!bio) |
@@ -976,44 +1031,84 @@ out_err: | |||
976 | return NULL; | 1031 | return NULL; |
977 | } | 1032 | } |
978 | 1033 | ||
979 | /* | 1034 | struct ceph_osd_req_op *rbd_osd_req_op_create(u16 opcode, ...) |
980 | * helpers for osd request op vectors. | ||
981 | */ | ||
982 | static struct ceph_osd_req_op *rbd_create_rw_ops(int num_ops, | ||
983 | int opcode, u32 payload_len) | ||
984 | { | 1035 | { |
985 | struct ceph_osd_req_op *ops; | 1036 | struct ceph_osd_req_op *op; |
1037 | va_list args; | ||
1038 | size_t size; | ||
986 | 1039 | ||
987 | ops = kzalloc(sizeof (*ops) * (num_ops + 1), GFP_NOIO); | 1040 | op = kzalloc(sizeof (*op), GFP_NOIO); |
988 | if (!ops) | 1041 | if (!op) |
989 | return NULL; | 1042 | return NULL; |
1043 | op->op = opcode; | ||
1044 | va_start(args, opcode); | ||
1045 | switch (opcode) { | ||
1046 | case CEPH_OSD_OP_READ: | ||
1047 | case CEPH_OSD_OP_WRITE: | ||
1048 | /* rbd_osd_req_op_create(READ, offset, length) */ | ||
1049 | /* rbd_osd_req_op_create(WRITE, offset, length) */ | ||
1050 | op->extent.offset = va_arg(args, u64); | ||
1051 | op->extent.length = va_arg(args, u64); | ||
1052 | if (opcode == CEPH_OSD_OP_WRITE) | ||
1053 | op->payload_len = op->extent.length; | ||
1054 | break; | ||
1055 | case CEPH_OSD_OP_CALL: | ||
1056 | /* rbd_osd_req_op_create(CALL, class, method, data, datalen) */ | ||
1057 | op->cls.class_name = va_arg(args, char *); | ||
1058 | size = strlen(op->cls.class_name); | ||
1059 | rbd_assert(size <= (size_t) U8_MAX); | ||
1060 | op->cls.class_len = size; | ||
1061 | op->payload_len = size; | ||
1062 | |||
1063 | op->cls.method_name = va_arg(args, char *); | ||
1064 | size = strlen(op->cls.method_name); | ||
1065 | rbd_assert(size <= (size_t) U8_MAX); | ||
1066 | op->cls.method_len = size; | ||
1067 | op->payload_len += size; | ||
1068 | |||
1069 | op->cls.argc = 0; | ||
1070 | op->cls.indata = va_arg(args, void *); | ||
1071 | size = va_arg(args, size_t); | ||
1072 | rbd_assert(size <= (size_t) U32_MAX); | ||
1073 | op->cls.indata_len = (u32) size; | ||
1074 | op->payload_len += size; | ||
1075 | break; | ||
1076 | case CEPH_OSD_OP_NOTIFY_ACK: | ||
1077 | case CEPH_OSD_OP_WATCH: | ||
1078 | /* rbd_osd_req_op_create(NOTIFY_ACK, cookie, version) */ | ||
1079 | /* rbd_osd_req_op_create(WATCH, cookie, version, flag) */ | ||
1080 | op->watch.cookie = va_arg(args, u64); | ||
1081 | op->watch.ver = va_arg(args, u64); | ||
1082 | op->watch.ver = cpu_to_le64(op->watch.ver); | ||
1083 | if (opcode == CEPH_OSD_OP_WATCH && va_arg(args, int)) | ||
1084 | op->watch.flag = (u8) 1; | ||
1085 | break; | ||
1086 | default: | ||
1087 | rbd_warn(NULL, "unsupported opcode %hu\n", opcode); | ||
1088 | kfree(op); | ||
1089 | op = NULL; | ||
1090 | break; | ||
1091 | } | ||
1092 | va_end(args); | ||
990 | 1093 | ||
991 | ops[0].op = opcode; | 1094 | return op; |
992 | |||
993 | /* | ||
994 | * op extent offset and length will be set later on | ||
995 | * in calc_raw_layout() | ||
996 | */ | ||
997 | ops[0].payload_len = payload_len; | ||
998 | |||
999 | return ops; | ||
1000 | } | 1095 | } |
1001 | 1096 | ||
1002 | static void rbd_destroy_ops(struct ceph_osd_req_op *ops) | 1097 | static void rbd_osd_req_op_destroy(struct ceph_osd_req_op *op) |
1003 | { | 1098 | { |
1004 | kfree(ops); | 1099 | kfree(op); |
1005 | } | 1100 | } |
1006 | 1101 | ||
1007 | static void rbd_coll_end_req_index(struct request *rq, | 1102 | static void rbd_coll_end_req_index(struct request *rq, |
1008 | struct rbd_req_coll *coll, | 1103 | struct rbd_req_coll *coll, |
1009 | int index, | 1104 | int index, |
1010 | int ret, u64 len) | 1105 | s32 ret, u64 len) |
1011 | { | 1106 | { |
1012 | struct request_queue *q; | 1107 | struct request_queue *q; |
1013 | int min, max, i; | 1108 | int min, max, i; |
1014 | 1109 | ||
1015 | dout("rbd_coll_end_req_index %p index %d ret %d len %llu\n", | 1110 | dout("rbd_coll_end_req_index %p index %d ret %d len %llu\n", |
1016 | coll, index, ret, (unsigned long long) len); | 1111 | coll, index, (int)ret, (unsigned long long)len); |
1017 | 1112 | ||
1018 | if (!rq) | 1113 | if (!rq) |
1019 | return; | 1114 | return; |
@@ -1034,7 +1129,7 @@ static void rbd_coll_end_req_index(struct request *rq, | |||
1034 | max++; | 1129 | max++; |
1035 | 1130 | ||
1036 | for (i = min; i<max; i++) { | 1131 | for (i = min; i<max; i++) { |
1037 | __blk_end_request(rq, coll->status[i].rc, | 1132 | __blk_end_request(rq, (int)coll->status[i].rc, |
1038 | coll->status[i].bytes); | 1133 | coll->status[i].bytes); |
1039 | coll->num_done++; | 1134 | coll->num_done++; |
1040 | kref_put(&coll->kref, rbd_coll_release); | 1135 | kref_put(&coll->kref, rbd_coll_release); |
@@ -1042,10 +1137,12 @@ static void rbd_coll_end_req_index(struct request *rq, | |||
1042 | spin_unlock_irq(q->queue_lock); | 1137 | spin_unlock_irq(q->queue_lock); |
1043 | } | 1138 | } |
1044 | 1139 | ||
1045 | static void rbd_coll_end_req(struct rbd_request *req, | 1140 | static void rbd_coll_end_req(struct rbd_request *rbd_req, |
1046 | int ret, u64 len) | 1141 | s32 ret, u64 len) |
1047 | { | 1142 | { |
1048 | rbd_coll_end_req_index(req->rq, req->coll, req->coll_index, ret, len); | 1143 | rbd_coll_end_req_index(rbd_req->rq, |
1144 | rbd_req->coll, rbd_req->coll_index, | ||
1145 | ret, len); | ||
1049 | } | 1146 | } |
1050 | 1147 | ||
1051 | /* | 1148 | /* |
@@ -1060,117 +1157,102 @@ static int rbd_do_request(struct request *rq, | |||
1060 | struct page **pages, | 1157 | struct page **pages, |
1061 | int num_pages, | 1158 | int num_pages, |
1062 | int flags, | 1159 | int flags, |
1063 | struct ceph_osd_req_op *ops, | 1160 | struct ceph_osd_req_op *op, |
1064 | struct rbd_req_coll *coll, | 1161 | struct rbd_req_coll *coll, |
1065 | int coll_index, | 1162 | int coll_index, |
1066 | void (*rbd_cb)(struct ceph_osd_request *req, | 1163 | void (*rbd_cb)(struct ceph_osd_request *, |
1067 | struct ceph_msg *msg), | 1164 | struct ceph_msg *), |
1068 | struct ceph_osd_request **linger_req, | ||
1069 | u64 *ver) | 1165 | u64 *ver) |
1070 | { | 1166 | { |
1071 | struct ceph_osd_request *req; | ||
1072 | struct ceph_file_layout *layout; | ||
1073 | int ret; | ||
1074 | u64 bno; | ||
1075 | struct timespec mtime = CURRENT_TIME; | ||
1076 | struct rbd_request *req_data; | ||
1077 | struct ceph_osd_request_head *reqhead; | ||
1078 | struct ceph_osd_client *osdc; | 1167 | struct ceph_osd_client *osdc; |
1079 | 1168 | struct ceph_osd_request *osd_req; | |
1080 | req_data = kzalloc(sizeof(*req_data), GFP_NOIO); | 1169 | struct rbd_request *rbd_req = NULL; |
1081 | if (!req_data) { | 1170 | struct timespec mtime = CURRENT_TIME; |
1082 | if (coll) | 1171 | int ret; |
1083 | rbd_coll_end_req_index(rq, coll, coll_index, | ||
1084 | -ENOMEM, len); | ||
1085 | return -ENOMEM; | ||
1086 | } | ||
1087 | |||
1088 | if (coll) { | ||
1089 | req_data->coll = coll; | ||
1090 | req_data->coll_index = coll_index; | ||
1091 | } | ||
1092 | 1172 | ||
1093 | dout("rbd_do_request object_name=%s ofs=%llu len=%llu coll=%p[%d]\n", | 1173 | dout("rbd_do_request object_name=%s ofs=%llu len=%llu coll=%p[%d]\n", |
1094 | object_name, (unsigned long long) ofs, | 1174 | object_name, (unsigned long long) ofs, |
1095 | (unsigned long long) len, coll, coll_index); | 1175 | (unsigned long long) len, coll, coll_index); |
1096 | 1176 | ||
1097 | osdc = &rbd_dev->rbd_client->client->osdc; | 1177 | osdc = &rbd_dev->rbd_client->client->osdc; |
1098 | req = ceph_osdc_alloc_request(osdc, flags, snapc, ops, | 1178 | osd_req = ceph_osdc_alloc_request(osdc, snapc, 1, false, GFP_NOIO); |
1099 | false, GFP_NOIO, pages, bio); | 1179 | if (!osd_req) |
1100 | if (!req) { | 1180 | return -ENOMEM; |
1101 | ret = -ENOMEM; | ||
1102 | goto done_pages; | ||
1103 | } | ||
1104 | |||
1105 | req->r_callback = rbd_cb; | ||
1106 | 1181 | ||
1107 | req_data->rq = rq; | 1182 | osd_req->r_flags = flags; |
1108 | req_data->bio = bio; | 1183 | osd_req->r_pages = pages; |
1109 | req_data->pages = pages; | 1184 | if (bio) { |
1110 | req_data->len = len; | 1185 | osd_req->r_bio = bio; |
1186 | bio_get(osd_req->r_bio); | ||
1187 | } | ||
1111 | 1188 | ||
1112 | req->r_priv = req_data; | 1189 | if (coll) { |
1190 | ret = -ENOMEM; | ||
1191 | rbd_req = kmalloc(sizeof(*rbd_req), GFP_NOIO); | ||
1192 | if (!rbd_req) | ||
1193 | goto done_osd_req; | ||
1194 | |||
1195 | rbd_req->rq = rq; | ||
1196 | rbd_req->bio = bio; | ||
1197 | rbd_req->pages = pages; | ||
1198 | rbd_req->len = len; | ||
1199 | rbd_req->coll = coll; | ||
1200 | rbd_req->coll_index = coll_index; | ||
1201 | } | ||
1113 | 1202 | ||
1114 | reqhead = req->r_request->front.iov_base; | 1203 | osd_req->r_callback = rbd_cb; |
1115 | reqhead->snapid = cpu_to_le64(CEPH_NOSNAP); | 1204 | osd_req->r_priv = rbd_req; |
1116 | 1205 | ||
1117 | strncpy(req->r_oid, object_name, sizeof(req->r_oid)); | 1206 | strncpy(osd_req->r_oid, object_name, sizeof(osd_req->r_oid)); |
1118 | req->r_oid_len = strlen(req->r_oid); | 1207 | osd_req->r_oid_len = strlen(osd_req->r_oid); |
1119 | 1208 | ||
1120 | layout = &req->r_file_layout; | 1209 | osd_req->r_file_layout = rbd_dev->layout; /* struct */ |
1121 | memset(layout, 0, sizeof(*layout)); | 1210 | osd_req->r_num_pages = calc_pages_for(ofs, len); |
1122 | layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); | 1211 | osd_req->r_page_alignment = ofs & ~PAGE_MASK; |
1123 | layout->fl_stripe_count = cpu_to_le32(1); | ||
1124 | layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); | ||
1125 | layout->fl_pg_pool = cpu_to_le32((int) rbd_dev->spec->pool_id); | ||
1126 | ret = ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno, | ||
1127 | req, ops); | ||
1128 | rbd_assert(ret == 0); | ||
1129 | 1212 | ||
1130 | ceph_osdc_build_request(req, ofs, &len, | 1213 | ceph_osdc_build_request(osd_req, ofs, len, 1, op, |
1131 | ops, | 1214 | snapc, snapid, &mtime); |
1132 | snapc, | ||
1133 | &mtime, | ||
1134 | req->r_oid, req->r_oid_len); | ||
1135 | 1215 | ||
1136 | if (linger_req) { | 1216 | if (op->op == CEPH_OSD_OP_WATCH && op->watch.flag) { |
1137 | ceph_osdc_set_request_linger(osdc, req); | 1217 | ceph_osdc_set_request_linger(osdc, osd_req); |
1138 | *linger_req = req; | 1218 | rbd_dev->watch_request = osd_req; |
1139 | } | 1219 | } |
1140 | 1220 | ||
1141 | ret = ceph_osdc_start_request(osdc, req, false); | 1221 | ret = ceph_osdc_start_request(osdc, osd_req, false); |
1142 | if (ret < 0) | 1222 | if (ret < 0) |
1143 | goto done_err; | 1223 | goto done_err; |
1144 | 1224 | ||
1145 | if (!rbd_cb) { | 1225 | if (!rbd_cb) { |
1146 | ret = ceph_osdc_wait_request(osdc, req); | 1226 | u64 version; |
1227 | |||
1228 | ret = ceph_osdc_wait_request(osdc, osd_req); | ||
1229 | version = le64_to_cpu(osd_req->r_reassert_version.version); | ||
1147 | if (ver) | 1230 | if (ver) |
1148 | *ver = le64_to_cpu(req->r_reassert_version.version); | 1231 | *ver = version; |
1149 | dout("reassert_ver=%llu\n", | 1232 | dout("reassert_ver=%llu\n", (unsigned long long) version); |
1150 | (unsigned long long) | 1233 | ceph_osdc_put_request(osd_req); |
1151 | le64_to_cpu(req->r_reassert_version.version)); | ||
1152 | ceph_osdc_put_request(req); | ||
1153 | } | 1234 | } |
1154 | return ret; | 1235 | return ret; |
1155 | 1236 | ||
1156 | done_err: | 1237 | done_err: |
1157 | bio_chain_put(req_data->bio); | 1238 | if (bio) |
1158 | ceph_osdc_put_request(req); | 1239 | bio_chain_put(osd_req->r_bio); |
1159 | done_pages: | 1240 | kfree(rbd_req); |
1160 | rbd_coll_end_req(req_data, ret, len); | 1241 | done_osd_req: |
1161 | kfree(req_data); | 1242 | ceph_osdc_put_request(osd_req); |
1243 | |||
1162 | return ret; | 1244 | return ret; |
1163 | } | 1245 | } |
1164 | 1246 | ||
1165 | /* | 1247 | /* |
1166 | * Ceph osd op callback | 1248 | * Ceph osd op callback |
1167 | */ | 1249 | */ |
1168 | static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) | 1250 | static void rbd_req_cb(struct ceph_osd_request *osd_req, struct ceph_msg *msg) |
1169 | { | 1251 | { |
1170 | struct rbd_request *req_data = req->r_priv; | 1252 | struct rbd_request *rbd_req = osd_req->r_priv; |
1171 | struct ceph_osd_reply_head *replyhead; | 1253 | struct ceph_osd_reply_head *replyhead; |
1172 | struct ceph_osd_op *op; | 1254 | struct ceph_osd_op *op; |
1173 | __s32 rc; | 1255 | s32 rc; |
1174 | u64 bytes; | 1256 | u64 bytes; |
1175 | int read_op; | 1257 | int read_op; |
1176 | 1258 | ||
@@ -1178,68 +1260,66 @@ static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) | |||
1178 | replyhead = msg->front.iov_base; | 1260 | replyhead = msg->front.iov_base; |
1179 | WARN_ON(le32_to_cpu(replyhead->num_ops) == 0); | 1261 | WARN_ON(le32_to_cpu(replyhead->num_ops) == 0); |
1180 | op = (void *)(replyhead + 1); | 1262 | op = (void *)(replyhead + 1); |
1181 | rc = le32_to_cpu(replyhead->result); | 1263 | rc = (s32)le32_to_cpu(replyhead->result); |
1182 | bytes = le64_to_cpu(op->extent.length); | 1264 | bytes = le64_to_cpu(op->extent.length); |
1183 | read_op = (le16_to_cpu(op->op) == CEPH_OSD_OP_READ); | 1265 | read_op = (le16_to_cpu(op->op) == CEPH_OSD_OP_READ); |
1184 | 1266 | ||
1185 | dout("rbd_req_cb bytes=%llu readop=%d rc=%d\n", | 1267 | dout("rbd_req_cb bytes=%llu readop=%d rc=%d\n", |
1186 | (unsigned long long) bytes, read_op, (int) rc); | 1268 | (unsigned long long) bytes, read_op, (int) rc); |
1187 | 1269 | ||
1188 | if (rc == -ENOENT && read_op) { | 1270 | if (rc == (s32)-ENOENT && read_op) { |
1189 | zero_bio_chain(req_data->bio, 0); | 1271 | zero_bio_chain(rbd_req->bio, 0); |
1190 | rc = 0; | 1272 | rc = 0; |
1191 | } else if (rc == 0 && read_op && bytes < req_data->len) { | 1273 | } else if (rc == 0 && read_op && bytes < rbd_req->len) { |
1192 | zero_bio_chain(req_data->bio, bytes); | 1274 | zero_bio_chain(rbd_req->bio, bytes); |
1193 | bytes = req_data->len; | 1275 | bytes = rbd_req->len; |
1194 | } | 1276 | } |
1195 | 1277 | ||
1196 | rbd_coll_end_req(req_data, rc, bytes); | 1278 | rbd_coll_end_req(rbd_req, rc, bytes); |
1197 | 1279 | ||
1198 | if (req_data->bio) | 1280 | if (rbd_req->bio) |
1199 | bio_chain_put(req_data->bio); | 1281 | bio_chain_put(rbd_req->bio); |
1200 | 1282 | ||
1201 | ceph_osdc_put_request(req); | 1283 | ceph_osdc_put_request(osd_req); |
1202 | kfree(req_data); | 1284 | kfree(rbd_req); |
1203 | } | 1285 | } |
1204 | 1286 | ||
1205 | static void rbd_simple_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) | 1287 | static void rbd_simple_req_cb(struct ceph_osd_request *osd_req, |
1288 | struct ceph_msg *msg) | ||
1206 | { | 1289 | { |
1207 | ceph_osdc_put_request(req); | 1290 | ceph_osdc_put_request(osd_req); |
1208 | } | 1291 | } |
1209 | 1292 | ||
1210 | /* | 1293 | /* |
1211 | * Do a synchronous ceph osd operation | 1294 | * Do a synchronous ceph osd operation |
1212 | */ | 1295 | */ |
1213 | static int rbd_req_sync_op(struct rbd_device *rbd_dev, | 1296 | static int rbd_req_sync_op(struct rbd_device *rbd_dev, |
1214 | struct ceph_snap_context *snapc, | ||
1215 | u64 snapid, | ||
1216 | int flags, | 1297 | int flags, |
1217 | struct ceph_osd_req_op *ops, | 1298 | struct ceph_osd_req_op *op, |
1218 | const char *object_name, | 1299 | const char *object_name, |
1219 | u64 ofs, u64 inbound_size, | 1300 | u64 ofs, u64 inbound_size, |
1220 | char *inbound, | 1301 | char *inbound, |
1221 | struct ceph_osd_request **linger_req, | ||
1222 | u64 *ver) | 1302 | u64 *ver) |
1223 | { | 1303 | { |
1224 | int ret; | 1304 | int ret; |
1225 | struct page **pages; | 1305 | struct page **pages; |
1226 | int num_pages; | 1306 | int num_pages; |
1227 | 1307 | ||
1228 | rbd_assert(ops != NULL); | 1308 | rbd_assert(op != NULL); |
1229 | 1309 | ||
1230 | num_pages = calc_pages_for(ofs, inbound_size); | 1310 | num_pages = calc_pages_for(ofs, inbound_size); |
1231 | pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); | 1311 | pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); |
1232 | if (IS_ERR(pages)) | 1312 | if (IS_ERR(pages)) |
1233 | return PTR_ERR(pages); | 1313 | return PTR_ERR(pages); |
1234 | 1314 | ||
1235 | ret = rbd_do_request(NULL, rbd_dev, snapc, snapid, | 1315 | ret = rbd_do_request(NULL, rbd_dev, NULL, CEPH_NOSNAP, |
1236 | object_name, ofs, inbound_size, NULL, | 1316 | object_name, ofs, inbound_size, NULL, |
1237 | pages, num_pages, | 1317 | pages, num_pages, |
1238 | flags, | 1318 | flags, |
1239 | ops, | 1319 | op, |
1240 | NULL, 0, | 1320 | NULL, 0, |
1241 | NULL, | 1321 | NULL, |
1242 | linger_req, ver); | 1322 | ver); |
1243 | if (ret < 0) | 1323 | if (ret < 0) |
1244 | goto done; | 1324 | goto done; |
1245 | 1325 | ||
@@ -1262,12 +1342,11 @@ static int rbd_do_op(struct request *rq, | |||
1262 | struct rbd_req_coll *coll, | 1342 | struct rbd_req_coll *coll, |
1263 | int coll_index) | 1343 | int coll_index) |
1264 | { | 1344 | { |
1265 | char *seg_name; | 1345 | const char *seg_name; |
1266 | u64 seg_ofs; | 1346 | u64 seg_ofs; |
1267 | u64 seg_len; | 1347 | u64 seg_len; |
1268 | int ret; | 1348 | int ret; |
1269 | struct ceph_osd_req_op *ops; | 1349 | struct ceph_osd_req_op *op; |
1270 | u32 payload_len; | ||
1271 | int opcode; | 1350 | int opcode; |
1272 | int flags; | 1351 | int flags; |
1273 | u64 snapid; | 1352 | u64 snapid; |
@@ -1282,18 +1361,16 @@ static int rbd_do_op(struct request *rq, | |||
1282 | opcode = CEPH_OSD_OP_WRITE; | 1361 | opcode = CEPH_OSD_OP_WRITE; |
1283 | flags = CEPH_OSD_FLAG_WRITE|CEPH_OSD_FLAG_ONDISK; | 1362 | flags = CEPH_OSD_FLAG_WRITE|CEPH_OSD_FLAG_ONDISK; |
1284 | snapid = CEPH_NOSNAP; | 1363 | snapid = CEPH_NOSNAP; |
1285 | payload_len = seg_len; | ||
1286 | } else { | 1364 | } else { |
1287 | opcode = CEPH_OSD_OP_READ; | 1365 | opcode = CEPH_OSD_OP_READ; |
1288 | flags = CEPH_OSD_FLAG_READ; | 1366 | flags = CEPH_OSD_FLAG_READ; |
1289 | snapc = NULL; | 1367 | rbd_assert(!snapc); |
1290 | snapid = rbd_dev->spec->snap_id; | 1368 | snapid = rbd_dev->spec->snap_id; |
1291 | payload_len = 0; | ||
1292 | } | 1369 | } |
1293 | 1370 | ||
1294 | ret = -ENOMEM; | 1371 | ret = -ENOMEM; |
1295 | ops = rbd_create_rw_ops(1, opcode, payload_len); | 1372 | op = rbd_osd_req_op_create(opcode, seg_ofs, seg_len); |
1296 | if (!ops) | 1373 | if (!op) |
1297 | goto done; | 1374 | goto done; |
1298 | 1375 | ||
1299 | /* we've taken care of segment sizes earlier when we | 1376 | /* we've taken care of segment sizes earlier when we |
@@ -1306,11 +1383,13 @@ static int rbd_do_op(struct request *rq, | |||
1306 | bio, | 1383 | bio, |
1307 | NULL, 0, | 1384 | NULL, 0, |
1308 | flags, | 1385 | flags, |
1309 | ops, | 1386 | op, |
1310 | coll, coll_index, | 1387 | coll, coll_index, |
1311 | rbd_req_cb, 0, NULL); | 1388 | rbd_req_cb, NULL); |
1312 | 1389 | if (ret < 0) | |
1313 | rbd_destroy_ops(ops); | 1390 | rbd_coll_end_req_index(rq, coll, coll_index, |
1391 | (s32)ret, seg_len); | ||
1392 | rbd_osd_req_op_destroy(op); | ||
1314 | done: | 1393 | done: |
1315 | kfree(seg_name); | 1394 | kfree(seg_name); |
1316 | return ret; | 1395 | return ret; |
@@ -1320,24 +1399,21 @@ done: | |||
1320 | * Request sync osd read | 1399 | * Request sync osd read |
1321 | */ | 1400 | */ |
1322 | static int rbd_req_sync_read(struct rbd_device *rbd_dev, | 1401 | static int rbd_req_sync_read(struct rbd_device *rbd_dev, |
1323 | u64 snapid, | ||
1324 | const char *object_name, | 1402 | const char *object_name, |
1325 | u64 ofs, u64 len, | 1403 | u64 ofs, u64 len, |
1326 | char *buf, | 1404 | char *buf, |
1327 | u64 *ver) | 1405 | u64 *ver) |
1328 | { | 1406 | { |
1329 | struct ceph_osd_req_op *ops; | 1407 | struct ceph_osd_req_op *op; |
1330 | int ret; | 1408 | int ret; |
1331 | 1409 | ||
1332 | ops = rbd_create_rw_ops(1, CEPH_OSD_OP_READ, 0); | 1410 | op = rbd_osd_req_op_create(CEPH_OSD_OP_READ, ofs, len); |
1333 | if (!ops) | 1411 | if (!op) |
1334 | return -ENOMEM; | 1412 | return -ENOMEM; |
1335 | 1413 | ||
1336 | ret = rbd_req_sync_op(rbd_dev, NULL, | 1414 | ret = rbd_req_sync_op(rbd_dev, CEPH_OSD_FLAG_READ, |
1337 | snapid, | 1415 | op, object_name, ofs, len, buf, ver); |
1338 | CEPH_OSD_FLAG_READ, | 1416 | rbd_osd_req_op_destroy(op); |
1339 | ops, object_name, ofs, len, buf, NULL, ver); | ||
1340 | rbd_destroy_ops(ops); | ||
1341 | 1417 | ||
1342 | return ret; | 1418 | return ret; |
1343 | } | 1419 | } |
@@ -1349,26 +1425,23 @@ static int rbd_req_sync_notify_ack(struct rbd_device *rbd_dev, | |||
1349 | u64 ver, | 1425 | u64 ver, |
1350 | u64 notify_id) | 1426 | u64 notify_id) |
1351 | { | 1427 | { |
1352 | struct ceph_osd_req_op *ops; | 1428 | struct ceph_osd_req_op *op; |
1353 | int ret; | 1429 | int ret; |
1354 | 1430 | ||
1355 | ops = rbd_create_rw_ops(1, CEPH_OSD_OP_NOTIFY_ACK, 0); | 1431 | op = rbd_osd_req_op_create(CEPH_OSD_OP_NOTIFY_ACK, notify_id, ver); |
1356 | if (!ops) | 1432 | if (!op) |
1357 | return -ENOMEM; | 1433 | return -ENOMEM; |
1358 | 1434 | ||
1359 | ops[0].watch.ver = cpu_to_le64(ver); | ||
1360 | ops[0].watch.cookie = notify_id; | ||
1361 | ops[0].watch.flag = 0; | ||
1362 | |||
1363 | ret = rbd_do_request(NULL, rbd_dev, NULL, CEPH_NOSNAP, | 1435 | ret = rbd_do_request(NULL, rbd_dev, NULL, CEPH_NOSNAP, |
1364 | rbd_dev->header_name, 0, 0, NULL, | 1436 | rbd_dev->header_name, 0, 0, NULL, |
1365 | NULL, 0, | 1437 | NULL, 0, |
1366 | CEPH_OSD_FLAG_READ, | 1438 | CEPH_OSD_FLAG_READ, |
1367 | ops, | 1439 | op, |
1368 | NULL, 0, | 1440 | NULL, 0, |
1369 | rbd_simple_req_cb, 0, NULL); | 1441 | rbd_simple_req_cb, NULL); |
1442 | |||
1443 | rbd_osd_req_op_destroy(op); | ||
1370 | 1444 | ||
1371 | rbd_destroy_ops(ops); | ||
1372 | return ret; | 1445 | return ret; |
1373 | } | 1446 | } |
1374 | 1447 | ||
@@ -1386,83 +1459,51 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) | |||
1386 | (unsigned int) opcode); | 1459 | (unsigned int) opcode); |
1387 | rc = rbd_dev_refresh(rbd_dev, &hver); | 1460 | rc = rbd_dev_refresh(rbd_dev, &hver); |
1388 | if (rc) | 1461 | if (rc) |
1389 | pr_warning(RBD_DRV_NAME "%d got notification but failed to " | 1462 | rbd_warn(rbd_dev, "got notification but failed to " |
1390 | " update snaps: %d\n", rbd_dev->major, rc); | 1463 | " update snaps: %d\n", rc); |
1391 | 1464 | ||
1392 | rbd_req_sync_notify_ack(rbd_dev, hver, notify_id); | 1465 | rbd_req_sync_notify_ack(rbd_dev, hver, notify_id); |
1393 | } | 1466 | } |
1394 | 1467 | ||
1395 | /* | 1468 | /* |
1396 | * Request sync osd watch | 1469 | * Request sync osd watch/unwatch. The value of "start" determines |
1470 | * whether a watch request is being initiated or torn down. | ||
1397 | */ | 1471 | */ |
1398 | static int rbd_req_sync_watch(struct rbd_device *rbd_dev) | 1472 | static int rbd_req_sync_watch(struct rbd_device *rbd_dev, int start) |
1399 | { | 1473 | { |
1400 | struct ceph_osd_req_op *ops; | 1474 | struct ceph_osd_req_op *op; |
1401 | struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; | 1475 | int ret = 0; |
1402 | int ret; | ||
1403 | 1476 | ||
1404 | ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0); | 1477 | rbd_assert(start ^ !!rbd_dev->watch_event); |
1405 | if (!ops) | 1478 | rbd_assert(start ^ !!rbd_dev->watch_request); |
1406 | return -ENOMEM; | ||
1407 | 1479 | ||
1408 | ret = ceph_osdc_create_event(osdc, rbd_watch_cb, 0, | 1480 | if (start) { |
1409 | (void *)rbd_dev, &rbd_dev->watch_event); | 1481 | struct ceph_osd_client *osdc; |
1410 | if (ret < 0) | ||
1411 | goto fail; | ||
1412 | 1482 | ||
1413 | ops[0].watch.ver = cpu_to_le64(rbd_dev->header.obj_version); | 1483 | osdc = &rbd_dev->rbd_client->client->osdc; |
1414 | ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie); | 1484 | ret = ceph_osdc_create_event(osdc, rbd_watch_cb, 0, rbd_dev, |
1415 | ops[0].watch.flag = 1; | 1485 | &rbd_dev->watch_event); |
1486 | if (ret < 0) | ||
1487 | return ret; | ||
1488 | } | ||
1416 | 1489 | ||
1417 | ret = rbd_req_sync_op(rbd_dev, NULL, | 1490 | op = rbd_osd_req_op_create(CEPH_OSD_OP_WATCH, |
1418 | CEPH_NOSNAP, | 1491 | rbd_dev->watch_event->cookie, |
1492 | rbd_dev->header.obj_version, start); | ||
1493 | if (op) | ||
1494 | ret = rbd_req_sync_op(rbd_dev, | ||
1419 | CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, | 1495 | CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, |
1420 | ops, | 1496 | op, rbd_dev->header_name, |
1421 | rbd_dev->header_name, | 1497 | 0, 0, NULL, NULL); |
1422 | 0, 0, NULL, | ||
1423 | &rbd_dev->watch_request, NULL); | ||
1424 | 1498 | ||
1425 | if (ret < 0) | 1499 | /* Cancel the event if we're tearing down, or on error */ |
1426 | goto fail_event; | ||
1427 | |||
1428 | rbd_destroy_ops(ops); | ||
1429 | return 0; | ||
1430 | |||
1431 | fail_event: | ||
1432 | ceph_osdc_cancel_event(rbd_dev->watch_event); | ||
1433 | rbd_dev->watch_event = NULL; | ||
1434 | fail: | ||
1435 | rbd_destroy_ops(ops); | ||
1436 | return ret; | ||
1437 | } | ||
1438 | |||
1439 | /* | ||
1440 | * Request sync osd unwatch | ||
1441 | */ | ||
1442 | static int rbd_req_sync_unwatch(struct rbd_device *rbd_dev) | ||
1443 | { | ||
1444 | struct ceph_osd_req_op *ops; | ||
1445 | int ret; | ||
1446 | |||
1447 | ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0); | ||
1448 | if (!ops) | ||
1449 | return -ENOMEM; | ||
1450 | |||
1451 | ops[0].watch.ver = 0; | ||
1452 | ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie); | ||
1453 | ops[0].watch.flag = 0; | ||
1454 | |||
1455 | ret = rbd_req_sync_op(rbd_dev, NULL, | ||
1456 | CEPH_NOSNAP, | ||
1457 | CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, | ||
1458 | ops, | ||
1459 | rbd_dev->header_name, | ||
1460 | 0, 0, NULL, NULL, NULL); | ||
1461 | 1500 | ||
1501 | if (!start || !op || ret < 0) { | ||
1502 | ceph_osdc_cancel_event(rbd_dev->watch_event); | ||
1503 | rbd_dev->watch_event = NULL; | ||
1504 | } | ||
1505 | rbd_osd_req_op_destroy(op); | ||
1462 | 1506 | ||
1463 | rbd_destroy_ops(ops); | ||
1464 | ceph_osdc_cancel_event(rbd_dev->watch_event); | ||
1465 | rbd_dev->watch_event = NULL; | ||
1466 | return ret; | 1507 | return ret; |
1467 | } | 1508 | } |
1468 | 1509 | ||
@@ -1477,13 +1518,9 @@ static int rbd_req_sync_exec(struct rbd_device *rbd_dev, | |||
1477 | size_t outbound_size, | 1518 | size_t outbound_size, |
1478 | char *inbound, | 1519 | char *inbound, |
1479 | size_t inbound_size, | 1520 | size_t inbound_size, |
1480 | int flags, | ||
1481 | u64 *ver) | 1521 | u64 *ver) |
1482 | { | 1522 | { |
1483 | struct ceph_osd_req_op *ops; | 1523 | struct ceph_osd_req_op *op; |
1484 | int class_name_len = strlen(class_name); | ||
1485 | int method_name_len = strlen(method_name); | ||
1486 | int payload_size; | ||
1487 | int ret; | 1524 | int ret; |
1488 | 1525 | ||
1489 | /* | 1526 | /* |
@@ -1494,26 +1531,16 @@ static int rbd_req_sync_exec(struct rbd_device *rbd_dev, | |||
1494 | * the perspective of the server side) in the OSD request | 1531 | * the perspective of the server side) in the OSD request |
1495 | * operation. | 1532 | * operation. |
1496 | */ | 1533 | */ |
1497 | payload_size = class_name_len + method_name_len + outbound_size; | 1534 | op = rbd_osd_req_op_create(CEPH_OSD_OP_CALL, class_name, |
1498 | ops = rbd_create_rw_ops(1, CEPH_OSD_OP_CALL, payload_size); | 1535 | method_name, outbound, outbound_size); |
1499 | if (!ops) | 1536 | if (!op) |
1500 | return -ENOMEM; | 1537 | return -ENOMEM; |
1501 | 1538 | ||
1502 | ops[0].cls.class_name = class_name; | 1539 | ret = rbd_req_sync_op(rbd_dev, CEPH_OSD_FLAG_READ, op, |
1503 | ops[0].cls.class_len = (__u8) class_name_len; | ||
1504 | ops[0].cls.method_name = method_name; | ||
1505 | ops[0].cls.method_len = (__u8) method_name_len; | ||
1506 | ops[0].cls.argc = 0; | ||
1507 | ops[0].cls.indata = outbound; | ||
1508 | ops[0].cls.indata_len = outbound_size; | ||
1509 | |||
1510 | ret = rbd_req_sync_op(rbd_dev, NULL, | ||
1511 | CEPH_NOSNAP, | ||
1512 | flags, ops, | ||
1513 | object_name, 0, inbound_size, inbound, | 1540 | object_name, 0, inbound_size, inbound, |
1514 | NULL, ver); | 1541 | ver); |
1515 | 1542 | ||
1516 | rbd_destroy_ops(ops); | 1543 | rbd_osd_req_op_destroy(op); |
1517 | 1544 | ||
1518 | dout("cls_exec returned %d\n", ret); | 1545 | dout("cls_exec returned %d\n", ret); |
1519 | return ret; | 1546 | return ret; |
@@ -1533,113 +1560,123 @@ static struct rbd_req_coll *rbd_alloc_coll(int num_reqs) | |||
1533 | return coll; | 1560 | return coll; |
1534 | } | 1561 | } |
1535 | 1562 | ||
1563 | static int rbd_dev_do_request(struct request *rq, | ||
1564 | struct rbd_device *rbd_dev, | ||
1565 | struct ceph_snap_context *snapc, | ||
1566 | u64 ofs, unsigned int size, | ||
1567 | struct bio *bio_chain) | ||
1568 | { | ||
1569 | int num_segs; | ||
1570 | struct rbd_req_coll *coll; | ||
1571 | unsigned int bio_offset; | ||
1572 | int cur_seg = 0; | ||
1573 | |||
1574 | dout("%s 0x%x bytes at 0x%llx\n", | ||
1575 | rq_data_dir(rq) == WRITE ? "write" : "read", | ||
1576 | size, (unsigned long long) blk_rq_pos(rq) * SECTOR_SIZE); | ||
1577 | |||
1578 | num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size); | ||
1579 | if (num_segs <= 0) | ||
1580 | return num_segs; | ||
1581 | |||
1582 | coll = rbd_alloc_coll(num_segs); | ||
1583 | if (!coll) | ||
1584 | return -ENOMEM; | ||
1585 | |||
1586 | bio_offset = 0; | ||
1587 | do { | ||
1588 | u64 limit = rbd_segment_length(rbd_dev, ofs, size); | ||
1589 | unsigned int clone_size; | ||
1590 | struct bio *bio_clone; | ||
1591 | |||
1592 | BUG_ON(limit > (u64)UINT_MAX); | ||
1593 | clone_size = (unsigned int)limit; | ||
1594 | dout("bio_chain->bi_vcnt=%hu\n", bio_chain->bi_vcnt); | ||
1595 | |||
1596 | kref_get(&coll->kref); | ||
1597 | |||
1598 | /* Pass a cloned bio chain via an osd request */ | ||
1599 | |||
1600 | bio_clone = bio_chain_clone_range(&bio_chain, | ||
1601 | &bio_offset, clone_size, | ||
1602 | GFP_ATOMIC); | ||
1603 | if (bio_clone) | ||
1604 | (void)rbd_do_op(rq, rbd_dev, snapc, | ||
1605 | ofs, clone_size, | ||
1606 | bio_clone, coll, cur_seg); | ||
1607 | else | ||
1608 | rbd_coll_end_req_index(rq, coll, cur_seg, | ||
1609 | (s32)-ENOMEM, | ||
1610 | clone_size); | ||
1611 | size -= clone_size; | ||
1612 | ofs += clone_size; | ||
1613 | |||
1614 | cur_seg++; | ||
1615 | } while (size > 0); | ||
1616 | kref_put(&coll->kref, rbd_coll_release); | ||
1617 | |||
1618 | return 0; | ||
1619 | } | ||
1620 | |||
1536 | /* | 1621 | /* |
1537 | * block device queue callback | 1622 | * block device queue callback |
1538 | */ | 1623 | */ |
1539 | static void rbd_rq_fn(struct request_queue *q) | 1624 | static void rbd_rq_fn(struct request_queue *q) |
1540 | { | 1625 | { |
1541 | struct rbd_device *rbd_dev = q->queuedata; | 1626 | struct rbd_device *rbd_dev = q->queuedata; |
1627 | bool read_only = rbd_dev->mapping.read_only; | ||
1542 | struct request *rq; | 1628 | struct request *rq; |
1543 | 1629 | ||
1544 | while ((rq = blk_fetch_request(q))) { | 1630 | while ((rq = blk_fetch_request(q))) { |
1545 | struct bio *bio; | 1631 | struct ceph_snap_context *snapc = NULL; |
1546 | bool do_write; | 1632 | unsigned int size = 0; |
1547 | unsigned int size; | 1633 | int result; |
1548 | u64 ofs; | ||
1549 | int num_segs, cur_seg = 0; | ||
1550 | struct rbd_req_coll *coll; | ||
1551 | struct ceph_snap_context *snapc; | ||
1552 | unsigned int bio_offset; | ||
1553 | 1634 | ||
1554 | dout("fetched request\n"); | 1635 | dout("fetched request\n"); |
1555 | 1636 | ||
1556 | /* filter out block requests we don't understand */ | 1637 | /* Filter out block requests we don't understand */ |
1638 | |||
1557 | if ((rq->cmd_type != REQ_TYPE_FS)) { | 1639 | if ((rq->cmd_type != REQ_TYPE_FS)) { |
1558 | __blk_end_request_all(rq, 0); | 1640 | __blk_end_request_all(rq, 0); |
1559 | continue; | 1641 | continue; |
1560 | } | 1642 | } |
1561 | |||
1562 | /* deduce our operation (read, write) */ | ||
1563 | do_write = (rq_data_dir(rq) == WRITE); | ||
1564 | if (do_write && rbd_dev->mapping.read_only) { | ||
1565 | __blk_end_request_all(rq, -EROFS); | ||
1566 | continue; | ||
1567 | } | ||
1568 | |||
1569 | spin_unlock_irq(q->queue_lock); | 1643 | spin_unlock_irq(q->queue_lock); |
1570 | 1644 | ||
1571 | down_read(&rbd_dev->header_rwsem); | 1645 | /* Write requests need a reference to the snapshot context */ |
1572 | 1646 | ||
1573 | if (!rbd_dev->exists) { | 1647 | if (rq_data_dir(rq) == WRITE) { |
1574 | rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP); | 1648 | result = -EROFS; |
1649 | if (read_only) /* Can't write to a read-only device */ | ||
1650 | goto out_end_request; | ||
1651 | |||
1652 | /* | ||
1653 | * Note that each osd request will take its | ||
1654 | * own reference to the snapshot context | ||
1655 | * supplied. The reference we take here | ||
1656 | * just guarantees the one we provide stays | ||
1657 | * valid. | ||
1658 | */ | ||
1659 | down_read(&rbd_dev->header_rwsem); | ||
1660 | snapc = ceph_get_snap_context(rbd_dev->header.snapc); | ||
1575 | up_read(&rbd_dev->header_rwsem); | 1661 | up_read(&rbd_dev->header_rwsem); |
1662 | rbd_assert(snapc != NULL); | ||
1663 | } else if (!atomic_read(&rbd_dev->exists)) { | ||
1664 | rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP); | ||
1576 | dout("request for non-existent snapshot"); | 1665 | dout("request for non-existent snapshot"); |
1577 | spin_lock_irq(q->queue_lock); | 1666 | result = -ENXIO; |
1578 | __blk_end_request_all(rq, -ENXIO); | 1667 | goto out_end_request; |
1579 | continue; | ||
1580 | } | 1668 | } |
1581 | 1669 | ||
1582 | snapc = ceph_get_snap_context(rbd_dev->header.snapc); | ||
1583 | |||
1584 | up_read(&rbd_dev->header_rwsem); | ||
1585 | |||
1586 | size = blk_rq_bytes(rq); | 1670 | size = blk_rq_bytes(rq); |
1587 | ofs = blk_rq_pos(rq) * SECTOR_SIZE; | 1671 | result = rbd_dev_do_request(rq, rbd_dev, snapc, |
1588 | bio = rq->bio; | 1672 | blk_rq_pos(rq) * SECTOR_SIZE, |
1589 | 1673 | size, rq->bio); | |
1590 | dout("%s 0x%x bytes at 0x%llx\n", | 1674 | out_end_request: |
1591 | do_write ? "write" : "read", | 1675 | if (snapc) |
1592 | size, (unsigned long long) blk_rq_pos(rq) * SECTOR_SIZE); | ||
1593 | |||
1594 | num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size); | ||
1595 | if (num_segs <= 0) { | ||
1596 | spin_lock_irq(q->queue_lock); | ||
1597 | __blk_end_request_all(rq, num_segs); | ||
1598 | ceph_put_snap_context(snapc); | 1676 | ceph_put_snap_context(snapc); |
1599 | continue; | ||
1600 | } | ||
1601 | coll = rbd_alloc_coll(num_segs); | ||
1602 | if (!coll) { | ||
1603 | spin_lock_irq(q->queue_lock); | ||
1604 | __blk_end_request_all(rq, -ENOMEM); | ||
1605 | ceph_put_snap_context(snapc); | ||
1606 | continue; | ||
1607 | } | ||
1608 | |||
1609 | bio_offset = 0; | ||
1610 | do { | ||
1611 | u64 limit = rbd_segment_length(rbd_dev, ofs, size); | ||
1612 | unsigned int chain_size; | ||
1613 | struct bio *bio_chain; | ||
1614 | |||
1615 | BUG_ON(limit > (u64) UINT_MAX); | ||
1616 | chain_size = (unsigned int) limit; | ||
1617 | dout("rq->bio->bi_vcnt=%hu\n", rq->bio->bi_vcnt); | ||
1618 | |||
1619 | kref_get(&coll->kref); | ||
1620 | |||
1621 | /* Pass a cloned bio chain via an osd request */ | ||
1622 | |||
1623 | bio_chain = bio_chain_clone_range(&bio, | ||
1624 | &bio_offset, chain_size, | ||
1625 | GFP_ATOMIC); | ||
1626 | if (bio_chain) | ||
1627 | (void) rbd_do_op(rq, rbd_dev, snapc, | ||
1628 | ofs, chain_size, | ||
1629 | bio_chain, coll, cur_seg); | ||
1630 | else | ||
1631 | rbd_coll_end_req_index(rq, coll, cur_seg, | ||
1632 | -ENOMEM, chain_size); | ||
1633 | size -= chain_size; | ||
1634 | ofs += chain_size; | ||
1635 | |||
1636 | cur_seg++; | ||
1637 | } while (size > 0); | ||
1638 | kref_put(&coll->kref, rbd_coll_release); | ||
1639 | |||
1640 | spin_lock_irq(q->queue_lock); | 1677 | spin_lock_irq(q->queue_lock); |
1641 | 1678 | if (!size || result < 0) | |
1642 | ceph_put_snap_context(snapc); | 1679 | __blk_end_request_all(rq, result); |
1643 | } | 1680 | } |
1644 | } | 1681 | } |
1645 | 1682 | ||
@@ -1741,8 +1778,7 @@ rbd_dev_v1_header_read(struct rbd_device *rbd_dev, u64 *version) | |||
1741 | if (!ondisk) | 1778 | if (!ondisk) |
1742 | return ERR_PTR(-ENOMEM); | 1779 | return ERR_PTR(-ENOMEM); |
1743 | 1780 | ||
1744 | ret = rbd_req_sync_read(rbd_dev, CEPH_NOSNAP, | 1781 | ret = rbd_req_sync_read(rbd_dev, rbd_dev->header_name, |
1745 | rbd_dev->header_name, | ||
1746 | 0, size, | 1782 | 0, size, |
1747 | (char *) ondisk, version); | 1783 | (char *) ondisk, version); |
1748 | 1784 | ||
@@ -1750,15 +1786,13 @@ rbd_dev_v1_header_read(struct rbd_device *rbd_dev, u64 *version) | |||
1750 | goto out_err; | 1786 | goto out_err; |
1751 | if (WARN_ON((size_t) ret < size)) { | 1787 | if (WARN_ON((size_t) ret < size)) { |
1752 | ret = -ENXIO; | 1788 | ret = -ENXIO; |
1753 | pr_warning("short header read for image %s" | 1789 | rbd_warn(rbd_dev, "short header read (want %zd got %d)", |
1754 | " (want %zd got %d)\n", | 1790 | size, ret); |
1755 | rbd_dev->spec->image_name, size, ret); | ||
1756 | goto out_err; | 1791 | goto out_err; |
1757 | } | 1792 | } |
1758 | if (!rbd_dev_ondisk_valid(ondisk)) { | 1793 | if (!rbd_dev_ondisk_valid(ondisk)) { |
1759 | ret = -ENXIO; | 1794 | ret = -ENXIO; |
1760 | pr_warning("invalid header for image %s\n", | 1795 | rbd_warn(rbd_dev, "invalid header"); |
1761 | rbd_dev->spec->image_name); | ||
1762 | goto out_err; | 1796 | goto out_err; |
1763 | } | 1797 | } |
1764 | 1798 | ||
@@ -2243,6 +2277,7 @@ struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, | |||
2243 | return NULL; | 2277 | return NULL; |
2244 | 2278 | ||
2245 | spin_lock_init(&rbd_dev->lock); | 2279 | spin_lock_init(&rbd_dev->lock); |
2280 | atomic_set(&rbd_dev->exists, 0); | ||
2246 | INIT_LIST_HEAD(&rbd_dev->node); | 2281 | INIT_LIST_HEAD(&rbd_dev->node); |
2247 | INIT_LIST_HEAD(&rbd_dev->snaps); | 2282 | INIT_LIST_HEAD(&rbd_dev->snaps); |
2248 | init_rwsem(&rbd_dev->header_rwsem); | 2283 | init_rwsem(&rbd_dev->header_rwsem); |
@@ -2250,6 +2285,13 @@ struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, | |||
2250 | rbd_dev->spec = spec; | 2285 | rbd_dev->spec = spec; |
2251 | rbd_dev->rbd_client = rbdc; | 2286 | rbd_dev->rbd_client = rbdc; |
2252 | 2287 | ||
2288 | /* Initialize the layout used for all rbd requests */ | ||
2289 | |||
2290 | rbd_dev->layout.fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); | ||
2291 | rbd_dev->layout.fl_stripe_count = cpu_to_le32(1); | ||
2292 | rbd_dev->layout.fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); | ||
2293 | rbd_dev->layout.fl_pg_pool = cpu_to_le32((u32) spec->pool_id); | ||
2294 | |||
2253 | return rbd_dev; | 2295 | return rbd_dev; |
2254 | } | 2296 | } |
2255 | 2297 | ||
@@ -2363,8 +2405,7 @@ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, | |||
2363 | ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, | 2405 | ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, |
2364 | "rbd", "get_size", | 2406 | "rbd", "get_size", |
2365 | (char *) &snapid, sizeof (snapid), | 2407 | (char *) &snapid, sizeof (snapid), |
2366 | (char *) &size_buf, sizeof (size_buf), | 2408 | (char *) &size_buf, sizeof (size_buf), NULL); |
2367 | CEPH_OSD_FLAG_READ, NULL); | ||
2368 | dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); | 2409 | dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); |
2369 | if (ret < 0) | 2410 | if (ret < 0) |
2370 | return ret; | 2411 | return ret; |
@@ -2399,8 +2440,7 @@ static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev) | |||
2399 | ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, | 2440 | ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, |
2400 | "rbd", "get_object_prefix", | 2441 | "rbd", "get_object_prefix", |
2401 | NULL, 0, | 2442 | NULL, 0, |
2402 | reply_buf, RBD_OBJ_PREFIX_LEN_MAX, | 2443 | reply_buf, RBD_OBJ_PREFIX_LEN_MAX, NULL); |
2403 | CEPH_OSD_FLAG_READ, NULL); | ||
2404 | dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); | 2444 | dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); |
2405 | if (ret < 0) | 2445 | if (ret < 0) |
2406 | goto out; | 2446 | goto out; |
@@ -2439,7 +2479,7 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, | |||
2439 | "rbd", "get_features", | 2479 | "rbd", "get_features", |
2440 | (char *) &snapid, sizeof (snapid), | 2480 | (char *) &snapid, sizeof (snapid), |
2441 | (char *) &features_buf, sizeof (features_buf), | 2481 | (char *) &features_buf, sizeof (features_buf), |
2442 | CEPH_OSD_FLAG_READ, NULL); | 2482 | NULL); |
2443 | dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); | 2483 | dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); |
2444 | if (ret < 0) | 2484 | if (ret < 0) |
2445 | return ret; | 2485 | return ret; |
@@ -2474,7 +2514,6 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) | |||
2474 | void *end; | 2514 | void *end; |
2475 | char *image_id; | 2515 | char *image_id; |
2476 | u64 overlap; | 2516 | u64 overlap; |
2477 | size_t len = 0; | ||
2478 | int ret; | 2517 | int ret; |
2479 | 2518 | ||
2480 | parent_spec = rbd_spec_alloc(); | 2519 | parent_spec = rbd_spec_alloc(); |
@@ -2495,8 +2534,7 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) | |||
2495 | ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, | 2534 | ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, |
2496 | "rbd", "get_parent", | 2535 | "rbd", "get_parent", |
2497 | (char *) &snapid, sizeof (snapid), | 2536 | (char *) &snapid, sizeof (snapid), |
2498 | (char *) reply_buf, size, | 2537 | (char *) reply_buf, size, NULL); |
2499 | CEPH_OSD_FLAG_READ, NULL); | ||
2500 | dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); | 2538 | dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); |
2501 | if (ret < 0) | 2539 | if (ret < 0) |
2502 | goto out_err; | 2540 | goto out_err; |
@@ -2508,13 +2546,18 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) | |||
2508 | if (parent_spec->pool_id == CEPH_NOPOOL) | 2546 | if (parent_spec->pool_id == CEPH_NOPOOL) |
2509 | goto out; /* No parent? No problem. */ | 2547 | goto out; /* No parent? No problem. */ |
2510 | 2548 | ||
2511 | image_id = ceph_extract_encoded_string(&p, end, &len, GFP_KERNEL); | 2549 | /* The ceph file layout needs to fit pool id in 32 bits */ |
2550 | |||
2551 | ret = -EIO; | ||
2552 | if (WARN_ON(parent_spec->pool_id > (u64) U32_MAX)) | ||
2553 | goto out; | ||
2554 | |||
2555 | image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL); | ||
2512 | if (IS_ERR(image_id)) { | 2556 | if (IS_ERR(image_id)) { |
2513 | ret = PTR_ERR(image_id); | 2557 | ret = PTR_ERR(image_id); |
2514 | goto out_err; | 2558 | goto out_err; |
2515 | } | 2559 | } |
2516 | parent_spec->image_id = image_id; | 2560 | parent_spec->image_id = image_id; |
2517 | parent_spec->image_id_len = len; | ||
2518 | ceph_decode_64_safe(&p, end, parent_spec->snap_id, out_err); | 2561 | ceph_decode_64_safe(&p, end, parent_spec->snap_id, out_err); |
2519 | ceph_decode_64_safe(&p, end, overlap, out_err); | 2562 | ceph_decode_64_safe(&p, end, overlap, out_err); |
2520 | 2563 | ||
@@ -2544,15 +2587,15 @@ static char *rbd_dev_image_name(struct rbd_device *rbd_dev) | |||
2544 | 2587 | ||
2545 | rbd_assert(!rbd_dev->spec->image_name); | 2588 | rbd_assert(!rbd_dev->spec->image_name); |
2546 | 2589 | ||
2547 | image_id_size = sizeof (__le32) + rbd_dev->spec->image_id_len; | 2590 | len = strlen(rbd_dev->spec->image_id); |
2591 | image_id_size = sizeof (__le32) + len; | ||
2548 | image_id = kmalloc(image_id_size, GFP_KERNEL); | 2592 | image_id = kmalloc(image_id_size, GFP_KERNEL); |
2549 | if (!image_id) | 2593 | if (!image_id) |
2550 | return NULL; | 2594 | return NULL; |
2551 | 2595 | ||
2552 | p = image_id; | 2596 | p = image_id; |
2553 | end = (char *) image_id + image_id_size; | 2597 | end = (char *) image_id + image_id_size; |
2554 | ceph_encode_string(&p, end, rbd_dev->spec->image_id, | 2598 | ceph_encode_string(&p, end, rbd_dev->spec->image_id, (u32) len); |
2555 | (u32) rbd_dev->spec->image_id_len); | ||
2556 | 2599 | ||
2557 | size = sizeof (__le32) + RBD_IMAGE_NAME_LEN_MAX; | 2600 | size = sizeof (__le32) + RBD_IMAGE_NAME_LEN_MAX; |
2558 | reply_buf = kmalloc(size, GFP_KERNEL); | 2601 | reply_buf = kmalloc(size, GFP_KERNEL); |
@@ -2562,8 +2605,7 @@ static char *rbd_dev_image_name(struct rbd_device *rbd_dev) | |||
2562 | ret = rbd_req_sync_exec(rbd_dev, RBD_DIRECTORY, | 2605 | ret = rbd_req_sync_exec(rbd_dev, RBD_DIRECTORY, |
2563 | "rbd", "dir_get_name", | 2606 | "rbd", "dir_get_name", |
2564 | image_id, image_id_size, | 2607 | image_id, image_id_size, |
2565 | (char *) reply_buf, size, | 2608 | (char *) reply_buf, size, NULL); |
2566 | CEPH_OSD_FLAG_READ, NULL); | ||
2567 | if (ret < 0) | 2609 | if (ret < 0) |
2568 | goto out; | 2610 | goto out; |
2569 | p = reply_buf; | 2611 | p = reply_buf; |
@@ -2602,8 +2644,11 @@ static int rbd_dev_probe_update_spec(struct rbd_device *rbd_dev) | |||
2602 | 2644 | ||
2603 | osdc = &rbd_dev->rbd_client->client->osdc; | 2645 | osdc = &rbd_dev->rbd_client->client->osdc; |
2604 | name = ceph_pg_pool_name_by_id(osdc->osdmap, rbd_dev->spec->pool_id); | 2646 | name = ceph_pg_pool_name_by_id(osdc->osdmap, rbd_dev->spec->pool_id); |
2605 | if (!name) | 2647 | if (!name) { |
2606 | return -EIO; /* pool id too large (>= 2^31) */ | 2648 | rbd_warn(rbd_dev, "there is no pool with id %llu", |
2649 | rbd_dev->spec->pool_id); /* Really a BUG() */ | ||
2650 | return -EIO; | ||
2651 | } | ||
2607 | 2652 | ||
2608 | rbd_dev->spec->pool_name = kstrdup(name, GFP_KERNEL); | 2653 | rbd_dev->spec->pool_name = kstrdup(name, GFP_KERNEL); |
2609 | if (!rbd_dev->spec->pool_name) | 2654 | if (!rbd_dev->spec->pool_name) |
@@ -2612,19 +2657,17 @@ static int rbd_dev_probe_update_spec(struct rbd_device *rbd_dev) | |||
2612 | /* Fetch the image name; tolerate failure here */ | 2657 | /* Fetch the image name; tolerate failure here */ |
2613 | 2658 | ||
2614 | name = rbd_dev_image_name(rbd_dev); | 2659 | name = rbd_dev_image_name(rbd_dev); |
2615 | if (name) { | 2660 | if (name) |
2616 | rbd_dev->spec->image_name_len = strlen(name); | ||
2617 | rbd_dev->spec->image_name = (char *) name; | 2661 | rbd_dev->spec->image_name = (char *) name; |
2618 | } else { | 2662 | else |
2619 | pr_warning(RBD_DRV_NAME "%d " | 2663 | rbd_warn(rbd_dev, "unable to get image name"); |
2620 | "unable to get image name for image id %s\n", | ||
2621 | rbd_dev->major, rbd_dev->spec->image_id); | ||
2622 | } | ||
2623 | 2664 | ||
2624 | /* Look up the snapshot name. */ | 2665 | /* Look up the snapshot name. */ |
2625 | 2666 | ||
2626 | name = rbd_snap_name(rbd_dev, rbd_dev->spec->snap_id); | 2667 | name = rbd_snap_name(rbd_dev, rbd_dev->spec->snap_id); |
2627 | if (!name) { | 2668 | if (!name) { |
2669 | rbd_warn(rbd_dev, "no snapshot with id %llu", | ||
2670 | rbd_dev->spec->snap_id); /* Really a BUG() */ | ||
2628 | ret = -EIO; | 2671 | ret = -EIO; |
2629 | goto out_err; | 2672 | goto out_err; |
2630 | } | 2673 | } |
@@ -2668,8 +2711,7 @@ static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, u64 *ver) | |||
2668 | ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, | 2711 | ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, |
2669 | "rbd", "get_snapcontext", | 2712 | "rbd", "get_snapcontext", |
2670 | NULL, 0, | 2713 | NULL, 0, |
2671 | reply_buf, size, | 2714 | reply_buf, size, ver); |
2672 | CEPH_OSD_FLAG_READ, ver); | ||
2673 | dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); | 2715 | dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); |
2674 | if (ret < 0) | 2716 | if (ret < 0) |
2675 | goto out; | 2717 | goto out; |
@@ -2738,8 +2780,7 @@ static char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u32 which) | |||
2738 | ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, | 2780 | ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, |
2739 | "rbd", "get_snapshot_name", | 2781 | "rbd", "get_snapshot_name", |
2740 | (char *) &snap_id, sizeof (snap_id), | 2782 | (char *) &snap_id, sizeof (snap_id), |
2741 | reply_buf, size, | 2783 | reply_buf, size, NULL); |
2742 | CEPH_OSD_FLAG_READ, NULL); | ||
2743 | dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); | 2784 | dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); |
2744 | if (ret < 0) | 2785 | if (ret < 0) |
2745 | goto out; | 2786 | goto out; |
@@ -2766,7 +2807,7 @@ out: | |||
2766 | static char *rbd_dev_v2_snap_info(struct rbd_device *rbd_dev, u32 which, | 2807 | static char *rbd_dev_v2_snap_info(struct rbd_device *rbd_dev, u32 which, |
2767 | u64 *snap_size, u64 *snap_features) | 2808 | u64 *snap_size, u64 *snap_features) |
2768 | { | 2809 | { |
2769 | __le64 snap_id; | 2810 | u64 snap_id; |
2770 | u8 order; | 2811 | u8 order; |
2771 | int ret; | 2812 | int ret; |
2772 | 2813 | ||
@@ -2868,7 +2909,7 @@ static int rbd_dev_snaps_update(struct rbd_device *rbd_dev) | |||
2868 | /* Existing snapshot not in the new snap context */ | 2909 | /* Existing snapshot not in the new snap context */ |
2869 | 2910 | ||
2870 | if (rbd_dev->spec->snap_id == snap->id) | 2911 | if (rbd_dev->spec->snap_id == snap->id) |
2871 | rbd_dev->exists = false; | 2912 | atomic_set(&rbd_dev->exists, 0); |
2872 | rbd_remove_snap_dev(snap); | 2913 | rbd_remove_snap_dev(snap); |
2873 | dout("%ssnap id %llu has been removed\n", | 2914 | dout("%ssnap id %llu has been removed\n", |
2874 | rbd_dev->spec->snap_id == snap->id ? | 2915 | rbd_dev->spec->snap_id == snap->id ? |
@@ -2983,22 +3024,6 @@ static void rbd_bus_del_dev(struct rbd_device *rbd_dev) | |||
2983 | device_unregister(&rbd_dev->dev); | 3024 | device_unregister(&rbd_dev->dev); |
2984 | } | 3025 | } |
2985 | 3026 | ||
2986 | static int rbd_init_watch_dev(struct rbd_device *rbd_dev) | ||
2987 | { | ||
2988 | int ret, rc; | ||
2989 | |||
2990 | do { | ||
2991 | ret = rbd_req_sync_watch(rbd_dev); | ||
2992 | if (ret == -ERANGE) { | ||
2993 | rc = rbd_dev_refresh(rbd_dev, NULL); | ||
2994 | if (rc < 0) | ||
2995 | return rc; | ||
2996 | } | ||
2997 | } while (ret == -ERANGE); | ||
2998 | |||
2999 | return ret; | ||
3000 | } | ||
3001 | |||
3002 | static atomic64_t rbd_dev_id_max = ATOMIC64_INIT(0); | 3027 | static atomic64_t rbd_dev_id_max = ATOMIC64_INIT(0); |
3003 | 3028 | ||
3004 | /* | 3029 | /* |
@@ -3138,11 +3163,9 @@ static inline char *dup_token(const char **buf, size_t *lenp) | |||
3138 | size_t len; | 3163 | size_t len; |
3139 | 3164 | ||
3140 | len = next_token(buf); | 3165 | len = next_token(buf); |
3141 | dup = kmalloc(len + 1, GFP_KERNEL); | 3166 | dup = kmemdup(*buf, len + 1, GFP_KERNEL); |
3142 | if (!dup) | 3167 | if (!dup) |
3143 | return NULL; | 3168 | return NULL; |
3144 | |||
3145 | memcpy(dup, *buf, len); | ||
3146 | *(dup + len) = '\0'; | 3169 | *(dup + len) = '\0'; |
3147 | *buf += len; | 3170 | *buf += len; |
3148 | 3171 | ||
@@ -3210,8 +3233,10 @@ static int rbd_add_parse_args(const char *buf, | |||
3210 | /* The first four tokens are required */ | 3233 | /* The first four tokens are required */ |
3211 | 3234 | ||
3212 | len = next_token(&buf); | 3235 | len = next_token(&buf); |
3213 | if (!len) | 3236 | if (!len) { |
3214 | return -EINVAL; /* Missing monitor address(es) */ | 3237 | rbd_warn(NULL, "no monitor address(es) provided"); |
3238 | return -EINVAL; | ||
3239 | } | ||
3215 | mon_addrs = buf; | 3240 | mon_addrs = buf; |
3216 | mon_addrs_size = len + 1; | 3241 | mon_addrs_size = len + 1; |
3217 | buf += len; | 3242 | buf += len; |
@@ -3220,8 +3245,10 @@ static int rbd_add_parse_args(const char *buf, | |||
3220 | options = dup_token(&buf, NULL); | 3245 | options = dup_token(&buf, NULL); |
3221 | if (!options) | 3246 | if (!options) |
3222 | return -ENOMEM; | 3247 | return -ENOMEM; |
3223 | if (!*options) | 3248 | if (!*options) { |
3224 | goto out_err; /* Missing options */ | 3249 | rbd_warn(NULL, "no options provided"); |
3250 | goto out_err; | ||
3251 | } | ||
3225 | 3252 | ||
3226 | spec = rbd_spec_alloc(); | 3253 | spec = rbd_spec_alloc(); |
3227 | if (!spec) | 3254 | if (!spec) |
@@ -3230,14 +3257,18 @@ static int rbd_add_parse_args(const char *buf, | |||
3230 | spec->pool_name = dup_token(&buf, NULL); | 3257 | spec->pool_name = dup_token(&buf, NULL); |
3231 | if (!spec->pool_name) | 3258 | if (!spec->pool_name) |
3232 | goto out_mem; | 3259 | goto out_mem; |
3233 | if (!*spec->pool_name) | 3260 | if (!*spec->pool_name) { |
3234 | goto out_err; /* Missing pool name */ | 3261 | rbd_warn(NULL, "no pool name provided"); |
3262 | goto out_err; | ||
3263 | } | ||
3235 | 3264 | ||
3236 | spec->image_name = dup_token(&buf, &spec->image_name_len); | 3265 | spec->image_name = dup_token(&buf, NULL); |
3237 | if (!spec->image_name) | 3266 | if (!spec->image_name) |
3238 | goto out_mem; | 3267 | goto out_mem; |
3239 | if (!*spec->image_name) | 3268 | if (!*spec->image_name) { |
3240 | goto out_err; /* Missing image name */ | 3269 | rbd_warn(NULL, "no image name provided"); |
3270 | goto out_err; | ||
3271 | } | ||
3241 | 3272 | ||
3242 | /* | 3273 | /* |
3243 | * Snapshot name is optional; default is to use "-" | 3274 | * Snapshot name is optional; default is to use "-" |
@@ -3251,10 +3282,9 @@ static int rbd_add_parse_args(const char *buf, | |||
3251 | ret = -ENAMETOOLONG; | 3282 | ret = -ENAMETOOLONG; |
3252 | goto out_err; | 3283 | goto out_err; |
3253 | } | 3284 | } |
3254 | spec->snap_name = kmalloc(len + 1, GFP_KERNEL); | 3285 | spec->snap_name = kmemdup(buf, len + 1, GFP_KERNEL); |
3255 | if (!spec->snap_name) | 3286 | if (!spec->snap_name) |
3256 | goto out_mem; | 3287 | goto out_mem; |
3257 | memcpy(spec->snap_name, buf, len); | ||
3258 | *(spec->snap_name + len) = '\0'; | 3288 | *(spec->snap_name + len) = '\0'; |
3259 | 3289 | ||
3260 | /* Initialize all rbd options to the defaults */ | 3290 | /* Initialize all rbd options to the defaults */ |
@@ -3323,7 +3353,7 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) | |||
3323 | * First, see if the format 2 image id file exists, and if | 3353 | * First, see if the format 2 image id file exists, and if |
3324 | * so, get the image's persistent id from it. | 3354 | * so, get the image's persistent id from it. |
3325 | */ | 3355 | */ |
3326 | size = sizeof (RBD_ID_PREFIX) + rbd_dev->spec->image_name_len; | 3356 | size = sizeof (RBD_ID_PREFIX) + strlen(rbd_dev->spec->image_name); |
3327 | object_name = kmalloc(size, GFP_NOIO); | 3357 | object_name = kmalloc(size, GFP_NOIO); |
3328 | if (!object_name) | 3358 | if (!object_name) |
3329 | return -ENOMEM; | 3359 | return -ENOMEM; |
@@ -3342,8 +3372,7 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) | |||
3342 | ret = rbd_req_sync_exec(rbd_dev, object_name, | 3372 | ret = rbd_req_sync_exec(rbd_dev, object_name, |
3343 | "rbd", "get_id", | 3373 | "rbd", "get_id", |
3344 | NULL, 0, | 3374 | NULL, 0, |
3345 | response, RBD_IMAGE_ID_LEN_MAX, | 3375 | response, RBD_IMAGE_ID_LEN_MAX, NULL); |
3346 | CEPH_OSD_FLAG_READ, NULL); | ||
3347 | dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); | 3376 | dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); |
3348 | if (ret < 0) | 3377 | if (ret < 0) |
3349 | goto out; | 3378 | goto out; |
@@ -3352,8 +3381,7 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) | |||
3352 | p = response; | 3381 | p = response; |
3353 | rbd_dev->spec->image_id = ceph_extract_encoded_string(&p, | 3382 | rbd_dev->spec->image_id = ceph_extract_encoded_string(&p, |
3354 | p + RBD_IMAGE_ID_LEN_MAX, | 3383 | p + RBD_IMAGE_ID_LEN_MAX, |
3355 | &rbd_dev->spec->image_id_len, | 3384 | NULL, GFP_NOIO); |
3356 | GFP_NOIO); | ||
3357 | if (IS_ERR(rbd_dev->spec->image_id)) { | 3385 | if (IS_ERR(rbd_dev->spec->image_id)) { |
3358 | ret = PTR_ERR(rbd_dev->spec->image_id); | 3386 | ret = PTR_ERR(rbd_dev->spec->image_id); |
3359 | rbd_dev->spec->image_id = NULL; | 3387 | rbd_dev->spec->image_id = NULL; |
@@ -3377,11 +3405,10 @@ static int rbd_dev_v1_probe(struct rbd_device *rbd_dev) | |||
3377 | rbd_dev->spec->image_id = kstrdup("", GFP_KERNEL); | 3405 | rbd_dev->spec->image_id = kstrdup("", GFP_KERNEL); |
3378 | if (!rbd_dev->spec->image_id) | 3406 | if (!rbd_dev->spec->image_id) |
3379 | return -ENOMEM; | 3407 | return -ENOMEM; |
3380 | rbd_dev->spec->image_id_len = 0; | ||
3381 | 3408 | ||
3382 | /* Record the header object name for this rbd image. */ | 3409 | /* Record the header object name for this rbd image. */ |
3383 | 3410 | ||
3384 | size = rbd_dev->spec->image_name_len + sizeof (RBD_SUFFIX); | 3411 | size = strlen(rbd_dev->spec->image_name) + sizeof (RBD_SUFFIX); |
3385 | rbd_dev->header_name = kmalloc(size, GFP_KERNEL); | 3412 | rbd_dev->header_name = kmalloc(size, GFP_KERNEL); |
3386 | if (!rbd_dev->header_name) { | 3413 | if (!rbd_dev->header_name) { |
3387 | ret = -ENOMEM; | 3414 | ret = -ENOMEM; |
@@ -3427,7 +3454,7 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev) | |||
3427 | * Image id was filled in by the caller. Record the header | 3454 | * Image id was filled in by the caller. Record the header |
3428 | * object name for this rbd image. | 3455 | * object name for this rbd image. |
3429 | */ | 3456 | */ |
3430 | size = sizeof (RBD_HEADER_PREFIX) + rbd_dev->spec->image_id_len; | 3457 | size = sizeof (RBD_HEADER_PREFIX) + strlen(rbd_dev->spec->image_id); |
3431 | rbd_dev->header_name = kmalloc(size, GFP_KERNEL); | 3458 | rbd_dev->header_name = kmalloc(size, GFP_KERNEL); |
3432 | if (!rbd_dev->header_name) | 3459 | if (!rbd_dev->header_name) |
3433 | return -ENOMEM; | 3460 | return -ENOMEM; |
@@ -3542,7 +3569,7 @@ static int rbd_dev_probe_finish(struct rbd_device *rbd_dev) | |||
3542 | if (ret) | 3569 | if (ret) |
3543 | goto err_out_bus; | 3570 | goto err_out_bus; |
3544 | 3571 | ||
3545 | ret = rbd_init_watch_dev(rbd_dev); | 3572 | ret = rbd_req_sync_watch(rbd_dev, 1); |
3546 | if (ret) | 3573 | if (ret) |
3547 | goto err_out_bus; | 3574 | goto err_out_bus; |
3548 | 3575 | ||
@@ -3638,6 +3665,13 @@ static ssize_t rbd_add(struct bus_type *bus, | |||
3638 | goto err_out_client; | 3665 | goto err_out_client; |
3639 | spec->pool_id = (u64) rc; | 3666 | spec->pool_id = (u64) rc; |
3640 | 3667 | ||
3668 | /* The ceph file layout needs to fit pool id in 32 bits */ | ||
3669 | |||
3670 | if (WARN_ON(spec->pool_id > (u64) U32_MAX)) { | ||
3671 | rc = -EIO; | ||
3672 | goto err_out_client; | ||
3673 | } | ||
3674 | |||
3641 | rbd_dev = rbd_dev_create(rbdc, spec); | 3675 | rbd_dev = rbd_dev_create(rbdc, spec); |
3642 | if (!rbd_dev) | 3676 | if (!rbd_dev) |
3643 | goto err_out_client; | 3677 | goto err_out_client; |
@@ -3698,8 +3732,7 @@ static void rbd_dev_release(struct device *dev) | |||
3698 | rbd_dev->watch_request); | 3732 | rbd_dev->watch_request); |
3699 | } | 3733 | } |
3700 | if (rbd_dev->watch_event) | 3734 | if (rbd_dev->watch_event) |
3701 | rbd_req_sync_unwatch(rbd_dev); | 3735 | rbd_req_sync_watch(rbd_dev, 0); |
3702 | |||
3703 | 3736 | ||
3704 | /* clean up and free blkdev */ | 3737 | /* clean up and free blkdev */ |
3705 | rbd_free_disk(rbd_dev); | 3738 | rbd_free_disk(rbd_dev); |