diff options
-rw-r--r-- | drivers/block/rbd.c | 1773 | ||||
-rw-r--r-- | fs/ceph/addr.c | 7 | ||||
-rw-r--r-- | fs/ceph/caps.c | 32 | ||||
-rw-r--r-- | fs/ceph/file.c | 8 | ||||
-rw-r--r-- | fs/ceph/ioctl.c | 2 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 33 | ||||
-rw-r--r-- | fs/ceph/mds_client.h | 6 | ||||
-rw-r--r-- | fs/ceph/strings.c | 4 | ||||
-rw-r--r-- | fs/ceph/super.h | 8 | ||||
-rw-r--r-- | fs/ceph/xattr.c | 210 | ||||
-rw-r--r-- | include/linux/ceph/ceph_features.h | 8 | ||||
-rw-r--r-- | include/linux/ceph/ceph_fs.h | 32 | ||||
-rw-r--r-- | include/linux/ceph/decode.h | 29 | ||||
-rw-r--r-- | include/linux/ceph/libceph.h | 16 | ||||
-rw-r--r-- | include/linux/ceph/messenger.h | 2 | ||||
-rw-r--r-- | include/linux/ceph/osd_client.h | 54 | ||||
-rw-r--r-- | include/linux/ceph/osdmap.h | 2 | ||||
-rw-r--r-- | include/linux/ceph/rados.h | 93 | ||||
-rw-r--r-- | include/linux/crush/crush.h | 2 | ||||
-rw-r--r-- | net/ceph/ceph_common.c | 16 | ||||
-rw-r--r-- | net/ceph/ceph_strings.c | 39 | ||||
-rw-r--r-- | net/ceph/crush/mapper.c | 15 | ||||
-rw-r--r-- | net/ceph/messenger.c | 5 | ||||
-rw-r--r-- | net/ceph/osd_client.c | 418 | ||||
-rw-r--r-- | net/ceph/osdmap.c | 43 | ||||
-rw-r--r-- | net/ceph/pagevec.c | 24 |
26 files changed, 1756 insertions, 1125 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 89576a0b3f2e..b0eea3eaee93 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c | |||
@@ -52,9 +52,12 @@ | |||
52 | #define SECTOR_SHIFT 9 | 52 | #define SECTOR_SHIFT 9 |
53 | #define SECTOR_SIZE (1ULL << SECTOR_SHIFT) | 53 | #define SECTOR_SIZE (1ULL << SECTOR_SHIFT) |
54 | 54 | ||
55 | /* It might be useful to have this defined elsewhere too */ | 55 | /* It might be useful to have these defined elsewhere */ |
56 | 56 | ||
57 | #define U64_MAX ((u64) (~0ULL)) | 57 | #define U8_MAX ((u8) (~0U)) |
58 | #define U16_MAX ((u16) (~0U)) | ||
59 | #define U32_MAX ((u32) (~0U)) | ||
60 | #define U64_MAX ((u64) (~0ULL)) | ||
58 | 61 | ||
59 | #define RBD_DRV_NAME "rbd" | 62 | #define RBD_DRV_NAME "rbd" |
60 | #define RBD_DRV_NAME_LONG "rbd (rados block device)" | 63 | #define RBD_DRV_NAME_LONG "rbd (rados block device)" |
@@ -66,7 +69,6 @@ | |||
66 | (NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1)) | 69 | (NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1)) |
67 | 70 | ||
68 | #define RBD_MAX_SNAP_COUNT 510 /* allows max snapc to fit in 4KB */ | 71 | #define RBD_MAX_SNAP_COUNT 510 /* allows max snapc to fit in 4KB */ |
69 | #define RBD_MAX_OPT_LEN 1024 | ||
70 | 72 | ||
71 | #define RBD_SNAP_HEAD_NAME "-" | 73 | #define RBD_SNAP_HEAD_NAME "-" |
72 | 74 | ||
@@ -93,8 +95,6 @@ | |||
93 | #define DEV_NAME_LEN 32 | 95 | #define DEV_NAME_LEN 32 |
94 | #define MAX_INT_FORMAT_WIDTH ((5 * sizeof (int)) / 2 + 1) | 96 | #define MAX_INT_FORMAT_WIDTH ((5 * sizeof (int)) / 2 + 1) |
95 | 97 | ||
96 | #define RBD_READ_ONLY_DEFAULT false | ||
97 | |||
98 | /* | 98 | /* |
99 | * block device image metadata (in-memory version) | 99 | * block device image metadata (in-memory version) |
100 | */ | 100 | */ |
@@ -119,16 +119,33 @@ struct rbd_image_header { | |||
119 | * An rbd image specification. | 119 | * An rbd image specification. |
120 | * | 120 | * |
121 | * The tuple (pool_id, image_id, snap_id) is sufficient to uniquely | 121 | * The tuple (pool_id, image_id, snap_id) is sufficient to uniquely |
122 | * identify an image. | 122 | * identify an image. Each rbd_dev structure includes a pointer to |
123 | * an rbd_spec structure that encapsulates this identity. | ||
124 | * | ||
125 | * Each of the id's in an rbd_spec has an associated name. For a | ||
126 | * user-mapped image, the names are supplied and the id's associated | ||
127 | * with them are looked up. For a layered image, a parent image is | ||
128 | * defined by the tuple, and the names are looked up. | ||
129 | * | ||
130 | * An rbd_dev structure contains a parent_spec pointer which is | ||
131 | * non-null if the image it represents is a child in a layered | ||
132 | * image. This pointer will refer to the rbd_spec structure used | ||
133 | * by the parent rbd_dev for its own identity (i.e., the structure | ||
134 | * is shared between the parent and child). | ||
135 | * | ||
136 | * Since these structures are populated once, during the discovery | ||
137 | * phase of image construction, they are effectively immutable so | ||
138 | * we make no effort to synchronize access to them. | ||
139 | * | ||
140 | * Note that code herein does not assume the image name is known (it | ||
141 | * could be a null pointer). | ||
123 | */ | 142 | */ |
124 | struct rbd_spec { | 143 | struct rbd_spec { |
125 | u64 pool_id; | 144 | u64 pool_id; |
126 | char *pool_name; | 145 | char *pool_name; |
127 | 146 | ||
128 | char *image_id; | 147 | char *image_id; |
129 | size_t image_id_len; | ||
130 | char *image_name; | 148 | char *image_name; |
131 | size_t image_name_len; | ||
132 | 149 | ||
133 | u64 snap_id; | 150 | u64 snap_id; |
134 | char *snap_name; | 151 | char *snap_name; |
@@ -136,10 +153,6 @@ struct rbd_spec { | |||
136 | struct kref kref; | 153 | struct kref kref; |
137 | }; | 154 | }; |
138 | 155 | ||
139 | struct rbd_options { | ||
140 | bool read_only; | ||
141 | }; | ||
142 | |||
143 | /* | 156 | /* |
144 | * an instance of the client. multiple devices may share an rbd client. | 157 | * an instance of the client. multiple devices may share an rbd client. |
145 | */ | 158 | */ |
@@ -149,37 +162,76 @@ struct rbd_client { | |||
149 | struct list_head node; | 162 | struct list_head node; |
150 | }; | 163 | }; |
151 | 164 | ||
152 | /* | 165 | struct rbd_img_request; |
153 | * a request completion status | 166 | typedef void (*rbd_img_callback_t)(struct rbd_img_request *); |
154 | */ | 167 | |
155 | struct rbd_req_status { | 168 | #define BAD_WHICH U32_MAX /* Good which or bad which, which? */ |
156 | int done; | 169 | |
157 | int rc; | 170 | struct rbd_obj_request; |
158 | u64 bytes; | 171 | typedef void (*rbd_obj_callback_t)(struct rbd_obj_request *); |
172 | |||
173 | enum obj_request_type { | ||
174 | OBJ_REQUEST_NODATA, OBJ_REQUEST_BIO, OBJ_REQUEST_PAGES | ||
159 | }; | 175 | }; |
160 | 176 | ||
161 | /* | 177 | struct rbd_obj_request { |
162 | * a collection of requests | 178 | const char *object_name; |
163 | */ | 179 | u64 offset; /* object start byte */ |
164 | struct rbd_req_coll { | 180 | u64 length; /* bytes from offset */ |
165 | int total; | 181 | |
166 | int num_done; | 182 | struct rbd_img_request *img_request; |
183 | struct list_head links; /* img_request->obj_requests */ | ||
184 | u32 which; /* posn image request list */ | ||
185 | |||
186 | enum obj_request_type type; | ||
187 | union { | ||
188 | struct bio *bio_list; | ||
189 | struct { | ||
190 | struct page **pages; | ||
191 | u32 page_count; | ||
192 | }; | ||
193 | }; | ||
194 | |||
195 | struct ceph_osd_request *osd_req; | ||
196 | |||
197 | u64 xferred; /* bytes transferred */ | ||
198 | u64 version; | ||
199 | s32 result; | ||
200 | atomic_t done; | ||
201 | |||
202 | rbd_obj_callback_t callback; | ||
203 | struct completion completion; | ||
204 | |||
167 | struct kref kref; | 205 | struct kref kref; |
168 | struct rbd_req_status status[0]; | ||
169 | }; | 206 | }; |
170 | 207 | ||
171 | /* | 208 | struct rbd_img_request { |
172 | * a single io request | 209 | struct request *rq; |
173 | */ | 210 | struct rbd_device *rbd_dev; |
174 | struct rbd_request { | 211 | u64 offset; /* starting image byte offset */ |
175 | struct request *rq; /* blk layer request */ | 212 | u64 length; /* byte count from offset */ |
176 | struct bio *bio; /* cloned bio */ | 213 | bool write_request; /* false for read */ |
177 | struct page **pages; /* list of used pages */ | 214 | union { |
178 | u64 len; | 215 | struct ceph_snap_context *snapc; /* for writes */ |
179 | int coll_index; | 216 | u64 snap_id; /* for reads */ |
180 | struct rbd_req_coll *coll; | 217 | }; |
218 | spinlock_t completion_lock;/* protects next_completion */ | ||
219 | u32 next_completion; | ||
220 | rbd_img_callback_t callback; | ||
221 | |||
222 | u32 obj_request_count; | ||
223 | struct list_head obj_requests; /* rbd_obj_request structs */ | ||
224 | |||
225 | struct kref kref; | ||
181 | }; | 226 | }; |
182 | 227 | ||
228 | #define for_each_obj_request(ireq, oreq) \ | ||
229 | list_for_each_entry(oreq, &(ireq)->obj_requests, links) | ||
230 | #define for_each_obj_request_from(ireq, oreq) \ | ||
231 | list_for_each_entry_from(oreq, &(ireq)->obj_requests, links) | ||
232 | #define for_each_obj_request_safe(ireq, oreq, n) \ | ||
233 | list_for_each_entry_safe_reverse(oreq, n, &(ireq)->obj_requests, links) | ||
234 | |||
183 | struct rbd_snap { | 235 | struct rbd_snap { |
184 | struct device dev; | 236 | struct device dev; |
185 | const char *name; | 237 | const char *name; |
@@ -209,16 +261,18 @@ struct rbd_device { | |||
209 | 261 | ||
210 | char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ | 262 | char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ |
211 | 263 | ||
212 | spinlock_t lock; /* queue lock */ | 264 | spinlock_t lock; /* queue, flags, open_count */ |
213 | 265 | ||
214 | struct rbd_image_header header; | 266 | struct rbd_image_header header; |
215 | bool exists; | 267 | unsigned long flags; /* possibly lock protected */ |
216 | struct rbd_spec *spec; | 268 | struct rbd_spec *spec; |
217 | 269 | ||
218 | char *header_name; | 270 | char *header_name; |
219 | 271 | ||
272 | struct ceph_file_layout layout; | ||
273 | |||
220 | struct ceph_osd_event *watch_event; | 274 | struct ceph_osd_event *watch_event; |
221 | struct ceph_osd_request *watch_request; | 275 | struct rbd_obj_request *watch_request; |
222 | 276 | ||
223 | struct rbd_spec *parent_spec; | 277 | struct rbd_spec *parent_spec; |
224 | u64 parent_overlap; | 278 | u64 parent_overlap; |
@@ -235,7 +289,19 @@ struct rbd_device { | |||
235 | 289 | ||
236 | /* sysfs related */ | 290 | /* sysfs related */ |
237 | struct device dev; | 291 | struct device dev; |
238 | unsigned long open_count; | 292 | unsigned long open_count; /* protected by lock */ |
293 | }; | ||
294 | |||
295 | /* | ||
296 | * Flag bits for rbd_dev->flags. If atomicity is required, | ||
297 | * rbd_dev->lock is used to protect access. | ||
298 | * | ||
299 | * Currently, only the "removing" flag (which is coupled with the | ||
300 | * "open_count" field) requires atomic access. | ||
301 | */ | ||
302 | enum rbd_dev_flags { | ||
303 | RBD_DEV_FLAG_EXISTS, /* mapped snapshot has not been deleted */ | ||
304 | RBD_DEV_FLAG_REMOVING, /* this mapping is being removed */ | ||
239 | }; | 305 | }; |
240 | 306 | ||
241 | static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ | 307 | static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ |
@@ -277,6 +343,33 @@ static struct device rbd_root_dev = { | |||
277 | .release = rbd_root_dev_release, | 343 | .release = rbd_root_dev_release, |
278 | }; | 344 | }; |
279 | 345 | ||
346 | static __printf(2, 3) | ||
347 | void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...) | ||
348 | { | ||
349 | struct va_format vaf; | ||
350 | va_list args; | ||
351 | |||
352 | va_start(args, fmt); | ||
353 | vaf.fmt = fmt; | ||
354 | vaf.va = &args; | ||
355 | |||
356 | if (!rbd_dev) | ||
357 | printk(KERN_WARNING "%s: %pV\n", RBD_DRV_NAME, &vaf); | ||
358 | else if (rbd_dev->disk) | ||
359 | printk(KERN_WARNING "%s: %s: %pV\n", | ||
360 | RBD_DRV_NAME, rbd_dev->disk->disk_name, &vaf); | ||
361 | else if (rbd_dev->spec && rbd_dev->spec->image_name) | ||
362 | printk(KERN_WARNING "%s: image %s: %pV\n", | ||
363 | RBD_DRV_NAME, rbd_dev->spec->image_name, &vaf); | ||
364 | else if (rbd_dev->spec && rbd_dev->spec->image_id) | ||
365 | printk(KERN_WARNING "%s: id %s: %pV\n", | ||
366 | RBD_DRV_NAME, rbd_dev->spec->image_id, &vaf); | ||
367 | else /* punt */ | ||
368 | printk(KERN_WARNING "%s: rbd_dev %p: %pV\n", | ||
369 | RBD_DRV_NAME, rbd_dev, &vaf); | ||
370 | va_end(args); | ||
371 | } | ||
372 | |||
280 | #ifdef RBD_DEBUG | 373 | #ifdef RBD_DEBUG |
281 | #define rbd_assert(expr) \ | 374 | #define rbd_assert(expr) \ |
282 | if (unlikely(!(expr))) { \ | 375 | if (unlikely(!(expr))) { \ |
@@ -296,14 +389,23 @@ static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver); | |||
296 | static int rbd_open(struct block_device *bdev, fmode_t mode) | 389 | static int rbd_open(struct block_device *bdev, fmode_t mode) |
297 | { | 390 | { |
298 | struct rbd_device *rbd_dev = bdev->bd_disk->private_data; | 391 | struct rbd_device *rbd_dev = bdev->bd_disk->private_data; |
392 | bool removing = false; | ||
299 | 393 | ||
300 | if ((mode & FMODE_WRITE) && rbd_dev->mapping.read_only) | 394 | if ((mode & FMODE_WRITE) && rbd_dev->mapping.read_only) |
301 | return -EROFS; | 395 | return -EROFS; |
302 | 396 | ||
397 | spin_lock_irq(&rbd_dev->lock); | ||
398 | if (test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags)) | ||
399 | removing = true; | ||
400 | else | ||
401 | rbd_dev->open_count++; | ||
402 | spin_unlock_irq(&rbd_dev->lock); | ||
403 | if (removing) | ||
404 | return -ENOENT; | ||
405 | |||
303 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | 406 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); |
304 | (void) get_device(&rbd_dev->dev); | 407 | (void) get_device(&rbd_dev->dev); |
305 | set_device_ro(bdev, rbd_dev->mapping.read_only); | 408 | set_device_ro(bdev, rbd_dev->mapping.read_only); |
306 | rbd_dev->open_count++; | ||
307 | mutex_unlock(&ctl_mutex); | 409 | mutex_unlock(&ctl_mutex); |
308 | 410 | ||
309 | return 0; | 411 | return 0; |
@@ -312,10 +414,14 @@ static int rbd_open(struct block_device *bdev, fmode_t mode) | |||
312 | static int rbd_release(struct gendisk *disk, fmode_t mode) | 414 | static int rbd_release(struct gendisk *disk, fmode_t mode) |
313 | { | 415 | { |
314 | struct rbd_device *rbd_dev = disk->private_data; | 416 | struct rbd_device *rbd_dev = disk->private_data; |
417 | unsigned long open_count_before; | ||
418 | |||
419 | spin_lock_irq(&rbd_dev->lock); | ||
420 | open_count_before = rbd_dev->open_count--; | ||
421 | spin_unlock_irq(&rbd_dev->lock); | ||
422 | rbd_assert(open_count_before > 0); | ||
315 | 423 | ||
316 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | 424 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); |
317 | rbd_assert(rbd_dev->open_count > 0); | ||
318 | rbd_dev->open_count--; | ||
319 | put_device(&rbd_dev->dev); | 425 | put_device(&rbd_dev->dev); |
320 | mutex_unlock(&ctl_mutex); | 426 | mutex_unlock(&ctl_mutex); |
321 | 427 | ||
@@ -426,6 +532,12 @@ static match_table_t rbd_opts_tokens = { | |||
426 | {-1, NULL} | 532 | {-1, NULL} |
427 | }; | 533 | }; |
428 | 534 | ||
535 | struct rbd_options { | ||
536 | bool read_only; | ||
537 | }; | ||
538 | |||
539 | #define RBD_READ_ONLY_DEFAULT false | ||
540 | |||
429 | static int parse_rbd_opts_token(char *c, void *private) | 541 | static int parse_rbd_opts_token(char *c, void *private) |
430 | { | 542 | { |
431 | struct rbd_options *rbd_opts = private; | 543 | struct rbd_options *rbd_opts = private; |
@@ -512,18 +624,6 @@ static void rbd_put_client(struct rbd_client *rbdc) | |||
512 | kref_put(&rbdc->kref, rbd_client_release); | 624 | kref_put(&rbdc->kref, rbd_client_release); |
513 | } | 625 | } |
514 | 626 | ||
515 | /* | ||
516 | * Destroy requests collection | ||
517 | */ | ||
518 | static void rbd_coll_release(struct kref *kref) | ||
519 | { | ||
520 | struct rbd_req_coll *coll = | ||
521 | container_of(kref, struct rbd_req_coll, kref); | ||
522 | |||
523 | dout("rbd_coll_release %p\n", coll); | ||
524 | kfree(coll); | ||
525 | } | ||
526 | |||
527 | static bool rbd_image_format_valid(u32 image_format) | 627 | static bool rbd_image_format_valid(u32 image_format) |
528 | { | 628 | { |
529 | return image_format == 1 || image_format == 2; | 629 | return image_format == 1 || image_format == 2; |
@@ -707,7 +807,8 @@ static int rbd_dev_set_mapping(struct rbd_device *rbd_dev) | |||
707 | goto done; | 807 | goto done; |
708 | rbd_dev->mapping.read_only = true; | 808 | rbd_dev->mapping.read_only = true; |
709 | } | 809 | } |
710 | rbd_dev->exists = true; | 810 | set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); |
811 | |||
711 | done: | 812 | done: |
712 | return ret; | 813 | return ret; |
713 | } | 814 | } |
@@ -724,7 +825,7 @@ static void rbd_header_free(struct rbd_image_header *header) | |||
724 | header->snapc = NULL; | 825 | header->snapc = NULL; |
725 | } | 826 | } |
726 | 827 | ||
727 | static char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) | 828 | static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) |
728 | { | 829 | { |
729 | char *name; | 830 | char *name; |
730 | u64 segment; | 831 | u64 segment; |
@@ -767,23 +868,6 @@ static u64 rbd_segment_length(struct rbd_device *rbd_dev, | |||
767 | return length; | 868 | return length; |
768 | } | 869 | } |
769 | 870 | ||
770 | static int rbd_get_num_segments(struct rbd_image_header *header, | ||
771 | u64 ofs, u64 len) | ||
772 | { | ||
773 | u64 start_seg; | ||
774 | u64 end_seg; | ||
775 | |||
776 | if (!len) | ||
777 | return 0; | ||
778 | if (len - 1 > U64_MAX - ofs) | ||
779 | return -ERANGE; | ||
780 | |||
781 | start_seg = ofs >> header->obj_order; | ||
782 | end_seg = (ofs + len - 1) >> header->obj_order; | ||
783 | |||
784 | return end_seg - start_seg + 1; | ||
785 | } | ||
786 | |||
787 | /* | 871 | /* |
788 | * returns the size of an object in the image | 872 | * returns the size of an object in the image |
789 | */ | 873 | */ |
@@ -949,8 +1033,10 @@ static struct bio *bio_chain_clone_range(struct bio **bio_src, | |||
949 | unsigned int bi_size; | 1033 | unsigned int bi_size; |
950 | struct bio *bio; | 1034 | struct bio *bio; |
951 | 1035 | ||
952 | if (!bi) | 1036 | if (!bi) { |
1037 | rbd_warn(NULL, "bio_chain exhausted with %u left", len); | ||
953 | goto out_err; /* EINVAL; ran out of bio's */ | 1038 | goto out_err; /* EINVAL; ran out of bio's */ |
1039 | } | ||
954 | bi_size = min_t(unsigned int, bi->bi_size - off, len); | 1040 | bi_size = min_t(unsigned int, bi->bi_size - off, len); |
955 | bio = bio_clone_range(bi, off, bi_size, gfpmask); | 1041 | bio = bio_clone_range(bi, off, bi_size, gfpmask); |
956 | if (!bio) | 1042 | if (!bio) |
@@ -976,399 +1062,665 @@ out_err: | |||
976 | return NULL; | 1062 | return NULL; |
977 | } | 1063 | } |
978 | 1064 | ||
979 | /* | 1065 | static void rbd_obj_request_get(struct rbd_obj_request *obj_request) |
980 | * helpers for osd request op vectors. | ||
981 | */ | ||
982 | static struct ceph_osd_req_op *rbd_create_rw_ops(int num_ops, | ||
983 | int opcode, u32 payload_len) | ||
984 | { | 1066 | { |
985 | struct ceph_osd_req_op *ops; | 1067 | kref_get(&obj_request->kref); |
1068 | } | ||
986 | 1069 | ||
987 | ops = kzalloc(sizeof (*ops) * (num_ops + 1), GFP_NOIO); | 1070 | static void rbd_obj_request_destroy(struct kref *kref); |
988 | if (!ops) | 1071 | static void rbd_obj_request_put(struct rbd_obj_request *obj_request) |
1072 | { | ||
1073 | rbd_assert(obj_request != NULL); | ||
1074 | kref_put(&obj_request->kref, rbd_obj_request_destroy); | ||
1075 | } | ||
1076 | |||
1077 | static void rbd_img_request_get(struct rbd_img_request *img_request) | ||
1078 | { | ||
1079 | kref_get(&img_request->kref); | ||
1080 | } | ||
1081 | |||
1082 | static void rbd_img_request_destroy(struct kref *kref); | ||
1083 | static void rbd_img_request_put(struct rbd_img_request *img_request) | ||
1084 | { | ||
1085 | rbd_assert(img_request != NULL); | ||
1086 | kref_put(&img_request->kref, rbd_img_request_destroy); | ||
1087 | } | ||
1088 | |||
1089 | static inline void rbd_img_obj_request_add(struct rbd_img_request *img_request, | ||
1090 | struct rbd_obj_request *obj_request) | ||
1091 | { | ||
1092 | rbd_assert(obj_request->img_request == NULL); | ||
1093 | |||
1094 | rbd_obj_request_get(obj_request); | ||
1095 | obj_request->img_request = img_request; | ||
1096 | obj_request->which = img_request->obj_request_count; | ||
1097 | rbd_assert(obj_request->which != BAD_WHICH); | ||
1098 | img_request->obj_request_count++; | ||
1099 | list_add_tail(&obj_request->links, &img_request->obj_requests); | ||
1100 | } | ||
1101 | |||
1102 | static inline void rbd_img_obj_request_del(struct rbd_img_request *img_request, | ||
1103 | struct rbd_obj_request *obj_request) | ||
1104 | { | ||
1105 | rbd_assert(obj_request->which != BAD_WHICH); | ||
1106 | |||
1107 | list_del(&obj_request->links); | ||
1108 | rbd_assert(img_request->obj_request_count > 0); | ||
1109 | img_request->obj_request_count--; | ||
1110 | rbd_assert(obj_request->which == img_request->obj_request_count); | ||
1111 | obj_request->which = BAD_WHICH; | ||
1112 | rbd_assert(obj_request->img_request == img_request); | ||
1113 | obj_request->img_request = NULL; | ||
1114 | obj_request->callback = NULL; | ||
1115 | rbd_obj_request_put(obj_request); | ||
1116 | } | ||
1117 | |||
1118 | static bool obj_request_type_valid(enum obj_request_type type) | ||
1119 | { | ||
1120 | switch (type) { | ||
1121 | case OBJ_REQUEST_NODATA: | ||
1122 | case OBJ_REQUEST_BIO: | ||
1123 | case OBJ_REQUEST_PAGES: | ||
1124 | return true; | ||
1125 | default: | ||
1126 | return false; | ||
1127 | } | ||
1128 | } | ||
1129 | |||
1130 | struct ceph_osd_req_op *rbd_osd_req_op_create(u16 opcode, ...) | ||
1131 | { | ||
1132 | struct ceph_osd_req_op *op; | ||
1133 | va_list args; | ||
1134 | size_t size; | ||
1135 | |||
1136 | op = kzalloc(sizeof (*op), GFP_NOIO); | ||
1137 | if (!op) | ||
989 | return NULL; | 1138 | return NULL; |
1139 | op->op = opcode; | ||
1140 | va_start(args, opcode); | ||
1141 | switch (opcode) { | ||
1142 | case CEPH_OSD_OP_READ: | ||
1143 | case CEPH_OSD_OP_WRITE: | ||
1144 | /* rbd_osd_req_op_create(READ, offset, length) */ | ||
1145 | /* rbd_osd_req_op_create(WRITE, offset, length) */ | ||
1146 | op->extent.offset = va_arg(args, u64); | ||
1147 | op->extent.length = va_arg(args, u64); | ||
1148 | if (opcode == CEPH_OSD_OP_WRITE) | ||
1149 | op->payload_len = op->extent.length; | ||
1150 | break; | ||
1151 | case CEPH_OSD_OP_STAT: | ||
1152 | break; | ||
1153 | case CEPH_OSD_OP_CALL: | ||
1154 | /* rbd_osd_req_op_create(CALL, class, method, data, datalen) */ | ||
1155 | op->cls.class_name = va_arg(args, char *); | ||
1156 | size = strlen(op->cls.class_name); | ||
1157 | rbd_assert(size <= (size_t) U8_MAX); | ||
1158 | op->cls.class_len = size; | ||
1159 | op->payload_len = size; | ||
1160 | |||
1161 | op->cls.method_name = va_arg(args, char *); | ||
1162 | size = strlen(op->cls.method_name); | ||
1163 | rbd_assert(size <= (size_t) U8_MAX); | ||
1164 | op->cls.method_len = size; | ||
1165 | op->payload_len += size; | ||
1166 | |||
1167 | op->cls.argc = 0; | ||
1168 | op->cls.indata = va_arg(args, void *); | ||
1169 | size = va_arg(args, size_t); | ||
1170 | rbd_assert(size <= (size_t) U32_MAX); | ||
1171 | op->cls.indata_len = (u32) size; | ||
1172 | op->payload_len += size; | ||
1173 | break; | ||
1174 | case CEPH_OSD_OP_NOTIFY_ACK: | ||
1175 | case CEPH_OSD_OP_WATCH: | ||
1176 | /* rbd_osd_req_op_create(NOTIFY_ACK, cookie, version) */ | ||
1177 | /* rbd_osd_req_op_create(WATCH, cookie, version, flag) */ | ||
1178 | op->watch.cookie = va_arg(args, u64); | ||
1179 | op->watch.ver = va_arg(args, u64); | ||
1180 | op->watch.ver = cpu_to_le64(op->watch.ver); | ||
1181 | if (opcode == CEPH_OSD_OP_WATCH && va_arg(args, int)) | ||
1182 | op->watch.flag = (u8) 1; | ||
1183 | break; | ||
1184 | default: | ||
1185 | rbd_warn(NULL, "unsupported opcode %hu\n", opcode); | ||
1186 | kfree(op); | ||
1187 | op = NULL; | ||
1188 | break; | ||
1189 | } | ||
1190 | va_end(args); | ||
990 | 1191 | ||
991 | ops[0].op = opcode; | 1192 | return op; |
1193 | } | ||
992 | 1194 | ||
993 | /* | 1195 | static void rbd_osd_req_op_destroy(struct ceph_osd_req_op *op) |
994 | * op extent offset and length will be set later on | 1196 | { |
995 | * in calc_raw_layout() | 1197 | kfree(op); |
996 | */ | 1198 | } |
997 | ops[0].payload_len = payload_len; | ||
998 | 1199 | ||
999 | return ops; | 1200 | static int rbd_obj_request_submit(struct ceph_osd_client *osdc, |
1201 | struct rbd_obj_request *obj_request) | ||
1202 | { | ||
1203 | return ceph_osdc_start_request(osdc, obj_request->osd_req, false); | ||
1000 | } | 1204 | } |
1001 | 1205 | ||
1002 | static void rbd_destroy_ops(struct ceph_osd_req_op *ops) | 1206 | static void rbd_img_request_complete(struct rbd_img_request *img_request) |
1003 | { | 1207 | { |
1004 | kfree(ops); | 1208 | if (img_request->callback) |
1209 | img_request->callback(img_request); | ||
1210 | else | ||
1211 | rbd_img_request_put(img_request); | ||
1005 | } | 1212 | } |
1006 | 1213 | ||
1007 | static void rbd_coll_end_req_index(struct request *rq, | 1214 | /* Caller is responsible for rbd_obj_request_destroy(obj_request) */ |
1008 | struct rbd_req_coll *coll, | 1215 | |
1009 | int index, | 1216 | static int rbd_obj_request_wait(struct rbd_obj_request *obj_request) |
1010 | int ret, u64 len) | ||
1011 | { | 1217 | { |
1012 | struct request_queue *q; | 1218 | return wait_for_completion_interruptible(&obj_request->completion); |
1013 | int min, max, i; | 1219 | } |
1014 | 1220 | ||
1015 | dout("rbd_coll_end_req_index %p index %d ret %d len %llu\n", | 1221 | static void obj_request_done_init(struct rbd_obj_request *obj_request) |
1016 | coll, index, ret, (unsigned long long) len); | 1222 | { |
1223 | atomic_set(&obj_request->done, 0); | ||
1224 | smp_wmb(); | ||
1225 | } | ||
1017 | 1226 | ||
1018 | if (!rq) | 1227 | static void obj_request_done_set(struct rbd_obj_request *obj_request) |
1019 | return; | 1228 | { |
1229 | atomic_set(&obj_request->done, 1); | ||
1230 | smp_wmb(); | ||
1231 | } | ||
1020 | 1232 | ||
1021 | if (!coll) { | 1233 | static bool obj_request_done_test(struct rbd_obj_request *obj_request) |
1022 | blk_end_request(rq, ret, len); | 1234 | { |
1023 | return; | 1235 | smp_rmb(); |
1024 | } | 1236 | return atomic_read(&obj_request->done) != 0; |
1237 | } | ||
1238 | |||
1239 | static void rbd_osd_trivial_callback(struct rbd_obj_request *obj_request, | ||
1240 | struct ceph_osd_op *op) | ||
1241 | { | ||
1242 | obj_request_done_set(obj_request); | ||
1243 | } | ||
1025 | 1244 | ||
1026 | q = rq->q; | 1245 | static void rbd_obj_request_complete(struct rbd_obj_request *obj_request) |
1027 | 1246 | { | |
1028 | spin_lock_irq(q->queue_lock); | 1247 | if (obj_request->callback) |
1029 | coll->status[index].done = 1; | 1248 | obj_request->callback(obj_request); |
1030 | coll->status[index].rc = ret; | 1249 | else |
1031 | coll->status[index].bytes = len; | 1250 | complete_all(&obj_request->completion); |
1032 | max = min = coll->num_done; | 1251 | } |
1033 | while (max < coll->total && coll->status[max].done) | 1252 | |
1034 | max++; | 1253 | static void rbd_osd_read_callback(struct rbd_obj_request *obj_request, |
1035 | 1254 | struct ceph_osd_op *op) | |
1036 | for (i = min; i<max; i++) { | 1255 | { |
1037 | __blk_end_request(rq, coll->status[i].rc, | 1256 | u64 xferred; |
1038 | coll->status[i].bytes); | 1257 | |
1039 | coll->num_done++; | 1258 | /* |
1040 | kref_put(&coll->kref, rbd_coll_release); | 1259 | * We support a 64-bit length, but ultimately it has to be |
1260 | * passed to blk_end_request(), which takes an unsigned int. | ||
1261 | */ | ||
1262 | xferred = le64_to_cpu(op->extent.length); | ||
1263 | rbd_assert(xferred < (u64) UINT_MAX); | ||
1264 | if (obj_request->result == (s32) -ENOENT) { | ||
1265 | zero_bio_chain(obj_request->bio_list, 0); | ||
1266 | obj_request->result = 0; | ||
1267 | } else if (xferred < obj_request->length && !obj_request->result) { | ||
1268 | zero_bio_chain(obj_request->bio_list, xferred); | ||
1269 | xferred = obj_request->length; | ||
1041 | } | 1270 | } |
1042 | spin_unlock_irq(q->queue_lock); | 1271 | obj_request->xferred = xferred; |
1272 | obj_request_done_set(obj_request); | ||
1043 | } | 1273 | } |
1044 | 1274 | ||
1045 | static void rbd_coll_end_req(struct rbd_request *req, | 1275 | static void rbd_osd_write_callback(struct rbd_obj_request *obj_request, |
1046 | int ret, u64 len) | 1276 | struct ceph_osd_op *op) |
1047 | { | 1277 | { |
1048 | rbd_coll_end_req_index(req->rq, req->coll, req->coll_index, ret, len); | 1278 | obj_request->xferred = le64_to_cpu(op->extent.length); |
1279 | obj_request_done_set(obj_request); | ||
1049 | } | 1280 | } |
1050 | 1281 | ||
1051 | /* | 1282 | /* |
1052 | * Send ceph osd request | 1283 | * For a simple stat call there's nothing to do. We'll do more if |
1284 | * this is part of a write sequence for a layered image. | ||
1053 | */ | 1285 | */ |
1054 | static int rbd_do_request(struct request *rq, | 1286 | static void rbd_osd_stat_callback(struct rbd_obj_request *obj_request, |
1055 | struct rbd_device *rbd_dev, | 1287 | struct ceph_osd_op *op) |
1056 | struct ceph_snap_context *snapc, | 1288 | { |
1057 | u64 snapid, | 1289 | obj_request_done_set(obj_request); |
1058 | const char *object_name, u64 ofs, u64 len, | 1290 | } |
1059 | struct bio *bio, | ||
1060 | struct page **pages, | ||
1061 | int num_pages, | ||
1062 | int flags, | ||
1063 | struct ceph_osd_req_op *ops, | ||
1064 | struct rbd_req_coll *coll, | ||
1065 | int coll_index, | ||
1066 | void (*rbd_cb)(struct ceph_osd_request *req, | ||
1067 | struct ceph_msg *msg), | ||
1068 | struct ceph_osd_request **linger_req, | ||
1069 | u64 *ver) | ||
1070 | { | ||
1071 | struct ceph_osd_request *req; | ||
1072 | struct ceph_file_layout *layout; | ||
1073 | int ret; | ||
1074 | u64 bno; | ||
1075 | struct timespec mtime = CURRENT_TIME; | ||
1076 | struct rbd_request *req_data; | ||
1077 | struct ceph_osd_request_head *reqhead; | ||
1078 | struct ceph_osd_client *osdc; | ||
1079 | 1291 | ||
1080 | req_data = kzalloc(sizeof(*req_data), GFP_NOIO); | 1292 | static void rbd_osd_req_callback(struct ceph_osd_request *osd_req, |
1081 | if (!req_data) { | 1293 | struct ceph_msg *msg) |
1082 | if (coll) | 1294 | { |
1083 | rbd_coll_end_req_index(rq, coll, coll_index, | 1295 | struct rbd_obj_request *obj_request = osd_req->r_priv; |
1084 | -ENOMEM, len); | 1296 | struct ceph_osd_reply_head *reply_head; |
1085 | return -ENOMEM; | 1297 | struct ceph_osd_op *op; |
1298 | u32 num_ops; | ||
1299 | u16 opcode; | ||
1300 | |||
1301 | rbd_assert(osd_req == obj_request->osd_req); | ||
1302 | rbd_assert(!!obj_request->img_request ^ | ||
1303 | (obj_request->which == BAD_WHICH)); | ||
1304 | |||
1305 | obj_request->xferred = le32_to_cpu(msg->hdr.data_len); | ||
1306 | reply_head = msg->front.iov_base; | ||
1307 | obj_request->result = (s32) le32_to_cpu(reply_head->result); | ||
1308 | obj_request->version = le64_to_cpu(osd_req->r_reassert_version.version); | ||
1309 | |||
1310 | num_ops = le32_to_cpu(reply_head->num_ops); | ||
1311 | WARN_ON(num_ops != 1); /* For now */ | ||
1312 | |||
1313 | op = &reply_head->ops[0]; | ||
1314 | opcode = le16_to_cpu(op->op); | ||
1315 | switch (opcode) { | ||
1316 | case CEPH_OSD_OP_READ: | ||
1317 | rbd_osd_read_callback(obj_request, op); | ||
1318 | break; | ||
1319 | case CEPH_OSD_OP_WRITE: | ||
1320 | rbd_osd_write_callback(obj_request, op); | ||
1321 | break; | ||
1322 | case CEPH_OSD_OP_STAT: | ||
1323 | rbd_osd_stat_callback(obj_request, op); | ||
1324 | break; | ||
1325 | case CEPH_OSD_OP_CALL: | ||
1326 | case CEPH_OSD_OP_NOTIFY_ACK: | ||
1327 | case CEPH_OSD_OP_WATCH: | ||
1328 | rbd_osd_trivial_callback(obj_request, op); | ||
1329 | break; | ||
1330 | default: | ||
1331 | rbd_warn(NULL, "%s: unsupported op %hu\n", | ||
1332 | obj_request->object_name, (unsigned short) opcode); | ||
1333 | break; | ||
1086 | } | 1334 | } |
1087 | 1335 | ||
1088 | if (coll) { | 1336 | if (obj_request_done_test(obj_request)) |
1089 | req_data->coll = coll; | 1337 | rbd_obj_request_complete(obj_request); |
1090 | req_data->coll_index = coll_index; | 1338 | } |
1339 | |||
1340 | static struct ceph_osd_request *rbd_osd_req_create( | ||
1341 | struct rbd_device *rbd_dev, | ||
1342 | bool write_request, | ||
1343 | struct rbd_obj_request *obj_request, | ||
1344 | struct ceph_osd_req_op *op) | ||
1345 | { | ||
1346 | struct rbd_img_request *img_request = obj_request->img_request; | ||
1347 | struct ceph_snap_context *snapc = NULL; | ||
1348 | struct ceph_osd_client *osdc; | ||
1349 | struct ceph_osd_request *osd_req; | ||
1350 | struct timespec now; | ||
1351 | struct timespec *mtime; | ||
1352 | u64 snap_id = CEPH_NOSNAP; | ||
1353 | u64 offset = obj_request->offset; | ||
1354 | u64 length = obj_request->length; | ||
1355 | |||
1356 | if (img_request) { | ||
1357 | rbd_assert(img_request->write_request == write_request); | ||
1358 | if (img_request->write_request) | ||
1359 | snapc = img_request->snapc; | ||
1360 | else | ||
1361 | snap_id = img_request->snap_id; | ||
1091 | } | 1362 | } |
1092 | 1363 | ||
1093 | dout("rbd_do_request object_name=%s ofs=%llu len=%llu coll=%p[%d]\n", | 1364 | /* Allocate and initialize the request, for the single op */ |
1094 | object_name, (unsigned long long) ofs, | ||
1095 | (unsigned long long) len, coll, coll_index); | ||
1096 | 1365 | ||
1097 | osdc = &rbd_dev->rbd_client->client->osdc; | 1366 | osdc = &rbd_dev->rbd_client->client->osdc; |
1098 | req = ceph_osdc_alloc_request(osdc, flags, snapc, ops, | 1367 | osd_req = ceph_osdc_alloc_request(osdc, snapc, 1, false, GFP_ATOMIC); |
1099 | false, GFP_NOIO, pages, bio); | 1368 | if (!osd_req) |
1100 | if (!req) { | 1369 | return NULL; /* ENOMEM */ |
1101 | ret = -ENOMEM; | 1370 | |
1102 | goto done_pages; | 1371 | rbd_assert(obj_request_type_valid(obj_request->type)); |
1372 | switch (obj_request->type) { | ||
1373 | case OBJ_REQUEST_NODATA: | ||
1374 | break; /* Nothing to do */ | ||
1375 | case OBJ_REQUEST_BIO: | ||
1376 | rbd_assert(obj_request->bio_list != NULL); | ||
1377 | osd_req->r_bio = obj_request->bio_list; | ||
1378 | break; | ||
1379 | case OBJ_REQUEST_PAGES: | ||
1380 | osd_req->r_pages = obj_request->pages; | ||
1381 | osd_req->r_num_pages = obj_request->page_count; | ||
1382 | osd_req->r_page_alignment = offset & ~PAGE_MASK; | ||
1383 | break; | ||
1103 | } | 1384 | } |
1104 | 1385 | ||
1105 | req->r_callback = rbd_cb; | 1386 | if (write_request) { |
1387 | osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK; | ||
1388 | now = CURRENT_TIME; | ||
1389 | mtime = &now; | ||
1390 | } else { | ||
1391 | osd_req->r_flags = CEPH_OSD_FLAG_READ; | ||
1392 | mtime = NULL; /* not needed for reads */ | ||
1393 | offset = 0; /* These are not used... */ | ||
1394 | length = 0; /* ...for osd read requests */ | ||
1395 | } | ||
1106 | 1396 | ||
1107 | req_data->rq = rq; | 1397 | osd_req->r_callback = rbd_osd_req_callback; |
1108 | req_data->bio = bio; | 1398 | osd_req->r_priv = obj_request; |
1109 | req_data->pages = pages; | ||
1110 | req_data->len = len; | ||
1111 | 1399 | ||
1112 | req->r_priv = req_data; | 1400 | osd_req->r_oid_len = strlen(obj_request->object_name); |
1401 | rbd_assert(osd_req->r_oid_len < sizeof (osd_req->r_oid)); | ||
1402 | memcpy(osd_req->r_oid, obj_request->object_name, osd_req->r_oid_len); | ||
1113 | 1403 | ||
1114 | reqhead = req->r_request->front.iov_base; | 1404 | osd_req->r_file_layout = rbd_dev->layout; /* struct */ |
1115 | reqhead->snapid = cpu_to_le64(CEPH_NOSNAP); | ||
1116 | 1405 | ||
1117 | strncpy(req->r_oid, object_name, sizeof(req->r_oid)); | 1406 | /* osd_req will get its own reference to snapc (if non-null) */ |
1118 | req->r_oid_len = strlen(req->r_oid); | ||
1119 | 1407 | ||
1120 | layout = &req->r_file_layout; | 1408 | ceph_osdc_build_request(osd_req, offset, length, 1, op, |
1121 | memset(layout, 0, sizeof(*layout)); | 1409 | snapc, snap_id, mtime); |
1122 | layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); | ||
1123 | layout->fl_stripe_count = cpu_to_le32(1); | ||
1124 | layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); | ||
1125 | layout->fl_pg_pool = cpu_to_le32((int) rbd_dev->spec->pool_id); | ||
1126 | ret = ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno, | ||
1127 | req, ops); | ||
1128 | rbd_assert(ret == 0); | ||
1129 | 1410 | ||
1130 | ceph_osdc_build_request(req, ofs, &len, | 1411 | return osd_req; |
1131 | ops, | 1412 | } |
1132 | snapc, | ||
1133 | &mtime, | ||
1134 | req->r_oid, req->r_oid_len); | ||
1135 | 1413 | ||
1136 | if (linger_req) { | 1414 | static void rbd_osd_req_destroy(struct ceph_osd_request *osd_req) |
1137 | ceph_osdc_set_request_linger(osdc, req); | 1415 | { |
1138 | *linger_req = req; | 1416 | ceph_osdc_put_request(osd_req); |
1139 | } | 1417 | } |
1140 | 1418 | ||
1141 | ret = ceph_osdc_start_request(osdc, req, false); | 1419 | /* object_name is assumed to be a non-null pointer and NUL-terminated */ |
1142 | if (ret < 0) | 1420 | |
1143 | goto done_err; | 1421 | static struct rbd_obj_request *rbd_obj_request_create(const char *object_name, |
1144 | 1422 | u64 offset, u64 length, | |
1145 | if (!rbd_cb) { | 1423 | enum obj_request_type type) |
1146 | ret = ceph_osdc_wait_request(osdc, req); | 1424 | { |
1147 | if (ver) | 1425 | struct rbd_obj_request *obj_request; |
1148 | *ver = le64_to_cpu(req->r_reassert_version.version); | 1426 | size_t size; |
1149 | dout("reassert_ver=%llu\n", | 1427 | char *name; |
1150 | (unsigned long long) | 1428 | |
1151 | le64_to_cpu(req->r_reassert_version.version)); | 1429 | rbd_assert(obj_request_type_valid(type)); |
1152 | ceph_osdc_put_request(req); | 1430 | |
1431 | size = strlen(object_name) + 1; | ||
1432 | obj_request = kzalloc(sizeof (*obj_request) + size, GFP_KERNEL); | ||
1433 | if (!obj_request) | ||
1434 | return NULL; | ||
1435 | |||
1436 | name = (char *)(obj_request + 1); | ||
1437 | obj_request->object_name = memcpy(name, object_name, size); | ||
1438 | obj_request->offset = offset; | ||
1439 | obj_request->length = length; | ||
1440 | obj_request->which = BAD_WHICH; | ||
1441 | obj_request->type = type; | ||
1442 | INIT_LIST_HEAD(&obj_request->links); | ||
1443 | obj_request_done_init(obj_request); | ||
1444 | init_completion(&obj_request->completion); | ||
1445 | kref_init(&obj_request->kref); | ||
1446 | |||
1447 | return obj_request; | ||
1448 | } | ||
1449 | |||
1450 | static void rbd_obj_request_destroy(struct kref *kref) | ||
1451 | { | ||
1452 | struct rbd_obj_request *obj_request; | ||
1453 | |||
1454 | obj_request = container_of(kref, struct rbd_obj_request, kref); | ||
1455 | |||
1456 | rbd_assert(obj_request->img_request == NULL); | ||
1457 | rbd_assert(obj_request->which == BAD_WHICH); | ||
1458 | |||
1459 | if (obj_request->osd_req) | ||
1460 | rbd_osd_req_destroy(obj_request->osd_req); | ||
1461 | |||
1462 | rbd_assert(obj_request_type_valid(obj_request->type)); | ||
1463 | switch (obj_request->type) { | ||
1464 | case OBJ_REQUEST_NODATA: | ||
1465 | break; /* Nothing to do */ | ||
1466 | case OBJ_REQUEST_BIO: | ||
1467 | if (obj_request->bio_list) | ||
1468 | bio_chain_put(obj_request->bio_list); | ||
1469 | break; | ||
1470 | case OBJ_REQUEST_PAGES: | ||
1471 | if (obj_request->pages) | ||
1472 | ceph_release_page_vector(obj_request->pages, | ||
1473 | obj_request->page_count); | ||
1474 | break; | ||
1153 | } | 1475 | } |
1154 | return ret; | ||
1155 | 1476 | ||
1156 | done_err: | 1477 | kfree(obj_request); |
1157 | bio_chain_put(req_data->bio); | ||
1158 | ceph_osdc_put_request(req); | ||
1159 | done_pages: | ||
1160 | rbd_coll_end_req(req_data, ret, len); | ||
1161 | kfree(req_data); | ||
1162 | return ret; | ||
1163 | } | 1478 | } |
1164 | 1479 | ||
1165 | /* | 1480 | /* |
1166 | * Ceph osd op callback | 1481 | * Caller is responsible for filling in the list of object requests |
1482 | * that comprises the image request, and the Linux request pointer | ||
1483 | * (if there is one). | ||
1167 | */ | 1484 | */ |
1168 | static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) | 1485 | struct rbd_img_request *rbd_img_request_create(struct rbd_device *rbd_dev, |
1486 | u64 offset, u64 length, | ||
1487 | bool write_request) | ||
1169 | { | 1488 | { |
1170 | struct rbd_request *req_data = req->r_priv; | 1489 | struct rbd_img_request *img_request; |
1171 | struct ceph_osd_reply_head *replyhead; | 1490 | struct ceph_snap_context *snapc = NULL; |
1172 | struct ceph_osd_op *op; | ||
1173 | __s32 rc; | ||
1174 | u64 bytes; | ||
1175 | int read_op; | ||
1176 | |||
1177 | /* parse reply */ | ||
1178 | replyhead = msg->front.iov_base; | ||
1179 | WARN_ON(le32_to_cpu(replyhead->num_ops) == 0); | ||
1180 | op = (void *)(replyhead + 1); | ||
1181 | rc = le32_to_cpu(replyhead->result); | ||
1182 | bytes = le64_to_cpu(op->extent.length); | ||
1183 | read_op = (le16_to_cpu(op->op) == CEPH_OSD_OP_READ); | ||
1184 | |||
1185 | dout("rbd_req_cb bytes=%llu readop=%d rc=%d\n", | ||
1186 | (unsigned long long) bytes, read_op, (int) rc); | ||
1187 | |||
1188 | if (rc == -ENOENT && read_op) { | ||
1189 | zero_bio_chain(req_data->bio, 0); | ||
1190 | rc = 0; | ||
1191 | } else if (rc == 0 && read_op && bytes < req_data->len) { | ||
1192 | zero_bio_chain(req_data->bio, bytes); | ||
1193 | bytes = req_data->len; | ||
1194 | } | ||
1195 | 1491 | ||
1196 | rbd_coll_end_req(req_data, rc, bytes); | 1492 | img_request = kmalloc(sizeof (*img_request), GFP_ATOMIC); |
1493 | if (!img_request) | ||
1494 | return NULL; | ||
1197 | 1495 | ||
1198 | if (req_data->bio) | 1496 | if (write_request) { |
1199 | bio_chain_put(req_data->bio); | 1497 | down_read(&rbd_dev->header_rwsem); |
1498 | snapc = ceph_get_snap_context(rbd_dev->header.snapc); | ||
1499 | up_read(&rbd_dev->header_rwsem); | ||
1500 | if (WARN_ON(!snapc)) { | ||
1501 | kfree(img_request); | ||
1502 | return NULL; /* Shouldn't happen */ | ||
1503 | } | ||
1504 | } | ||
1200 | 1505 | ||
1201 | ceph_osdc_put_request(req); | 1506 | img_request->rq = NULL; |
1202 | kfree(req_data); | 1507 | img_request->rbd_dev = rbd_dev; |
1508 | img_request->offset = offset; | ||
1509 | img_request->length = length; | ||
1510 | img_request->write_request = write_request; | ||
1511 | if (write_request) | ||
1512 | img_request->snapc = snapc; | ||
1513 | else | ||
1514 | img_request->snap_id = rbd_dev->spec->snap_id; | ||
1515 | spin_lock_init(&img_request->completion_lock); | ||
1516 | img_request->next_completion = 0; | ||
1517 | img_request->callback = NULL; | ||
1518 | img_request->obj_request_count = 0; | ||
1519 | INIT_LIST_HEAD(&img_request->obj_requests); | ||
1520 | kref_init(&img_request->kref); | ||
1521 | |||
1522 | rbd_img_request_get(img_request); /* Avoid a warning */ | ||
1523 | rbd_img_request_put(img_request); /* TEMPORARY */ | ||
1524 | |||
1525 | return img_request; | ||
1203 | } | 1526 | } |
1204 | 1527 | ||
1205 | static void rbd_simple_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) | 1528 | static void rbd_img_request_destroy(struct kref *kref) |
1206 | { | 1529 | { |
1207 | ceph_osdc_put_request(req); | 1530 | struct rbd_img_request *img_request; |
1531 | struct rbd_obj_request *obj_request; | ||
1532 | struct rbd_obj_request *next_obj_request; | ||
1533 | |||
1534 | img_request = container_of(kref, struct rbd_img_request, kref); | ||
1535 | |||
1536 | for_each_obj_request_safe(img_request, obj_request, next_obj_request) | ||
1537 | rbd_img_obj_request_del(img_request, obj_request); | ||
1538 | rbd_assert(img_request->obj_request_count == 0); | ||
1539 | |||
1540 | if (img_request->write_request) | ||
1541 | ceph_put_snap_context(img_request->snapc); | ||
1542 | |||
1543 | kfree(img_request); | ||
1208 | } | 1544 | } |
1209 | 1545 | ||
1210 | /* | 1546 | static int rbd_img_request_fill_bio(struct rbd_img_request *img_request, |
1211 | * Do a synchronous ceph osd operation | 1547 | struct bio *bio_list) |
1212 | */ | ||
1213 | static int rbd_req_sync_op(struct rbd_device *rbd_dev, | ||
1214 | struct ceph_snap_context *snapc, | ||
1215 | u64 snapid, | ||
1216 | int flags, | ||
1217 | struct ceph_osd_req_op *ops, | ||
1218 | const char *object_name, | ||
1219 | u64 ofs, u64 inbound_size, | ||
1220 | char *inbound, | ||
1221 | struct ceph_osd_request **linger_req, | ||
1222 | u64 *ver) | ||
1223 | { | 1548 | { |
1224 | int ret; | 1549 | struct rbd_device *rbd_dev = img_request->rbd_dev; |
1225 | struct page **pages; | 1550 | struct rbd_obj_request *obj_request = NULL; |
1226 | int num_pages; | 1551 | struct rbd_obj_request *next_obj_request; |
1227 | 1552 | unsigned int bio_offset; | |
1228 | rbd_assert(ops != NULL); | 1553 | u64 image_offset; |
1554 | u64 resid; | ||
1555 | u16 opcode; | ||
1556 | |||
1557 | opcode = img_request->write_request ? CEPH_OSD_OP_WRITE | ||
1558 | : CEPH_OSD_OP_READ; | ||
1559 | bio_offset = 0; | ||
1560 | image_offset = img_request->offset; | ||
1561 | rbd_assert(image_offset == bio_list->bi_sector << SECTOR_SHIFT); | ||
1562 | resid = img_request->length; | ||
1563 | while (resid) { | ||
1564 | const char *object_name; | ||
1565 | unsigned int clone_size; | ||
1566 | struct ceph_osd_req_op *op; | ||
1567 | u64 offset; | ||
1568 | u64 length; | ||
1569 | |||
1570 | object_name = rbd_segment_name(rbd_dev, image_offset); | ||
1571 | if (!object_name) | ||
1572 | goto out_unwind; | ||
1573 | offset = rbd_segment_offset(rbd_dev, image_offset); | ||
1574 | length = rbd_segment_length(rbd_dev, image_offset, resid); | ||
1575 | obj_request = rbd_obj_request_create(object_name, | ||
1576 | offset, length, | ||
1577 | OBJ_REQUEST_BIO); | ||
1578 | kfree(object_name); /* object request has its own copy */ | ||
1579 | if (!obj_request) | ||
1580 | goto out_unwind; | ||
1581 | |||
1582 | rbd_assert(length <= (u64) UINT_MAX); | ||
1583 | clone_size = (unsigned int) length; | ||
1584 | obj_request->bio_list = bio_chain_clone_range(&bio_list, | ||
1585 | &bio_offset, clone_size, | ||
1586 | GFP_ATOMIC); | ||
1587 | if (!obj_request->bio_list) | ||
1588 | goto out_partial; | ||
1229 | 1589 | ||
1230 | num_pages = calc_pages_for(ofs, inbound_size); | 1590 | /* |
1231 | pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); | 1591 | * Build up the op to use in building the osd |
1232 | if (IS_ERR(pages)) | 1592 | * request. Note that the contents of the op are |
1233 | return PTR_ERR(pages); | 1593 | * copied by rbd_osd_req_create(). |
1594 | */ | ||
1595 | op = rbd_osd_req_op_create(opcode, offset, length); | ||
1596 | if (!op) | ||
1597 | goto out_partial; | ||
1598 | obj_request->osd_req = rbd_osd_req_create(rbd_dev, | ||
1599 | img_request->write_request, | ||
1600 | obj_request, op); | ||
1601 | rbd_osd_req_op_destroy(op); | ||
1602 | if (!obj_request->osd_req) | ||
1603 | goto out_partial; | ||
1604 | /* status and version are initially zero-filled */ | ||
1605 | |||
1606 | rbd_img_obj_request_add(img_request, obj_request); | ||
1607 | |||
1608 | image_offset += length; | ||
1609 | resid -= length; | ||
1610 | } | ||
1234 | 1611 | ||
1235 | ret = rbd_do_request(NULL, rbd_dev, snapc, snapid, | 1612 | return 0; |
1236 | object_name, ofs, inbound_size, NULL, | ||
1237 | pages, num_pages, | ||
1238 | flags, | ||
1239 | ops, | ||
1240 | NULL, 0, | ||
1241 | NULL, | ||
1242 | linger_req, ver); | ||
1243 | if (ret < 0) | ||
1244 | goto done; | ||
1245 | 1613 | ||
1246 | if ((flags & CEPH_OSD_FLAG_READ) && inbound) | 1614 | out_partial: |
1247 | ret = ceph_copy_from_page_vector(pages, inbound, ofs, ret); | 1615 | rbd_obj_request_put(obj_request); |
1616 | out_unwind: | ||
1617 | for_each_obj_request_safe(img_request, obj_request, next_obj_request) | ||
1618 | rbd_obj_request_put(obj_request); | ||
1248 | 1619 | ||
1249 | done: | 1620 | return -ENOMEM; |
1250 | ceph_release_page_vector(pages, num_pages); | ||
1251 | return ret; | ||
1252 | } | 1621 | } |
1253 | 1622 | ||
1254 | /* | 1623 | static void rbd_img_obj_callback(struct rbd_obj_request *obj_request) |
1255 | * Do an asynchronous ceph osd operation | 1624 | { |
1256 | */ | 1625 | struct rbd_img_request *img_request; |
1257 | static int rbd_do_op(struct request *rq, | 1626 | u32 which = obj_request->which; |
1258 | struct rbd_device *rbd_dev, | 1627 | bool more = true; |
1259 | struct ceph_snap_context *snapc, | 1628 | |
1260 | u64 ofs, u64 len, | 1629 | img_request = obj_request->img_request; |
1261 | struct bio *bio, | 1630 | rbd_assert(img_request != NULL); |
1262 | struct rbd_req_coll *coll, | 1631 | rbd_assert(img_request->rq != NULL); |
1263 | int coll_index) | 1632 | rbd_assert(which != BAD_WHICH); |
1264 | { | 1633 | rbd_assert(which < img_request->obj_request_count); |
1265 | char *seg_name; | 1634 | rbd_assert(which >= img_request->next_completion); |
1266 | u64 seg_ofs; | 1635 | |
1267 | u64 seg_len; | 1636 | spin_lock_irq(&img_request->completion_lock); |
1268 | int ret; | 1637 | if (which != img_request->next_completion) |
1269 | struct ceph_osd_req_op *ops; | 1638 | goto out; |
1270 | u32 payload_len; | ||
1271 | int opcode; | ||
1272 | int flags; | ||
1273 | u64 snapid; | ||
1274 | |||
1275 | seg_name = rbd_segment_name(rbd_dev, ofs); | ||
1276 | if (!seg_name) | ||
1277 | return -ENOMEM; | ||
1278 | seg_len = rbd_segment_length(rbd_dev, ofs, len); | ||
1279 | seg_ofs = rbd_segment_offset(rbd_dev, ofs); | ||
1280 | |||
1281 | if (rq_data_dir(rq) == WRITE) { | ||
1282 | opcode = CEPH_OSD_OP_WRITE; | ||
1283 | flags = CEPH_OSD_FLAG_WRITE|CEPH_OSD_FLAG_ONDISK; | ||
1284 | snapid = CEPH_NOSNAP; | ||
1285 | payload_len = seg_len; | ||
1286 | } else { | ||
1287 | opcode = CEPH_OSD_OP_READ; | ||
1288 | flags = CEPH_OSD_FLAG_READ; | ||
1289 | snapc = NULL; | ||
1290 | snapid = rbd_dev->spec->snap_id; | ||
1291 | payload_len = 0; | ||
1292 | } | ||
1293 | 1639 | ||
1294 | ret = -ENOMEM; | 1640 | for_each_obj_request_from(img_request, obj_request) { |
1295 | ops = rbd_create_rw_ops(1, opcode, payload_len); | 1641 | unsigned int xferred; |
1296 | if (!ops) | 1642 | int result; |
1297 | goto done; | ||
1298 | 1643 | ||
1299 | /* we've taken care of segment sizes earlier when we | 1644 | rbd_assert(more); |
1300 | cloned the bios. We should never have a segment | 1645 | rbd_assert(which < img_request->obj_request_count); |
1301 | truncated at this point */ | 1646 | |
1302 | rbd_assert(seg_len == len); | 1647 | if (!obj_request_done_test(obj_request)) |
1303 | 1648 | break; | |
1304 | ret = rbd_do_request(rq, rbd_dev, snapc, snapid, | 1649 | |
1305 | seg_name, seg_ofs, seg_len, | 1650 | rbd_assert(obj_request->xferred <= (u64) UINT_MAX); |
1306 | bio, | 1651 | xferred = (unsigned int) obj_request->xferred; |
1307 | NULL, 0, | 1652 | result = (int) obj_request->result; |
1308 | flags, | 1653 | if (result) |
1309 | ops, | 1654 | rbd_warn(NULL, "obj_request %s result %d xferred %u\n", |
1310 | coll, coll_index, | 1655 | img_request->write_request ? "write" : "read", |
1311 | rbd_req_cb, 0, NULL); | 1656 | result, xferred); |
1312 | 1657 | ||
1313 | rbd_destroy_ops(ops); | 1658 | more = blk_end_request(img_request->rq, result, xferred); |
1314 | done: | 1659 | which++; |
1315 | kfree(seg_name); | 1660 | } |
1316 | return ret; | 1661 | rbd_assert(more ^ (which == img_request->obj_request_count)); |
1662 | img_request->next_completion = which; | ||
1663 | out: | ||
1664 | spin_unlock_irq(&img_request->completion_lock); | ||
1665 | |||
1666 | if (!more) | ||
1667 | rbd_img_request_complete(img_request); | ||
1317 | } | 1668 | } |
1318 | 1669 | ||
1319 | /* | 1670 | static int rbd_img_request_submit(struct rbd_img_request *img_request) |
1320 | * Request sync osd read | 1671 | { |
1321 | */ | 1672 | struct rbd_device *rbd_dev = img_request->rbd_dev; |
1322 | static int rbd_req_sync_read(struct rbd_device *rbd_dev, | 1673 | struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; |
1323 | u64 snapid, | 1674 | struct rbd_obj_request *obj_request; |
1324 | const char *object_name, | ||
1325 | u64 ofs, u64 len, | ||
1326 | char *buf, | ||
1327 | u64 *ver) | ||
1328 | { | ||
1329 | struct ceph_osd_req_op *ops; | ||
1330 | int ret; | ||
1331 | 1675 | ||
1332 | ops = rbd_create_rw_ops(1, CEPH_OSD_OP_READ, 0); | 1676 | for_each_obj_request(img_request, obj_request) { |
1333 | if (!ops) | 1677 | int ret; |
1334 | return -ENOMEM; | ||
1335 | 1678 | ||
1336 | ret = rbd_req_sync_op(rbd_dev, NULL, | 1679 | obj_request->callback = rbd_img_obj_callback; |
1337 | snapid, | 1680 | ret = rbd_obj_request_submit(osdc, obj_request); |
1338 | CEPH_OSD_FLAG_READ, | 1681 | if (ret) |
1339 | ops, object_name, ofs, len, buf, NULL, ver); | 1682 | return ret; |
1340 | rbd_destroy_ops(ops); | 1683 | /* |
1684 | * The image request has its own reference to each | ||
1685 | * of its object requests, so we can safely drop the | ||
1686 | * initial one here. | ||
1687 | */ | ||
1688 | rbd_obj_request_put(obj_request); | ||
1689 | } | ||
1341 | 1690 | ||
1342 | return ret; | 1691 | return 0; |
1343 | } | 1692 | } |
1344 | 1693 | ||
1345 | /* | 1694 | static int rbd_obj_notify_ack(struct rbd_device *rbd_dev, |
1346 | * Request sync osd watch | 1695 | u64 ver, u64 notify_id) |
1347 | */ | ||
1348 | static int rbd_req_sync_notify_ack(struct rbd_device *rbd_dev, | ||
1349 | u64 ver, | ||
1350 | u64 notify_id) | ||
1351 | { | 1696 | { |
1352 | struct ceph_osd_req_op *ops; | 1697 | struct rbd_obj_request *obj_request; |
1698 | struct ceph_osd_req_op *op; | ||
1699 | struct ceph_osd_client *osdc; | ||
1353 | int ret; | 1700 | int ret; |
1354 | 1701 | ||
1355 | ops = rbd_create_rw_ops(1, CEPH_OSD_OP_NOTIFY_ACK, 0); | 1702 | obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0, |
1356 | if (!ops) | 1703 | OBJ_REQUEST_NODATA); |
1704 | if (!obj_request) | ||
1357 | return -ENOMEM; | 1705 | return -ENOMEM; |
1358 | 1706 | ||
1359 | ops[0].watch.ver = cpu_to_le64(ver); | 1707 | ret = -ENOMEM; |
1360 | ops[0].watch.cookie = notify_id; | 1708 | op = rbd_osd_req_op_create(CEPH_OSD_OP_NOTIFY_ACK, notify_id, ver); |
1361 | ops[0].watch.flag = 0; | 1709 | if (!op) |
1710 | goto out; | ||
1711 | obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, | ||
1712 | obj_request, op); | ||
1713 | rbd_osd_req_op_destroy(op); | ||
1714 | if (!obj_request->osd_req) | ||
1715 | goto out; | ||
1362 | 1716 | ||
1363 | ret = rbd_do_request(NULL, rbd_dev, NULL, CEPH_NOSNAP, | 1717 | osdc = &rbd_dev->rbd_client->client->osdc; |
1364 | rbd_dev->header_name, 0, 0, NULL, | 1718 | obj_request->callback = rbd_obj_request_put; |
1365 | NULL, 0, | 1719 | ret = rbd_obj_request_submit(osdc, obj_request); |
1366 | CEPH_OSD_FLAG_READ, | 1720 | out: |
1367 | ops, | 1721 | if (ret) |
1368 | NULL, 0, | 1722 | rbd_obj_request_put(obj_request); |
1369 | rbd_simple_req_cb, 0, NULL); | ||
1370 | 1723 | ||
1371 | rbd_destroy_ops(ops); | ||
1372 | return ret; | 1724 | return ret; |
1373 | } | 1725 | } |
1374 | 1726 | ||
@@ -1386,90 +1738,98 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) | |||
1386 | (unsigned int) opcode); | 1738 | (unsigned int) opcode); |
1387 | rc = rbd_dev_refresh(rbd_dev, &hver); | 1739 | rc = rbd_dev_refresh(rbd_dev, &hver); |
1388 | if (rc) | 1740 | if (rc) |
1389 | pr_warning(RBD_DRV_NAME "%d got notification but failed to " | 1741 | rbd_warn(rbd_dev, "got notification but failed to " |
1390 | " update snaps: %d\n", rbd_dev->major, rc); | 1742 | " update snaps: %d\n", rc); |
1391 | 1743 | ||
1392 | rbd_req_sync_notify_ack(rbd_dev, hver, notify_id); | 1744 | rbd_obj_notify_ack(rbd_dev, hver, notify_id); |
1393 | } | 1745 | } |
1394 | 1746 | ||
1395 | /* | 1747 | /* |
1396 | * Request sync osd watch | 1748 | * Request sync osd watch/unwatch. The value of "start" determines |
1749 | * whether a watch request is being initiated or torn down. | ||
1397 | */ | 1750 | */ |
1398 | static int rbd_req_sync_watch(struct rbd_device *rbd_dev) | 1751 | static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, int start) |
1399 | { | 1752 | { |
1400 | struct ceph_osd_req_op *ops; | ||
1401 | struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; | 1753 | struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; |
1754 | struct rbd_obj_request *obj_request; | ||
1755 | struct ceph_osd_req_op *op; | ||
1402 | int ret; | 1756 | int ret; |
1403 | 1757 | ||
1404 | ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0); | 1758 | rbd_assert(start ^ !!rbd_dev->watch_event); |
1405 | if (!ops) | 1759 | rbd_assert(start ^ !!rbd_dev->watch_request); |
1406 | return -ENOMEM; | ||
1407 | 1760 | ||
1408 | ret = ceph_osdc_create_event(osdc, rbd_watch_cb, 0, | 1761 | if (start) { |
1409 | (void *)rbd_dev, &rbd_dev->watch_event); | 1762 | ret = ceph_osdc_create_event(osdc, rbd_watch_cb, rbd_dev, |
1410 | if (ret < 0) | 1763 | &rbd_dev->watch_event); |
1411 | goto fail; | 1764 | if (ret < 0) |
1765 | return ret; | ||
1766 | rbd_assert(rbd_dev->watch_event != NULL); | ||
1767 | } | ||
1412 | 1768 | ||
1413 | ops[0].watch.ver = cpu_to_le64(rbd_dev->header.obj_version); | 1769 | ret = -ENOMEM; |
1414 | ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie); | 1770 | obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0, |
1415 | ops[0].watch.flag = 1; | 1771 | OBJ_REQUEST_NODATA); |
1772 | if (!obj_request) | ||
1773 | goto out_cancel; | ||
1774 | |||
1775 | op = rbd_osd_req_op_create(CEPH_OSD_OP_WATCH, | ||
1776 | rbd_dev->watch_event->cookie, | ||
1777 | rbd_dev->header.obj_version, start); | ||
1778 | if (!op) | ||
1779 | goto out_cancel; | ||
1780 | obj_request->osd_req = rbd_osd_req_create(rbd_dev, true, | ||
1781 | obj_request, op); | ||
1782 | rbd_osd_req_op_destroy(op); | ||
1783 | if (!obj_request->osd_req) | ||
1784 | goto out_cancel; | ||
1785 | |||
1786 | if (start) | ||
1787 | ceph_osdc_set_request_linger(osdc, obj_request->osd_req); | ||
1788 | else | ||
1789 | ceph_osdc_unregister_linger_request(osdc, | ||
1790 | rbd_dev->watch_request->osd_req); | ||
1791 | ret = rbd_obj_request_submit(osdc, obj_request); | ||
1792 | if (ret) | ||
1793 | goto out_cancel; | ||
1794 | ret = rbd_obj_request_wait(obj_request); | ||
1795 | if (ret) | ||
1796 | goto out_cancel; | ||
1797 | ret = obj_request->result; | ||
1798 | if (ret) | ||
1799 | goto out_cancel; | ||
1416 | 1800 | ||
1417 | ret = rbd_req_sync_op(rbd_dev, NULL, | 1801 | /* |
1418 | CEPH_NOSNAP, | 1802 | * A watch request is set to linger, so the underlying osd |
1419 | CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, | 1803 | * request won't go away until we unregister it. We retain |
1420 | ops, | 1804 | * a pointer to the object request during that time (in |
1421 | rbd_dev->header_name, | 1805 | * rbd_dev->watch_request), so we'll keep a reference to |
1422 | 0, 0, NULL, | 1806 | * it. We'll drop that reference (below) after we've |
1423 | &rbd_dev->watch_request, NULL); | 1807 | * unregistered it. |
1808 | */ | ||
1809 | if (start) { | ||
1810 | rbd_dev->watch_request = obj_request; | ||
1424 | 1811 | ||
1425 | if (ret < 0) | 1812 | return 0; |
1426 | goto fail_event; | 1813 | } |
1427 | 1814 | ||
1428 | rbd_destroy_ops(ops); | 1815 | /* We have successfully torn down the watch request */ |
1429 | return 0; | ||
1430 | 1816 | ||
1431 | fail_event: | 1817 | rbd_obj_request_put(rbd_dev->watch_request); |
1818 | rbd_dev->watch_request = NULL; | ||
1819 | out_cancel: | ||
1820 | /* Cancel the event if we're tearing down, or on error */ | ||
1432 | ceph_osdc_cancel_event(rbd_dev->watch_event); | 1821 | ceph_osdc_cancel_event(rbd_dev->watch_event); |
1433 | rbd_dev->watch_event = NULL; | 1822 | rbd_dev->watch_event = NULL; |
1434 | fail: | 1823 | if (obj_request) |
1435 | rbd_destroy_ops(ops); | 1824 | rbd_obj_request_put(obj_request); |
1436 | return ret; | ||
1437 | } | ||
1438 | |||
1439 | /* | ||
1440 | * Request sync osd unwatch | ||
1441 | */ | ||
1442 | static int rbd_req_sync_unwatch(struct rbd_device *rbd_dev) | ||
1443 | { | ||
1444 | struct ceph_osd_req_op *ops; | ||
1445 | int ret; | ||
1446 | 1825 | ||
1447 | ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0); | ||
1448 | if (!ops) | ||
1449 | return -ENOMEM; | ||
1450 | |||
1451 | ops[0].watch.ver = 0; | ||
1452 | ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie); | ||
1453 | ops[0].watch.flag = 0; | ||
1454 | |||
1455 | ret = rbd_req_sync_op(rbd_dev, NULL, | ||
1456 | CEPH_NOSNAP, | ||
1457 | CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, | ||
1458 | ops, | ||
1459 | rbd_dev->header_name, | ||
1460 | 0, 0, NULL, NULL, NULL); | ||
1461 | |||
1462 | |||
1463 | rbd_destroy_ops(ops); | ||
1464 | ceph_osdc_cancel_event(rbd_dev->watch_event); | ||
1465 | rbd_dev->watch_event = NULL; | ||
1466 | return ret; | 1826 | return ret; |
1467 | } | 1827 | } |
1468 | 1828 | ||
1469 | /* | 1829 | /* |
1470 | * Synchronous osd object method call | 1830 | * Synchronous osd object method call |
1471 | */ | 1831 | */ |
1472 | static int rbd_req_sync_exec(struct rbd_device *rbd_dev, | 1832 | static int rbd_obj_method_sync(struct rbd_device *rbd_dev, |
1473 | const char *object_name, | 1833 | const char *object_name, |
1474 | const char *class_name, | 1834 | const char *class_name, |
1475 | const char *method_name, | 1835 | const char *method_name, |
@@ -1477,169 +1837,143 @@ static int rbd_req_sync_exec(struct rbd_device *rbd_dev, | |||
1477 | size_t outbound_size, | 1837 | size_t outbound_size, |
1478 | char *inbound, | 1838 | char *inbound, |
1479 | size_t inbound_size, | 1839 | size_t inbound_size, |
1480 | int flags, | 1840 | u64 *version) |
1481 | u64 *ver) | ||
1482 | { | 1841 | { |
1483 | struct ceph_osd_req_op *ops; | 1842 | struct rbd_obj_request *obj_request; |
1484 | int class_name_len = strlen(class_name); | 1843 | struct ceph_osd_client *osdc; |
1485 | int method_name_len = strlen(method_name); | 1844 | struct ceph_osd_req_op *op; |
1486 | int payload_size; | 1845 | struct page **pages; |
1846 | u32 page_count; | ||
1487 | int ret; | 1847 | int ret; |
1488 | 1848 | ||
1489 | /* | 1849 | /* |
1490 | * Any input parameters required by the method we're calling | 1850 | * Method calls are ultimately read operations but they |
1491 | * will be sent along with the class and method names as | 1851 | * don't involve object data (so no offset or length). |
1492 | * part of the message payload. That data and its size are | 1852 | * The result should placed into the inbound buffer |
1493 | * supplied via the indata and indata_len fields (named from | 1853 | * provided. They also supply outbound data--parameters for |
1494 | * the perspective of the server side) in the OSD request | 1854 | * the object method. Currently if this is present it will |
1495 | * operation. | 1855 | * be a snapshot id. |
1496 | */ | 1856 | */ |
1497 | payload_size = class_name_len + method_name_len + outbound_size; | 1857 | page_count = (u32) calc_pages_for(0, inbound_size); |
1498 | ops = rbd_create_rw_ops(1, CEPH_OSD_OP_CALL, payload_size); | 1858 | pages = ceph_alloc_page_vector(page_count, GFP_KERNEL); |
1499 | if (!ops) | 1859 | if (IS_ERR(pages)) |
1500 | return -ENOMEM; | 1860 | return PTR_ERR(pages); |
1501 | 1861 | ||
1502 | ops[0].cls.class_name = class_name; | 1862 | ret = -ENOMEM; |
1503 | ops[0].cls.class_len = (__u8) class_name_len; | 1863 | obj_request = rbd_obj_request_create(object_name, 0, 0, |
1504 | ops[0].cls.method_name = method_name; | 1864 | OBJ_REQUEST_PAGES); |
1505 | ops[0].cls.method_len = (__u8) method_name_len; | 1865 | if (!obj_request) |
1506 | ops[0].cls.argc = 0; | 1866 | goto out; |
1507 | ops[0].cls.indata = outbound; | ||
1508 | ops[0].cls.indata_len = outbound_size; | ||
1509 | 1867 | ||
1510 | ret = rbd_req_sync_op(rbd_dev, NULL, | 1868 | obj_request->pages = pages; |
1511 | CEPH_NOSNAP, | 1869 | obj_request->page_count = page_count; |
1512 | flags, ops, | ||
1513 | object_name, 0, inbound_size, inbound, | ||
1514 | NULL, ver); | ||
1515 | 1870 | ||
1516 | rbd_destroy_ops(ops); | 1871 | op = rbd_osd_req_op_create(CEPH_OSD_OP_CALL, class_name, |
1872 | method_name, outbound, outbound_size); | ||
1873 | if (!op) | ||
1874 | goto out; | ||
1875 | obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, | ||
1876 | obj_request, op); | ||
1877 | rbd_osd_req_op_destroy(op); | ||
1878 | if (!obj_request->osd_req) | ||
1879 | goto out; | ||
1517 | 1880 | ||
1518 | dout("cls_exec returned %d\n", ret); | 1881 | osdc = &rbd_dev->rbd_client->client->osdc; |
1519 | return ret; | 1882 | ret = rbd_obj_request_submit(osdc, obj_request); |
1520 | } | 1883 | if (ret) |
1884 | goto out; | ||
1885 | ret = rbd_obj_request_wait(obj_request); | ||
1886 | if (ret) | ||
1887 | goto out; | ||
1521 | 1888 | ||
1522 | static struct rbd_req_coll *rbd_alloc_coll(int num_reqs) | 1889 | ret = obj_request->result; |
1523 | { | 1890 | if (ret < 0) |
1524 | struct rbd_req_coll *coll = | 1891 | goto out; |
1525 | kzalloc(sizeof(struct rbd_req_coll) + | 1892 | ret = 0; |
1526 | sizeof(struct rbd_req_status) * num_reqs, | 1893 | ceph_copy_from_page_vector(pages, inbound, 0, obj_request->xferred); |
1527 | GFP_ATOMIC); | 1894 | if (version) |
1895 | *version = obj_request->version; | ||
1896 | out: | ||
1897 | if (obj_request) | ||
1898 | rbd_obj_request_put(obj_request); | ||
1899 | else | ||
1900 | ceph_release_page_vector(pages, page_count); | ||
1528 | 1901 | ||
1529 | if (!coll) | 1902 | return ret; |
1530 | return NULL; | ||
1531 | coll->total = num_reqs; | ||
1532 | kref_init(&coll->kref); | ||
1533 | return coll; | ||
1534 | } | 1903 | } |
1535 | 1904 | ||
1536 | /* | 1905 | static void rbd_request_fn(struct request_queue *q) |
1537 | * block device queue callback | ||
1538 | */ | ||
1539 | static void rbd_rq_fn(struct request_queue *q) | ||
1540 | { | 1906 | { |
1541 | struct rbd_device *rbd_dev = q->queuedata; | 1907 | struct rbd_device *rbd_dev = q->queuedata; |
1908 | bool read_only = rbd_dev->mapping.read_only; | ||
1542 | struct request *rq; | 1909 | struct request *rq; |
1910 | int result; | ||
1543 | 1911 | ||
1544 | while ((rq = blk_fetch_request(q))) { | 1912 | while ((rq = blk_fetch_request(q))) { |
1545 | struct bio *bio; | 1913 | bool write_request = rq_data_dir(rq) == WRITE; |
1546 | bool do_write; | 1914 | struct rbd_img_request *img_request; |
1547 | unsigned int size; | 1915 | u64 offset; |
1548 | u64 ofs; | 1916 | u64 length; |
1549 | int num_segs, cur_seg = 0; | 1917 | |
1550 | struct rbd_req_coll *coll; | 1918 | /* Ignore any non-FS requests that filter through. */ |
1551 | struct ceph_snap_context *snapc; | ||
1552 | unsigned int bio_offset; | ||
1553 | |||
1554 | dout("fetched request\n"); | ||
1555 | |||
1556 | /* filter out block requests we don't understand */ | ||
1557 | if ((rq->cmd_type != REQ_TYPE_FS)) { | ||
1558 | __blk_end_request_all(rq, 0); | ||
1559 | continue; | ||
1560 | } | ||
1561 | 1919 | ||
1562 | /* deduce our operation (read, write) */ | 1920 | if (rq->cmd_type != REQ_TYPE_FS) { |
1563 | do_write = (rq_data_dir(rq) == WRITE); | 1921 | __blk_end_request_all(rq, 0); |
1564 | if (do_write && rbd_dev->mapping.read_only) { | ||
1565 | __blk_end_request_all(rq, -EROFS); | ||
1566 | continue; | 1922 | continue; |
1567 | } | 1923 | } |
1568 | 1924 | ||
1569 | spin_unlock_irq(q->queue_lock); | 1925 | spin_unlock_irq(q->queue_lock); |
1570 | 1926 | ||
1571 | down_read(&rbd_dev->header_rwsem); | 1927 | /* Disallow writes to a read-only device */ |
1572 | 1928 | ||
1573 | if (!rbd_dev->exists) { | 1929 | if (write_request) { |
1574 | rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP); | 1930 | result = -EROFS; |
1575 | up_read(&rbd_dev->header_rwsem); | 1931 | if (read_only) |
1576 | dout("request for non-existent snapshot"); | 1932 | goto end_request; |
1577 | spin_lock_irq(q->queue_lock); | 1933 | rbd_assert(rbd_dev->spec->snap_id == CEPH_NOSNAP); |
1578 | __blk_end_request_all(rq, -ENXIO); | ||
1579 | continue; | ||
1580 | } | 1934 | } |
1581 | 1935 | ||
1582 | snapc = ceph_get_snap_context(rbd_dev->header.snapc); | 1936 | /* |
1583 | 1937 | * Quit early if the mapped snapshot no longer | |
1584 | up_read(&rbd_dev->header_rwsem); | 1938 | * exists. It's still possible the snapshot will |
1585 | 1939 | * have disappeared by the time our request arrives | |
1586 | size = blk_rq_bytes(rq); | 1940 | * at the osd, but there's no sense in sending it if |
1587 | ofs = blk_rq_pos(rq) * SECTOR_SIZE; | 1941 | * we already know. |
1588 | bio = rq->bio; | 1942 | */ |
1589 | 1943 | if (!test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags)) { | |
1590 | dout("%s 0x%x bytes at 0x%llx\n", | 1944 | dout("request for non-existent snapshot"); |
1591 | do_write ? "write" : "read", | 1945 | rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP); |
1592 | size, (unsigned long long) blk_rq_pos(rq) * SECTOR_SIZE); | 1946 | result = -ENXIO; |
1593 | 1947 | goto end_request; | |
1594 | num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size); | ||
1595 | if (num_segs <= 0) { | ||
1596 | spin_lock_irq(q->queue_lock); | ||
1597 | __blk_end_request_all(rq, num_segs); | ||
1598 | ceph_put_snap_context(snapc); | ||
1599 | continue; | ||
1600 | } | ||
1601 | coll = rbd_alloc_coll(num_segs); | ||
1602 | if (!coll) { | ||
1603 | spin_lock_irq(q->queue_lock); | ||
1604 | __blk_end_request_all(rq, -ENOMEM); | ||
1605 | ceph_put_snap_context(snapc); | ||
1606 | continue; | ||
1607 | } | 1948 | } |
1608 | 1949 | ||
1609 | bio_offset = 0; | 1950 | offset = (u64) blk_rq_pos(rq) << SECTOR_SHIFT; |
1610 | do { | 1951 | length = (u64) blk_rq_bytes(rq); |
1611 | u64 limit = rbd_segment_length(rbd_dev, ofs, size); | ||
1612 | unsigned int chain_size; | ||
1613 | struct bio *bio_chain; | ||
1614 | 1952 | ||
1615 | BUG_ON(limit > (u64) UINT_MAX); | 1953 | result = -EINVAL; |
1616 | chain_size = (unsigned int) limit; | 1954 | if (WARN_ON(offset && length > U64_MAX - offset + 1)) |
1617 | dout("rq->bio->bi_vcnt=%hu\n", rq->bio->bi_vcnt); | 1955 | goto end_request; /* Shouldn't happen */ |
1618 | 1956 | ||
1619 | kref_get(&coll->kref); | 1957 | result = -ENOMEM; |
1958 | img_request = rbd_img_request_create(rbd_dev, offset, length, | ||
1959 | write_request); | ||
1960 | if (!img_request) | ||
1961 | goto end_request; | ||
1620 | 1962 | ||
1621 | /* Pass a cloned bio chain via an osd request */ | 1963 | img_request->rq = rq; |
1622 | |||
1623 | bio_chain = bio_chain_clone_range(&bio, | ||
1624 | &bio_offset, chain_size, | ||
1625 | GFP_ATOMIC); | ||
1626 | if (bio_chain) | ||
1627 | (void) rbd_do_op(rq, rbd_dev, snapc, | ||
1628 | ofs, chain_size, | ||
1629 | bio_chain, coll, cur_seg); | ||
1630 | else | ||
1631 | rbd_coll_end_req_index(rq, coll, cur_seg, | ||
1632 | -ENOMEM, chain_size); | ||
1633 | size -= chain_size; | ||
1634 | ofs += chain_size; | ||
1635 | |||
1636 | cur_seg++; | ||
1637 | } while (size > 0); | ||
1638 | kref_put(&coll->kref, rbd_coll_release); | ||
1639 | 1964 | ||
1965 | result = rbd_img_request_fill_bio(img_request, rq->bio); | ||
1966 | if (!result) | ||
1967 | result = rbd_img_request_submit(img_request); | ||
1968 | if (result) | ||
1969 | rbd_img_request_put(img_request); | ||
1970 | end_request: | ||
1640 | spin_lock_irq(q->queue_lock); | 1971 | spin_lock_irq(q->queue_lock); |
1641 | 1972 | if (result < 0) { | |
1642 | ceph_put_snap_context(snapc); | 1973 | rbd_warn(rbd_dev, "obj_request %s result %d\n", |
1974 | write_request ? "write" : "read", result); | ||
1975 | __blk_end_request_all(rq, result); | ||
1976 | } | ||
1643 | } | 1977 | } |
1644 | } | 1978 | } |
1645 | 1979 | ||
@@ -1703,6 +2037,71 @@ static void rbd_free_disk(struct rbd_device *rbd_dev) | |||
1703 | put_disk(disk); | 2037 | put_disk(disk); |
1704 | } | 2038 | } |
1705 | 2039 | ||
2040 | static int rbd_obj_read_sync(struct rbd_device *rbd_dev, | ||
2041 | const char *object_name, | ||
2042 | u64 offset, u64 length, | ||
2043 | char *buf, u64 *version) | ||
2044 | |||
2045 | { | ||
2046 | struct ceph_osd_req_op *op; | ||
2047 | struct rbd_obj_request *obj_request; | ||
2048 | struct ceph_osd_client *osdc; | ||
2049 | struct page **pages = NULL; | ||
2050 | u32 page_count; | ||
2051 | size_t size; | ||
2052 | int ret; | ||
2053 | |||
2054 | page_count = (u32) calc_pages_for(offset, length); | ||
2055 | pages = ceph_alloc_page_vector(page_count, GFP_KERNEL); | ||
2056 | if (IS_ERR(pages)) | ||
2057 | ret = PTR_ERR(pages); | ||
2058 | |||
2059 | ret = -ENOMEM; | ||
2060 | obj_request = rbd_obj_request_create(object_name, offset, length, | ||
2061 | OBJ_REQUEST_PAGES); | ||
2062 | if (!obj_request) | ||
2063 | goto out; | ||
2064 | |||
2065 | obj_request->pages = pages; | ||
2066 | obj_request->page_count = page_count; | ||
2067 | |||
2068 | op = rbd_osd_req_op_create(CEPH_OSD_OP_READ, offset, length); | ||
2069 | if (!op) | ||
2070 | goto out; | ||
2071 | obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, | ||
2072 | obj_request, op); | ||
2073 | rbd_osd_req_op_destroy(op); | ||
2074 | if (!obj_request->osd_req) | ||
2075 | goto out; | ||
2076 | |||
2077 | osdc = &rbd_dev->rbd_client->client->osdc; | ||
2078 | ret = rbd_obj_request_submit(osdc, obj_request); | ||
2079 | if (ret) | ||
2080 | goto out; | ||
2081 | ret = rbd_obj_request_wait(obj_request); | ||
2082 | if (ret) | ||
2083 | goto out; | ||
2084 | |||
2085 | ret = obj_request->result; | ||
2086 | if (ret < 0) | ||
2087 | goto out; | ||
2088 | |||
2089 | rbd_assert(obj_request->xferred <= (u64) SIZE_MAX); | ||
2090 | size = (size_t) obj_request->xferred; | ||
2091 | ceph_copy_from_page_vector(pages, buf, 0, size); | ||
2092 | rbd_assert(size <= (size_t) INT_MAX); | ||
2093 | ret = (int) size; | ||
2094 | if (version) | ||
2095 | *version = obj_request->version; | ||
2096 | out: | ||
2097 | if (obj_request) | ||
2098 | rbd_obj_request_put(obj_request); | ||
2099 | else | ||
2100 | ceph_release_page_vector(pages, page_count); | ||
2101 | |||
2102 | return ret; | ||
2103 | } | ||
2104 | |||
1706 | /* | 2105 | /* |
1707 | * Read the complete header for the given rbd device. | 2106 | * Read the complete header for the given rbd device. |
1708 | * | 2107 | * |
@@ -1741,24 +2140,20 @@ rbd_dev_v1_header_read(struct rbd_device *rbd_dev, u64 *version) | |||
1741 | if (!ondisk) | 2140 | if (!ondisk) |
1742 | return ERR_PTR(-ENOMEM); | 2141 | return ERR_PTR(-ENOMEM); |
1743 | 2142 | ||
1744 | ret = rbd_req_sync_read(rbd_dev, CEPH_NOSNAP, | 2143 | ret = rbd_obj_read_sync(rbd_dev, rbd_dev->header_name, |
1745 | rbd_dev->header_name, | ||
1746 | 0, size, | 2144 | 0, size, |
1747 | (char *) ondisk, version); | 2145 | (char *) ondisk, version); |
1748 | |||
1749 | if (ret < 0) | 2146 | if (ret < 0) |
1750 | goto out_err; | 2147 | goto out_err; |
1751 | if (WARN_ON((size_t) ret < size)) { | 2148 | if (WARN_ON((size_t) ret < size)) { |
1752 | ret = -ENXIO; | 2149 | ret = -ENXIO; |
1753 | pr_warning("short header read for image %s" | 2150 | rbd_warn(rbd_dev, "short header read (want %zd got %d)", |
1754 | " (want %zd got %d)\n", | 2151 | size, ret); |
1755 | rbd_dev->spec->image_name, size, ret); | ||
1756 | goto out_err; | 2152 | goto out_err; |
1757 | } | 2153 | } |
1758 | if (!rbd_dev_ondisk_valid(ondisk)) { | 2154 | if (!rbd_dev_ondisk_valid(ondisk)) { |
1759 | ret = -ENXIO; | 2155 | ret = -ENXIO; |
1760 | pr_warning("invalid header for image %s\n", | 2156 | rbd_warn(rbd_dev, "invalid header"); |
1761 | rbd_dev->spec->image_name); | ||
1762 | goto out_err; | 2157 | goto out_err; |
1763 | } | 2158 | } |
1764 | 2159 | ||
@@ -1895,8 +2290,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) | |||
1895 | disk->fops = &rbd_bd_ops; | 2290 | disk->fops = &rbd_bd_ops; |
1896 | disk->private_data = rbd_dev; | 2291 | disk->private_data = rbd_dev; |
1897 | 2292 | ||
1898 | /* init rq */ | 2293 | q = blk_init_queue(rbd_request_fn, &rbd_dev->lock); |
1899 | q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock); | ||
1900 | if (!q) | 2294 | if (!q) |
1901 | goto out_disk; | 2295 | goto out_disk; |
1902 | 2296 | ||
@@ -2243,6 +2637,7 @@ struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, | |||
2243 | return NULL; | 2637 | return NULL; |
2244 | 2638 | ||
2245 | spin_lock_init(&rbd_dev->lock); | 2639 | spin_lock_init(&rbd_dev->lock); |
2640 | rbd_dev->flags = 0; | ||
2246 | INIT_LIST_HEAD(&rbd_dev->node); | 2641 | INIT_LIST_HEAD(&rbd_dev->node); |
2247 | INIT_LIST_HEAD(&rbd_dev->snaps); | 2642 | INIT_LIST_HEAD(&rbd_dev->snaps); |
2248 | init_rwsem(&rbd_dev->header_rwsem); | 2643 | init_rwsem(&rbd_dev->header_rwsem); |
@@ -2250,6 +2645,13 @@ struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, | |||
2250 | rbd_dev->spec = spec; | 2645 | rbd_dev->spec = spec; |
2251 | rbd_dev->rbd_client = rbdc; | 2646 | rbd_dev->rbd_client = rbdc; |
2252 | 2647 | ||
2648 | /* Initialize the layout used for all rbd requests */ | ||
2649 | |||
2650 | rbd_dev->layout.fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); | ||
2651 | rbd_dev->layout.fl_stripe_count = cpu_to_le32(1); | ||
2652 | rbd_dev->layout.fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); | ||
2653 | rbd_dev->layout.fl_pg_pool = cpu_to_le32((u32) spec->pool_id); | ||
2654 | |||
2253 | return rbd_dev; | 2655 | return rbd_dev; |
2254 | } | 2656 | } |
2255 | 2657 | ||
@@ -2360,12 +2762,11 @@ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, | |||
2360 | __le64 size; | 2762 | __le64 size; |
2361 | } __attribute__ ((packed)) size_buf = { 0 }; | 2763 | } __attribute__ ((packed)) size_buf = { 0 }; |
2362 | 2764 | ||
2363 | ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, | 2765 | ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, |
2364 | "rbd", "get_size", | 2766 | "rbd", "get_size", |
2365 | (char *) &snapid, sizeof (snapid), | 2767 | (char *) &snapid, sizeof (snapid), |
2366 | (char *) &size_buf, sizeof (size_buf), | 2768 | (char *) &size_buf, sizeof (size_buf), NULL); |
2367 | CEPH_OSD_FLAG_READ, NULL); | 2769 | dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); |
2368 | dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); | ||
2369 | if (ret < 0) | 2770 | if (ret < 0) |
2370 | return ret; | 2771 | return ret; |
2371 | 2772 | ||
@@ -2396,15 +2797,13 @@ static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev) | |||
2396 | if (!reply_buf) | 2797 | if (!reply_buf) |
2397 | return -ENOMEM; | 2798 | return -ENOMEM; |
2398 | 2799 | ||
2399 | ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, | 2800 | ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, |
2400 | "rbd", "get_object_prefix", | 2801 | "rbd", "get_object_prefix", |
2401 | NULL, 0, | 2802 | NULL, 0, |
2402 | reply_buf, RBD_OBJ_PREFIX_LEN_MAX, | 2803 | reply_buf, RBD_OBJ_PREFIX_LEN_MAX, NULL); |
2403 | CEPH_OSD_FLAG_READ, NULL); | 2804 | dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); |
2404 | dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); | ||
2405 | if (ret < 0) | 2805 | if (ret < 0) |
2406 | goto out; | 2806 | goto out; |
2407 | ret = 0; /* rbd_req_sync_exec() can return positive */ | ||
2408 | 2807 | ||
2409 | p = reply_buf; | 2808 | p = reply_buf; |
2410 | rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p, | 2809 | rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p, |
@@ -2435,12 +2834,12 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, | |||
2435 | u64 incompat; | 2834 | u64 incompat; |
2436 | int ret; | 2835 | int ret; |
2437 | 2836 | ||
2438 | ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, | 2837 | ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, |
2439 | "rbd", "get_features", | 2838 | "rbd", "get_features", |
2440 | (char *) &snapid, sizeof (snapid), | 2839 | (char *) &snapid, sizeof (snapid), |
2441 | (char *) &features_buf, sizeof (features_buf), | 2840 | (char *) &features_buf, sizeof (features_buf), |
2442 | CEPH_OSD_FLAG_READ, NULL); | 2841 | NULL); |
2443 | dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); | 2842 | dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); |
2444 | if (ret < 0) | 2843 | if (ret < 0) |
2445 | return ret; | 2844 | return ret; |
2446 | 2845 | ||
@@ -2474,7 +2873,6 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) | |||
2474 | void *end; | 2873 | void *end; |
2475 | char *image_id; | 2874 | char *image_id; |
2476 | u64 overlap; | 2875 | u64 overlap; |
2477 | size_t len = 0; | ||
2478 | int ret; | 2876 | int ret; |
2479 | 2877 | ||
2480 | parent_spec = rbd_spec_alloc(); | 2878 | parent_spec = rbd_spec_alloc(); |
@@ -2492,12 +2890,11 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) | |||
2492 | } | 2890 | } |
2493 | 2891 | ||
2494 | snapid = cpu_to_le64(CEPH_NOSNAP); | 2892 | snapid = cpu_to_le64(CEPH_NOSNAP); |
2495 | ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, | 2893 | ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, |
2496 | "rbd", "get_parent", | 2894 | "rbd", "get_parent", |
2497 | (char *) &snapid, sizeof (snapid), | 2895 | (char *) &snapid, sizeof (snapid), |
2498 | (char *) reply_buf, size, | 2896 | (char *) reply_buf, size, NULL); |
2499 | CEPH_OSD_FLAG_READ, NULL); | 2897 | dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); |
2500 | dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); | ||
2501 | if (ret < 0) | 2898 | if (ret < 0) |
2502 | goto out_err; | 2899 | goto out_err; |
2503 | 2900 | ||
@@ -2508,13 +2905,18 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) | |||
2508 | if (parent_spec->pool_id == CEPH_NOPOOL) | 2905 | if (parent_spec->pool_id == CEPH_NOPOOL) |
2509 | goto out; /* No parent? No problem. */ | 2906 | goto out; /* No parent? No problem. */ |
2510 | 2907 | ||
2511 | image_id = ceph_extract_encoded_string(&p, end, &len, GFP_KERNEL); | 2908 | /* The ceph file layout needs to fit pool id in 32 bits */ |
2909 | |||
2910 | ret = -EIO; | ||
2911 | if (WARN_ON(parent_spec->pool_id > (u64) U32_MAX)) | ||
2912 | goto out; | ||
2913 | |||
2914 | image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL); | ||
2512 | if (IS_ERR(image_id)) { | 2915 | if (IS_ERR(image_id)) { |
2513 | ret = PTR_ERR(image_id); | 2916 | ret = PTR_ERR(image_id); |
2514 | goto out_err; | 2917 | goto out_err; |
2515 | } | 2918 | } |
2516 | parent_spec->image_id = image_id; | 2919 | parent_spec->image_id = image_id; |
2517 | parent_spec->image_id_len = len; | ||
2518 | ceph_decode_64_safe(&p, end, parent_spec->snap_id, out_err); | 2920 | ceph_decode_64_safe(&p, end, parent_spec->snap_id, out_err); |
2519 | ceph_decode_64_safe(&p, end, overlap, out_err); | 2921 | ceph_decode_64_safe(&p, end, overlap, out_err); |
2520 | 2922 | ||
@@ -2544,26 +2946,25 @@ static char *rbd_dev_image_name(struct rbd_device *rbd_dev) | |||
2544 | 2946 | ||
2545 | rbd_assert(!rbd_dev->spec->image_name); | 2947 | rbd_assert(!rbd_dev->spec->image_name); |
2546 | 2948 | ||
2547 | image_id_size = sizeof (__le32) + rbd_dev->spec->image_id_len; | 2949 | len = strlen(rbd_dev->spec->image_id); |
2950 | image_id_size = sizeof (__le32) + len; | ||
2548 | image_id = kmalloc(image_id_size, GFP_KERNEL); | 2951 | image_id = kmalloc(image_id_size, GFP_KERNEL); |
2549 | if (!image_id) | 2952 | if (!image_id) |
2550 | return NULL; | 2953 | return NULL; |
2551 | 2954 | ||
2552 | p = image_id; | 2955 | p = image_id; |
2553 | end = (char *) image_id + image_id_size; | 2956 | end = (char *) image_id + image_id_size; |
2554 | ceph_encode_string(&p, end, rbd_dev->spec->image_id, | 2957 | ceph_encode_string(&p, end, rbd_dev->spec->image_id, (u32) len); |
2555 | (u32) rbd_dev->spec->image_id_len); | ||
2556 | 2958 | ||
2557 | size = sizeof (__le32) + RBD_IMAGE_NAME_LEN_MAX; | 2959 | size = sizeof (__le32) + RBD_IMAGE_NAME_LEN_MAX; |
2558 | reply_buf = kmalloc(size, GFP_KERNEL); | 2960 | reply_buf = kmalloc(size, GFP_KERNEL); |
2559 | if (!reply_buf) | 2961 | if (!reply_buf) |
2560 | goto out; | 2962 | goto out; |
2561 | 2963 | ||
2562 | ret = rbd_req_sync_exec(rbd_dev, RBD_DIRECTORY, | 2964 | ret = rbd_obj_method_sync(rbd_dev, RBD_DIRECTORY, |
2563 | "rbd", "dir_get_name", | 2965 | "rbd", "dir_get_name", |
2564 | image_id, image_id_size, | 2966 | image_id, image_id_size, |
2565 | (char *) reply_buf, size, | 2967 | (char *) reply_buf, size, NULL); |
2566 | CEPH_OSD_FLAG_READ, NULL); | ||
2567 | if (ret < 0) | 2968 | if (ret < 0) |
2568 | goto out; | 2969 | goto out; |
2569 | p = reply_buf; | 2970 | p = reply_buf; |
@@ -2602,8 +3003,11 @@ static int rbd_dev_probe_update_spec(struct rbd_device *rbd_dev) | |||
2602 | 3003 | ||
2603 | osdc = &rbd_dev->rbd_client->client->osdc; | 3004 | osdc = &rbd_dev->rbd_client->client->osdc; |
2604 | name = ceph_pg_pool_name_by_id(osdc->osdmap, rbd_dev->spec->pool_id); | 3005 | name = ceph_pg_pool_name_by_id(osdc->osdmap, rbd_dev->spec->pool_id); |
2605 | if (!name) | 3006 | if (!name) { |
2606 | return -EIO; /* pool id too large (>= 2^31) */ | 3007 | rbd_warn(rbd_dev, "there is no pool with id %llu", |
3008 | rbd_dev->spec->pool_id); /* Really a BUG() */ | ||
3009 | return -EIO; | ||
3010 | } | ||
2607 | 3011 | ||
2608 | rbd_dev->spec->pool_name = kstrdup(name, GFP_KERNEL); | 3012 | rbd_dev->spec->pool_name = kstrdup(name, GFP_KERNEL); |
2609 | if (!rbd_dev->spec->pool_name) | 3013 | if (!rbd_dev->spec->pool_name) |
@@ -2612,19 +3016,17 @@ static int rbd_dev_probe_update_spec(struct rbd_device *rbd_dev) | |||
2612 | /* Fetch the image name; tolerate failure here */ | 3016 | /* Fetch the image name; tolerate failure here */ |
2613 | 3017 | ||
2614 | name = rbd_dev_image_name(rbd_dev); | 3018 | name = rbd_dev_image_name(rbd_dev); |
2615 | if (name) { | 3019 | if (name) |
2616 | rbd_dev->spec->image_name_len = strlen(name); | ||
2617 | rbd_dev->spec->image_name = (char *) name; | 3020 | rbd_dev->spec->image_name = (char *) name; |
2618 | } else { | 3021 | else |
2619 | pr_warning(RBD_DRV_NAME "%d " | 3022 | rbd_warn(rbd_dev, "unable to get image name"); |
2620 | "unable to get image name for image id %s\n", | ||
2621 | rbd_dev->major, rbd_dev->spec->image_id); | ||
2622 | } | ||
2623 | 3023 | ||
2624 | /* Look up the snapshot name. */ | 3024 | /* Look up the snapshot name. */ |
2625 | 3025 | ||
2626 | name = rbd_snap_name(rbd_dev, rbd_dev->spec->snap_id); | 3026 | name = rbd_snap_name(rbd_dev, rbd_dev->spec->snap_id); |
2627 | if (!name) { | 3027 | if (!name) { |
3028 | rbd_warn(rbd_dev, "no snapshot with id %llu", | ||
3029 | rbd_dev->spec->snap_id); /* Really a BUG() */ | ||
2628 | ret = -EIO; | 3030 | ret = -EIO; |
2629 | goto out_err; | 3031 | goto out_err; |
2630 | } | 3032 | } |
@@ -2665,12 +3067,11 @@ static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, u64 *ver) | |||
2665 | if (!reply_buf) | 3067 | if (!reply_buf) |
2666 | return -ENOMEM; | 3068 | return -ENOMEM; |
2667 | 3069 | ||
2668 | ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, | 3070 | ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, |
2669 | "rbd", "get_snapcontext", | 3071 | "rbd", "get_snapcontext", |
2670 | NULL, 0, | 3072 | NULL, 0, |
2671 | reply_buf, size, | 3073 | reply_buf, size, ver); |
2672 | CEPH_OSD_FLAG_READ, ver); | 3074 | dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); |
2673 | dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); | ||
2674 | if (ret < 0) | 3075 | if (ret < 0) |
2675 | goto out; | 3076 | goto out; |
2676 | 3077 | ||
@@ -2735,12 +3136,11 @@ static char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u32 which) | |||
2735 | return ERR_PTR(-ENOMEM); | 3136 | return ERR_PTR(-ENOMEM); |
2736 | 3137 | ||
2737 | snap_id = cpu_to_le64(rbd_dev->header.snapc->snaps[which]); | 3138 | snap_id = cpu_to_le64(rbd_dev->header.snapc->snaps[which]); |
2738 | ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, | 3139 | ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, |
2739 | "rbd", "get_snapshot_name", | 3140 | "rbd", "get_snapshot_name", |
2740 | (char *) &snap_id, sizeof (snap_id), | 3141 | (char *) &snap_id, sizeof (snap_id), |
2741 | reply_buf, size, | 3142 | reply_buf, size, NULL); |
2742 | CEPH_OSD_FLAG_READ, NULL); | 3143 | dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); |
2743 | dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); | ||
2744 | if (ret < 0) | 3144 | if (ret < 0) |
2745 | goto out; | 3145 | goto out; |
2746 | 3146 | ||
@@ -2766,7 +3166,7 @@ out: | |||
2766 | static char *rbd_dev_v2_snap_info(struct rbd_device *rbd_dev, u32 which, | 3166 | static char *rbd_dev_v2_snap_info(struct rbd_device *rbd_dev, u32 which, |
2767 | u64 *snap_size, u64 *snap_features) | 3167 | u64 *snap_size, u64 *snap_features) |
2768 | { | 3168 | { |
2769 | __le64 snap_id; | 3169 | u64 snap_id; |
2770 | u8 order; | 3170 | u8 order; |
2771 | int ret; | 3171 | int ret; |
2772 | 3172 | ||
@@ -2865,10 +3265,17 @@ static int rbd_dev_snaps_update(struct rbd_device *rbd_dev) | |||
2865 | if (snap_id == CEPH_NOSNAP || (snap && snap->id > snap_id)) { | 3265 | if (snap_id == CEPH_NOSNAP || (snap && snap->id > snap_id)) { |
2866 | struct list_head *next = links->next; | 3266 | struct list_head *next = links->next; |
2867 | 3267 | ||
2868 | /* Existing snapshot not in the new snap context */ | 3268 | /* |
2869 | 3269 | * A previously-existing snapshot is not in | |
3270 | * the new snap context. | ||
3271 | * | ||
3272 | * If the now missing snapshot is the one the | ||
3273 | * image is mapped to, clear its exists flag | ||
3274 | * so we can avoid sending any more requests | ||
3275 | * to it. | ||
3276 | */ | ||
2870 | if (rbd_dev->spec->snap_id == snap->id) | 3277 | if (rbd_dev->spec->snap_id == snap->id) |
2871 | rbd_dev->exists = false; | 3278 | clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); |
2872 | rbd_remove_snap_dev(snap); | 3279 | rbd_remove_snap_dev(snap); |
2873 | dout("%ssnap id %llu has been removed\n", | 3280 | dout("%ssnap id %llu has been removed\n", |
2874 | rbd_dev->spec->snap_id == snap->id ? | 3281 | rbd_dev->spec->snap_id == snap->id ? |
@@ -2983,22 +3390,6 @@ static void rbd_bus_del_dev(struct rbd_device *rbd_dev) | |||
2983 | device_unregister(&rbd_dev->dev); | 3390 | device_unregister(&rbd_dev->dev); |
2984 | } | 3391 | } |
2985 | 3392 | ||
2986 | static int rbd_init_watch_dev(struct rbd_device *rbd_dev) | ||
2987 | { | ||
2988 | int ret, rc; | ||
2989 | |||
2990 | do { | ||
2991 | ret = rbd_req_sync_watch(rbd_dev); | ||
2992 | if (ret == -ERANGE) { | ||
2993 | rc = rbd_dev_refresh(rbd_dev, NULL); | ||
2994 | if (rc < 0) | ||
2995 | return rc; | ||
2996 | } | ||
2997 | } while (ret == -ERANGE); | ||
2998 | |||
2999 | return ret; | ||
3000 | } | ||
3001 | |||
3002 | static atomic64_t rbd_dev_id_max = ATOMIC64_INIT(0); | 3393 | static atomic64_t rbd_dev_id_max = ATOMIC64_INIT(0); |
3003 | 3394 | ||
3004 | /* | 3395 | /* |
@@ -3138,11 +3529,9 @@ static inline char *dup_token(const char **buf, size_t *lenp) | |||
3138 | size_t len; | 3529 | size_t len; |
3139 | 3530 | ||
3140 | len = next_token(buf); | 3531 | len = next_token(buf); |
3141 | dup = kmalloc(len + 1, GFP_KERNEL); | 3532 | dup = kmemdup(*buf, len + 1, GFP_KERNEL); |
3142 | if (!dup) | 3533 | if (!dup) |
3143 | return NULL; | 3534 | return NULL; |
3144 | |||
3145 | memcpy(dup, *buf, len); | ||
3146 | *(dup + len) = '\0'; | 3535 | *(dup + len) = '\0'; |
3147 | *buf += len; | 3536 | *buf += len; |
3148 | 3537 | ||
@@ -3210,8 +3599,10 @@ static int rbd_add_parse_args(const char *buf, | |||
3210 | /* The first four tokens are required */ | 3599 | /* The first four tokens are required */ |
3211 | 3600 | ||
3212 | len = next_token(&buf); | 3601 | len = next_token(&buf); |
3213 | if (!len) | 3602 | if (!len) { |
3214 | return -EINVAL; /* Missing monitor address(es) */ | 3603 | rbd_warn(NULL, "no monitor address(es) provided"); |
3604 | return -EINVAL; | ||
3605 | } | ||
3215 | mon_addrs = buf; | 3606 | mon_addrs = buf; |
3216 | mon_addrs_size = len + 1; | 3607 | mon_addrs_size = len + 1; |
3217 | buf += len; | 3608 | buf += len; |
@@ -3220,8 +3611,10 @@ static int rbd_add_parse_args(const char *buf, | |||
3220 | options = dup_token(&buf, NULL); | 3611 | options = dup_token(&buf, NULL); |
3221 | if (!options) | 3612 | if (!options) |
3222 | return -ENOMEM; | 3613 | return -ENOMEM; |
3223 | if (!*options) | 3614 | if (!*options) { |
3224 | goto out_err; /* Missing options */ | 3615 | rbd_warn(NULL, "no options provided"); |
3616 | goto out_err; | ||
3617 | } | ||
3225 | 3618 | ||
3226 | spec = rbd_spec_alloc(); | 3619 | spec = rbd_spec_alloc(); |
3227 | if (!spec) | 3620 | if (!spec) |
@@ -3230,14 +3623,18 @@ static int rbd_add_parse_args(const char *buf, | |||
3230 | spec->pool_name = dup_token(&buf, NULL); | 3623 | spec->pool_name = dup_token(&buf, NULL); |
3231 | if (!spec->pool_name) | 3624 | if (!spec->pool_name) |
3232 | goto out_mem; | 3625 | goto out_mem; |
3233 | if (!*spec->pool_name) | 3626 | if (!*spec->pool_name) { |
3234 | goto out_err; /* Missing pool name */ | 3627 | rbd_warn(NULL, "no pool name provided"); |
3628 | goto out_err; | ||
3629 | } | ||
3235 | 3630 | ||
3236 | spec->image_name = dup_token(&buf, &spec->image_name_len); | 3631 | spec->image_name = dup_token(&buf, NULL); |
3237 | if (!spec->image_name) | 3632 | if (!spec->image_name) |
3238 | goto out_mem; | 3633 | goto out_mem; |
3239 | if (!*spec->image_name) | 3634 | if (!*spec->image_name) { |
3240 | goto out_err; /* Missing image name */ | 3635 | rbd_warn(NULL, "no image name provided"); |
3636 | goto out_err; | ||
3637 | } | ||
3241 | 3638 | ||
3242 | /* | 3639 | /* |
3243 | * Snapshot name is optional; default is to use "-" | 3640 | * Snapshot name is optional; default is to use "-" |
@@ -3251,10 +3648,9 @@ static int rbd_add_parse_args(const char *buf, | |||
3251 | ret = -ENAMETOOLONG; | 3648 | ret = -ENAMETOOLONG; |
3252 | goto out_err; | 3649 | goto out_err; |
3253 | } | 3650 | } |
3254 | spec->snap_name = kmalloc(len + 1, GFP_KERNEL); | 3651 | spec->snap_name = kmemdup(buf, len + 1, GFP_KERNEL); |
3255 | if (!spec->snap_name) | 3652 | if (!spec->snap_name) |
3256 | goto out_mem; | 3653 | goto out_mem; |
3257 | memcpy(spec->snap_name, buf, len); | ||
3258 | *(spec->snap_name + len) = '\0'; | 3654 | *(spec->snap_name + len) = '\0'; |
3259 | 3655 | ||
3260 | /* Initialize all rbd options to the defaults */ | 3656 | /* Initialize all rbd options to the defaults */ |
@@ -3323,7 +3719,7 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) | |||
3323 | * First, see if the format 2 image id file exists, and if | 3719 | * First, see if the format 2 image id file exists, and if |
3324 | * so, get the image's persistent id from it. | 3720 | * so, get the image's persistent id from it. |
3325 | */ | 3721 | */ |
3326 | size = sizeof (RBD_ID_PREFIX) + rbd_dev->spec->image_name_len; | 3722 | size = sizeof (RBD_ID_PREFIX) + strlen(rbd_dev->spec->image_name); |
3327 | object_name = kmalloc(size, GFP_NOIO); | 3723 | object_name = kmalloc(size, GFP_NOIO); |
3328 | if (!object_name) | 3724 | if (!object_name) |
3329 | return -ENOMEM; | 3725 | return -ENOMEM; |
@@ -3339,21 +3735,18 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) | |||
3339 | goto out; | 3735 | goto out; |
3340 | } | 3736 | } |
3341 | 3737 | ||
3342 | ret = rbd_req_sync_exec(rbd_dev, object_name, | 3738 | ret = rbd_obj_method_sync(rbd_dev, object_name, |
3343 | "rbd", "get_id", | 3739 | "rbd", "get_id", |
3344 | NULL, 0, | 3740 | NULL, 0, |
3345 | response, RBD_IMAGE_ID_LEN_MAX, | 3741 | response, RBD_IMAGE_ID_LEN_MAX, NULL); |
3346 | CEPH_OSD_FLAG_READ, NULL); | 3742 | dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); |
3347 | dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); | ||
3348 | if (ret < 0) | 3743 | if (ret < 0) |
3349 | goto out; | 3744 | goto out; |
3350 | ret = 0; /* rbd_req_sync_exec() can return positive */ | ||
3351 | 3745 | ||
3352 | p = response; | 3746 | p = response; |
3353 | rbd_dev->spec->image_id = ceph_extract_encoded_string(&p, | 3747 | rbd_dev->spec->image_id = ceph_extract_encoded_string(&p, |
3354 | p + RBD_IMAGE_ID_LEN_MAX, | 3748 | p + RBD_IMAGE_ID_LEN_MAX, |
3355 | &rbd_dev->spec->image_id_len, | 3749 | NULL, GFP_NOIO); |
3356 | GFP_NOIO); | ||
3357 | if (IS_ERR(rbd_dev->spec->image_id)) { | 3750 | if (IS_ERR(rbd_dev->spec->image_id)) { |
3358 | ret = PTR_ERR(rbd_dev->spec->image_id); | 3751 | ret = PTR_ERR(rbd_dev->spec->image_id); |
3359 | rbd_dev->spec->image_id = NULL; | 3752 | rbd_dev->spec->image_id = NULL; |
@@ -3377,11 +3770,10 @@ static int rbd_dev_v1_probe(struct rbd_device *rbd_dev) | |||
3377 | rbd_dev->spec->image_id = kstrdup("", GFP_KERNEL); | 3770 | rbd_dev->spec->image_id = kstrdup("", GFP_KERNEL); |
3378 | if (!rbd_dev->spec->image_id) | 3771 | if (!rbd_dev->spec->image_id) |
3379 | return -ENOMEM; | 3772 | return -ENOMEM; |
3380 | rbd_dev->spec->image_id_len = 0; | ||
3381 | 3773 | ||
3382 | /* Record the header object name for this rbd image. */ | 3774 | /* Record the header object name for this rbd image. */ |
3383 | 3775 | ||
3384 | size = rbd_dev->spec->image_name_len + sizeof (RBD_SUFFIX); | 3776 | size = strlen(rbd_dev->spec->image_name) + sizeof (RBD_SUFFIX); |
3385 | rbd_dev->header_name = kmalloc(size, GFP_KERNEL); | 3777 | rbd_dev->header_name = kmalloc(size, GFP_KERNEL); |
3386 | if (!rbd_dev->header_name) { | 3778 | if (!rbd_dev->header_name) { |
3387 | ret = -ENOMEM; | 3779 | ret = -ENOMEM; |
@@ -3427,7 +3819,7 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev) | |||
3427 | * Image id was filled in by the caller. Record the header | 3819 | * Image id was filled in by the caller. Record the header |
3428 | * object name for this rbd image. | 3820 | * object name for this rbd image. |
3429 | */ | 3821 | */ |
3430 | size = sizeof (RBD_HEADER_PREFIX) + rbd_dev->spec->image_id_len; | 3822 | size = sizeof (RBD_HEADER_PREFIX) + strlen(rbd_dev->spec->image_id); |
3431 | rbd_dev->header_name = kmalloc(size, GFP_KERNEL); | 3823 | rbd_dev->header_name = kmalloc(size, GFP_KERNEL); |
3432 | if (!rbd_dev->header_name) | 3824 | if (!rbd_dev->header_name) |
3433 | return -ENOMEM; | 3825 | return -ENOMEM; |
@@ -3542,7 +3934,7 @@ static int rbd_dev_probe_finish(struct rbd_device *rbd_dev) | |||
3542 | if (ret) | 3934 | if (ret) |
3543 | goto err_out_bus; | 3935 | goto err_out_bus; |
3544 | 3936 | ||
3545 | ret = rbd_init_watch_dev(rbd_dev); | 3937 | ret = rbd_dev_header_watch_sync(rbd_dev, 1); |
3546 | if (ret) | 3938 | if (ret) |
3547 | goto err_out_bus; | 3939 | goto err_out_bus; |
3548 | 3940 | ||
@@ -3638,6 +4030,13 @@ static ssize_t rbd_add(struct bus_type *bus, | |||
3638 | goto err_out_client; | 4030 | goto err_out_client; |
3639 | spec->pool_id = (u64) rc; | 4031 | spec->pool_id = (u64) rc; |
3640 | 4032 | ||
4033 | /* The ceph file layout needs to fit pool id in 32 bits */ | ||
4034 | |||
4035 | if (WARN_ON(spec->pool_id > (u64) U32_MAX)) { | ||
4036 | rc = -EIO; | ||
4037 | goto err_out_client; | ||
4038 | } | ||
4039 | |||
3641 | rbd_dev = rbd_dev_create(rbdc, spec); | 4040 | rbd_dev = rbd_dev_create(rbdc, spec); |
3642 | if (!rbd_dev) | 4041 | if (!rbd_dev) |
3643 | goto err_out_client; | 4042 | goto err_out_client; |
@@ -3691,15 +4090,8 @@ static void rbd_dev_release(struct device *dev) | |||
3691 | { | 4090 | { |
3692 | struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); | 4091 | struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); |
3693 | 4092 | ||
3694 | if (rbd_dev->watch_request) { | ||
3695 | struct ceph_client *client = rbd_dev->rbd_client->client; | ||
3696 | |||
3697 | ceph_osdc_unregister_linger_request(&client->osdc, | ||
3698 | rbd_dev->watch_request); | ||
3699 | } | ||
3700 | if (rbd_dev->watch_event) | 4093 | if (rbd_dev->watch_event) |
3701 | rbd_req_sync_unwatch(rbd_dev); | 4094 | rbd_dev_header_watch_sync(rbd_dev, 0); |
3702 | |||
3703 | 4095 | ||
3704 | /* clean up and free blkdev */ | 4096 | /* clean up and free blkdev */ |
3705 | rbd_free_disk(rbd_dev); | 4097 | rbd_free_disk(rbd_dev); |
@@ -3743,10 +4135,14 @@ static ssize_t rbd_remove(struct bus_type *bus, | |||
3743 | goto done; | 4135 | goto done; |
3744 | } | 4136 | } |
3745 | 4137 | ||
3746 | if (rbd_dev->open_count) { | 4138 | spin_lock_irq(&rbd_dev->lock); |
4139 | if (rbd_dev->open_count) | ||
3747 | ret = -EBUSY; | 4140 | ret = -EBUSY; |
4141 | else | ||
4142 | set_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags); | ||
4143 | spin_unlock_irq(&rbd_dev->lock); | ||
4144 | if (ret < 0) | ||
3748 | goto done; | 4145 | goto done; |
3749 | } | ||
3750 | 4146 | ||
3751 | rbd_remove_all_snaps(rbd_dev); | 4147 | rbd_remove_all_snaps(rbd_dev); |
3752 | rbd_bus_del_dev(rbd_dev); | 4148 | rbd_bus_del_dev(rbd_dev); |
@@ -3786,6 +4182,11 @@ int __init rbd_init(void) | |||
3786 | { | 4182 | { |
3787 | int rc; | 4183 | int rc; |
3788 | 4184 | ||
4185 | if (!libceph_compatible(NULL)) { | ||
4186 | rbd_warn(NULL, "libceph incompatibility (quitting)"); | ||
4187 | |||
4188 | return -EINVAL; | ||
4189 | } | ||
3789 | rc = rbd_sysfs_init(); | 4190 | rc = rbd_sysfs_init(); |
3790 | if (rc) | 4191 | if (rc) |
3791 | return rc; | 4192 | return rc; |
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 064d1a68d2c1..fc613715af46 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -315,7 +315,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) | |||
315 | CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, | 315 | CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, |
316 | NULL, 0, | 316 | NULL, 0, |
317 | ci->i_truncate_seq, ci->i_truncate_size, | 317 | ci->i_truncate_seq, ci->i_truncate_size, |
318 | NULL, false, 1, 0); | 318 | NULL, false, 0); |
319 | if (IS_ERR(req)) | 319 | if (IS_ERR(req)) |
320 | return PTR_ERR(req); | 320 | return PTR_ERR(req); |
321 | 321 | ||
@@ -492,8 +492,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
492 | &ci->i_layout, snapc, | 492 | &ci->i_layout, snapc, |
493 | page_off, len, | 493 | page_off, len, |
494 | ci->i_truncate_seq, ci->i_truncate_size, | 494 | ci->i_truncate_seq, ci->i_truncate_size, |
495 | &inode->i_mtime, | 495 | &inode->i_mtime, &page, 1); |
496 | &page, 1, 0, 0, true); | ||
497 | if (err < 0) { | 496 | if (err < 0) { |
498 | dout("writepage setting page/mapping error %d %p\n", err, page); | 497 | dout("writepage setting page/mapping error %d %p\n", err, page); |
499 | SetPageError(page); | 498 | SetPageError(page); |
@@ -838,7 +837,7 @@ get_more_pages: | |||
838 | snapc, do_sync, | 837 | snapc, do_sync, |
839 | ci->i_truncate_seq, | 838 | ci->i_truncate_seq, |
840 | ci->i_truncate_size, | 839 | ci->i_truncate_size, |
841 | &inode->i_mtime, true, 1, 0); | 840 | &inode->i_mtime, true, 0); |
842 | 841 | ||
843 | if (IS_ERR(req)) { | 842 | if (IS_ERR(req)) { |
844 | rc = PTR_ERR(req); | 843 | rc = PTR_ERR(req); |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index a1d9bb30c1bf..1e1e02055a2b 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -611,8 +611,16 @@ retry: | |||
611 | 611 | ||
612 | if (flags & CEPH_CAP_FLAG_AUTH) | 612 | if (flags & CEPH_CAP_FLAG_AUTH) |
613 | ci->i_auth_cap = cap; | 613 | ci->i_auth_cap = cap; |
614 | else if (ci->i_auth_cap == cap) | 614 | else if (ci->i_auth_cap == cap) { |
615 | ci->i_auth_cap = NULL; | 615 | ci->i_auth_cap = NULL; |
616 | spin_lock(&mdsc->cap_dirty_lock); | ||
617 | if (!list_empty(&ci->i_dirty_item)) { | ||
618 | dout(" moving %p to cap_dirty_migrating\n", inode); | ||
619 | list_move(&ci->i_dirty_item, | ||
620 | &mdsc->cap_dirty_migrating); | ||
621 | } | ||
622 | spin_unlock(&mdsc->cap_dirty_lock); | ||
623 | } | ||
616 | 624 | ||
617 | dout("add_cap inode %p (%llx.%llx) cap %p %s now %s seq %d mds%d\n", | 625 | dout("add_cap inode %p (%llx.%llx) cap %p %s now %s seq %d mds%d\n", |
618 | inode, ceph_vinop(inode), cap, ceph_cap_string(issued), | 626 | inode, ceph_vinop(inode), cap, ceph_cap_string(issued), |
@@ -1460,7 +1468,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, | |||
1460 | struct ceph_mds_client *mdsc = fsc->mdsc; | 1468 | struct ceph_mds_client *mdsc = fsc->mdsc; |
1461 | struct inode *inode = &ci->vfs_inode; | 1469 | struct inode *inode = &ci->vfs_inode; |
1462 | struct ceph_cap *cap; | 1470 | struct ceph_cap *cap; |
1463 | int file_wanted, used; | 1471 | int file_wanted, used, cap_used; |
1464 | int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */ | 1472 | int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */ |
1465 | int issued, implemented, want, retain, revoking, flushing = 0; | 1473 | int issued, implemented, want, retain, revoking, flushing = 0; |
1466 | int mds = -1; /* keep track of how far we've gone through i_caps list | 1474 | int mds = -1; /* keep track of how far we've gone through i_caps list |
@@ -1563,9 +1571,14 @@ retry_locked: | |||
1563 | 1571 | ||
1564 | /* NOTE: no side-effects allowed, until we take s_mutex */ | 1572 | /* NOTE: no side-effects allowed, until we take s_mutex */ |
1565 | 1573 | ||
1574 | cap_used = used; | ||
1575 | if (ci->i_auth_cap && cap != ci->i_auth_cap) | ||
1576 | cap_used &= ~ci->i_auth_cap->issued; | ||
1577 | |||
1566 | revoking = cap->implemented & ~cap->issued; | 1578 | revoking = cap->implemented & ~cap->issued; |
1567 | dout(" mds%d cap %p issued %s implemented %s revoking %s\n", | 1579 | dout(" mds%d cap %p used %s issued %s implemented %s revoking %s\n", |
1568 | cap->mds, cap, ceph_cap_string(cap->issued), | 1580 | cap->mds, cap, ceph_cap_string(cap->issued), |
1581 | ceph_cap_string(cap_used), | ||
1569 | ceph_cap_string(cap->implemented), | 1582 | ceph_cap_string(cap->implemented), |
1570 | ceph_cap_string(revoking)); | 1583 | ceph_cap_string(revoking)); |
1571 | 1584 | ||
@@ -1593,7 +1606,7 @@ retry_locked: | |||
1593 | } | 1606 | } |
1594 | 1607 | ||
1595 | /* completed revocation? going down and there are no caps? */ | 1608 | /* completed revocation? going down and there are no caps? */ |
1596 | if (revoking && (revoking & used) == 0) { | 1609 | if (revoking && (revoking & cap_used) == 0) { |
1597 | dout("completed revocation of %s\n", | 1610 | dout("completed revocation of %s\n", |
1598 | ceph_cap_string(cap->implemented & ~cap->issued)); | 1611 | ceph_cap_string(cap->implemented & ~cap->issued)); |
1599 | goto ack; | 1612 | goto ack; |
@@ -1670,8 +1683,8 @@ ack: | |||
1670 | sent++; | 1683 | sent++; |
1671 | 1684 | ||
1672 | /* __send_cap drops i_ceph_lock */ | 1685 | /* __send_cap drops i_ceph_lock */ |
1673 | delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, used, want, | 1686 | delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, cap_used, |
1674 | retain, flushing, NULL); | 1687 | want, retain, flushing, NULL); |
1675 | goto retry; /* retake i_ceph_lock and restart our cap scan. */ | 1688 | goto retry; /* retake i_ceph_lock and restart our cap scan. */ |
1676 | } | 1689 | } |
1677 | 1690 | ||
@@ -2416,7 +2429,9 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2416 | dout("mds wanted %s -> %s\n", | 2429 | dout("mds wanted %s -> %s\n", |
2417 | ceph_cap_string(le32_to_cpu(grant->wanted)), | 2430 | ceph_cap_string(le32_to_cpu(grant->wanted)), |
2418 | ceph_cap_string(wanted)); | 2431 | ceph_cap_string(wanted)); |
2419 | grant->wanted = cpu_to_le32(wanted); | 2432 | /* imported cap may not have correct mds_wanted */ |
2433 | if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) | ||
2434 | check_caps = 1; | ||
2420 | } | 2435 | } |
2421 | 2436 | ||
2422 | cap->seq = seq; | 2437 | cap->seq = seq; |
@@ -2820,6 +2835,9 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2820 | dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq, | 2835 | dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq, |
2821 | (unsigned)seq); | 2836 | (unsigned)seq); |
2822 | 2837 | ||
2838 | if (op == CEPH_CAP_OP_IMPORT) | ||
2839 | ceph_add_cap_releases(mdsc, session); | ||
2840 | |||
2823 | /* lookup ino */ | 2841 | /* lookup ino */ |
2824 | inode = ceph_find_inode(sb, vino); | 2842 | inode = ceph_find_inode(sb, vino); |
2825 | ci = ceph_inode(inode); | 2843 | ci = ceph_inode(inode); |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index e51558fca3a3..9c4325e654ca 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -243,6 +243,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, | |||
243 | err = ceph_mdsc_do_request(mdsc, | 243 | err = ceph_mdsc_do_request(mdsc, |
244 | (flags & (O_CREAT|O_TRUNC)) ? dir : NULL, | 244 | (flags & (O_CREAT|O_TRUNC)) ? dir : NULL, |
245 | req); | 245 | req); |
246 | if (err) | ||
247 | goto out_err; | ||
248 | |||
246 | err = ceph_handle_snapdir(req, dentry, err); | 249 | err = ceph_handle_snapdir(req, dentry, err); |
247 | if (err == 0 && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry) | 250 | if (err == 0 && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry) |
248 | err = ceph_handle_notrace_create(dir, dentry); | 251 | err = ceph_handle_notrace_create(dir, dentry); |
@@ -263,6 +266,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, | |||
263 | err = finish_no_open(file, dn); | 266 | err = finish_no_open(file, dn); |
264 | } else { | 267 | } else { |
265 | dout("atomic_open finish_open on dn %p\n", dn); | 268 | dout("atomic_open finish_open on dn %p\n", dn); |
269 | if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) { | ||
270 | *opened |= FILE_CREATED; | ||
271 | } | ||
266 | err = finish_open(file, dentry, ceph_open, opened); | 272 | err = finish_open(file, dentry, ceph_open, opened); |
267 | } | 273 | } |
268 | 274 | ||
@@ -535,7 +541,7 @@ more: | |||
535 | ci->i_snap_realm->cached_context, | 541 | ci->i_snap_realm->cached_context, |
536 | do_sync, | 542 | do_sync, |
537 | ci->i_truncate_seq, ci->i_truncate_size, | 543 | ci->i_truncate_seq, ci->i_truncate_size, |
538 | &mtime, false, 2, page_align); | 544 | &mtime, false, page_align); |
539 | if (IS_ERR(req)) | 545 | if (IS_ERR(req)) |
540 | return PTR_ERR(req); | 546 | return PTR_ERR(req); |
541 | 547 | ||
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 36549a46e311..3b22150d3e19 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c | |||
@@ -194,7 +194,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) | |||
194 | return -EFAULT; | 194 | return -EFAULT; |
195 | 195 | ||
196 | down_read(&osdc->map_sem); | 196 | down_read(&osdc->map_sem); |
197 | r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, &len, | 197 | r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len, |
198 | &dl.object_no, &dl.object_offset, | 198 | &dl.object_no, &dl.object_offset, |
199 | &olen); | 199 | &olen); |
200 | if (r < 0) | 200 | if (r < 0) |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 9165eb8309eb..d95842036c8b 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -233,6 +233,30 @@ bad: | |||
233 | } | 233 | } |
234 | 234 | ||
235 | /* | 235 | /* |
236 | * parse create results | ||
237 | */ | ||
238 | static int parse_reply_info_create(void **p, void *end, | ||
239 | struct ceph_mds_reply_info_parsed *info, | ||
240 | int features) | ||
241 | { | ||
242 | if (features & CEPH_FEATURE_REPLY_CREATE_INODE) { | ||
243 | if (*p == end) { | ||
244 | info->has_create_ino = false; | ||
245 | } else { | ||
246 | info->has_create_ino = true; | ||
247 | info->ino = ceph_decode_64(p); | ||
248 | } | ||
249 | } | ||
250 | |||
251 | if (unlikely(*p != end)) | ||
252 | goto bad; | ||
253 | return 0; | ||
254 | |||
255 | bad: | ||
256 | return -EIO; | ||
257 | } | ||
258 | |||
259 | /* | ||
236 | * parse extra results | 260 | * parse extra results |
237 | */ | 261 | */ |
238 | static int parse_reply_info_extra(void **p, void *end, | 262 | static int parse_reply_info_extra(void **p, void *end, |
@@ -241,8 +265,12 @@ static int parse_reply_info_extra(void **p, void *end, | |||
241 | { | 265 | { |
242 | if (info->head->op == CEPH_MDS_OP_GETFILELOCK) | 266 | if (info->head->op == CEPH_MDS_OP_GETFILELOCK) |
243 | return parse_reply_info_filelock(p, end, info, features); | 267 | return parse_reply_info_filelock(p, end, info, features); |
244 | else | 268 | else if (info->head->op == CEPH_MDS_OP_READDIR) |
245 | return parse_reply_info_dir(p, end, info, features); | 269 | return parse_reply_info_dir(p, end, info, features); |
270 | else if (info->head->op == CEPH_MDS_OP_CREATE) | ||
271 | return parse_reply_info_create(p, end, info, features); | ||
272 | else | ||
273 | return -EIO; | ||
246 | } | 274 | } |
247 | 275 | ||
248 | /* | 276 | /* |
@@ -2170,7 +2198,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
2170 | mutex_lock(&req->r_fill_mutex); | 2198 | mutex_lock(&req->r_fill_mutex); |
2171 | err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); | 2199 | err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); |
2172 | if (err == 0) { | 2200 | if (err == 0) { |
2173 | if (result == 0 && req->r_op != CEPH_MDS_OP_GETFILELOCK && | 2201 | if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR || |
2202 | req->r_op == CEPH_MDS_OP_LSSNAP) && | ||
2174 | rinfo->dir_nr) | 2203 | rinfo->dir_nr) |
2175 | ceph_readdir_prepopulate(req, req->r_session); | 2204 | ceph_readdir_prepopulate(req, req->r_session); |
2176 | ceph_unreserve_caps(mdsc, &req->r_caps_reservation); | 2205 | ceph_unreserve_caps(mdsc, &req->r_caps_reservation); |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index dd26846dd71d..567f7c60354e 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -74,6 +74,12 @@ struct ceph_mds_reply_info_parsed { | |||
74 | struct ceph_mds_reply_info_in *dir_in; | 74 | struct ceph_mds_reply_info_in *dir_in; |
75 | u8 dir_complete, dir_end; | 75 | u8 dir_complete, dir_end; |
76 | }; | 76 | }; |
77 | |||
78 | /* for create results */ | ||
79 | struct { | ||
80 | bool has_create_ino; | ||
81 | u64 ino; | ||
82 | }; | ||
77 | }; | 83 | }; |
78 | 84 | ||
79 | /* encoded blob describing snapshot contexts for certain | 85 | /* encoded blob describing snapshot contexts for certain |
diff --git a/fs/ceph/strings.c b/fs/ceph/strings.c index cd5097d7c804..89fa4a940a0f 100644 --- a/fs/ceph/strings.c +++ b/fs/ceph/strings.c | |||
@@ -15,6 +15,7 @@ const char *ceph_mds_state_name(int s) | |||
15 | case CEPH_MDS_STATE_BOOT: return "up:boot"; | 15 | case CEPH_MDS_STATE_BOOT: return "up:boot"; |
16 | case CEPH_MDS_STATE_STANDBY: return "up:standby"; | 16 | case CEPH_MDS_STATE_STANDBY: return "up:standby"; |
17 | case CEPH_MDS_STATE_STANDBY_REPLAY: return "up:standby-replay"; | 17 | case CEPH_MDS_STATE_STANDBY_REPLAY: return "up:standby-replay"; |
18 | case CEPH_MDS_STATE_REPLAYONCE: return "up:oneshot-replay"; | ||
18 | case CEPH_MDS_STATE_CREATING: return "up:creating"; | 19 | case CEPH_MDS_STATE_CREATING: return "up:creating"; |
19 | case CEPH_MDS_STATE_STARTING: return "up:starting"; | 20 | case CEPH_MDS_STATE_STARTING: return "up:starting"; |
20 | /* up and in */ | 21 | /* up and in */ |
@@ -50,10 +51,13 @@ const char *ceph_mds_op_name(int op) | |||
50 | case CEPH_MDS_OP_LOOKUP: return "lookup"; | 51 | case CEPH_MDS_OP_LOOKUP: return "lookup"; |
51 | case CEPH_MDS_OP_LOOKUPHASH: return "lookuphash"; | 52 | case CEPH_MDS_OP_LOOKUPHASH: return "lookuphash"; |
52 | case CEPH_MDS_OP_LOOKUPPARENT: return "lookupparent"; | 53 | case CEPH_MDS_OP_LOOKUPPARENT: return "lookupparent"; |
54 | case CEPH_MDS_OP_LOOKUPINO: return "lookupino"; | ||
53 | case CEPH_MDS_OP_GETATTR: return "getattr"; | 55 | case CEPH_MDS_OP_GETATTR: return "getattr"; |
54 | case CEPH_MDS_OP_SETXATTR: return "setxattr"; | 56 | case CEPH_MDS_OP_SETXATTR: return "setxattr"; |
55 | case CEPH_MDS_OP_SETATTR: return "setattr"; | 57 | case CEPH_MDS_OP_SETATTR: return "setattr"; |
56 | case CEPH_MDS_OP_RMXATTR: return "rmxattr"; | 58 | case CEPH_MDS_OP_RMXATTR: return "rmxattr"; |
	59 | case CEPH_MDS_OP_SETLAYOUT: return "setlayout"; | ||
60 | case CEPH_MDS_OP_SETDIRLAYOUT: return "setdirlayout"; | ||
57 | case CEPH_MDS_OP_READDIR: return "readdir"; | 61 | case CEPH_MDS_OP_READDIR: return "readdir"; |
58 | case CEPH_MDS_OP_MKNOD: return "mknod"; | 62 | case CEPH_MDS_OP_MKNOD: return "mknod"; |
59 | case CEPH_MDS_OP_LINK: return "link"; | 63 | case CEPH_MDS_OP_LINK: return "link"; |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 66ebe720e40d..9861cce10a49 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -798,13 +798,7 @@ extern int ceph_mmap(struct file *file, struct vm_area_struct *vma); | |||
798 | /* file.c */ | 798 | /* file.c */ |
799 | extern const struct file_operations ceph_file_fops; | 799 | extern const struct file_operations ceph_file_fops; |
800 | extern const struct address_space_operations ceph_aops; | 800 | extern const struct address_space_operations ceph_aops; |
801 | extern int ceph_copy_to_page_vector(struct page **pages, | 801 | |
802 | const char *data, | ||
803 | loff_t off, size_t len); | ||
804 | extern int ceph_copy_from_page_vector(struct page **pages, | ||
805 | char *data, | ||
806 | loff_t off, size_t len); | ||
807 | extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); | ||
808 | extern int ceph_open(struct inode *inode, struct file *file); | 802 | extern int ceph_open(struct inode *inode, struct file *file); |
809 | extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry, | 803 | extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry, |
810 | struct file *file, unsigned flags, umode_t mode, | 804 | struct file *file, unsigned flags, umode_t mode, |
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 2c2ae5be9902..2135817e708d 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c | |||
@@ -29,9 +29,94 @@ struct ceph_vxattr { | |||
29 | size_t name_size; /* strlen(name) + 1 (for '\0') */ | 29 | size_t name_size; /* strlen(name) + 1 (for '\0') */ |
30 | size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val, | 30 | size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val, |
31 | size_t size); | 31 | size_t size); |
32 | bool readonly; | 32 | bool readonly, hidden; |
33 | bool (*exists_cb)(struct ceph_inode_info *ci); | ||
33 | }; | 34 | }; |
34 | 35 | ||
36 | /* layouts */ | ||
37 | |||
38 | static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci) | ||
39 | { | ||
40 | size_t s; | ||
41 | char *p = (char *)&ci->i_layout; | ||
42 | |||
43 | for (s = 0; s < sizeof(ci->i_layout); s++, p++) | ||
44 | if (*p) | ||
45 | return true; | ||
46 | return false; | ||
47 | } | ||
48 | |||
49 | static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, | ||
50 | size_t size) | ||
51 | { | ||
52 | int ret; | ||
53 | struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb); | ||
54 | struct ceph_osd_client *osdc = &fsc->client->osdc; | ||
55 | s64 pool = ceph_file_layout_pg_pool(ci->i_layout); | ||
56 | const char *pool_name; | ||
57 | |||
58 | dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode); | ||
59 | down_read(&osdc->map_sem); | ||
60 | pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool); | ||
61 | if (pool_name) | ||
62 | ret = snprintf(val, size, | ||
63 | "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%s", | ||
64 | (unsigned long long)ceph_file_layout_su(ci->i_layout), | ||
65 | (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), | ||
66 | (unsigned long long)ceph_file_layout_object_size(ci->i_layout), | ||
67 | pool_name); | ||
68 | else | ||
69 | ret = snprintf(val, size, | ||
70 | "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld", | ||
71 | (unsigned long long)ceph_file_layout_su(ci->i_layout), | ||
72 | (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), | ||
73 | (unsigned long long)ceph_file_layout_object_size(ci->i_layout), | ||
74 | (unsigned long long)pool); | ||
75 | |||
76 | up_read(&osdc->map_sem); | ||
77 | return ret; | ||
78 | } | ||
79 | |||
80 | static size_t ceph_vxattrcb_layout_stripe_unit(struct ceph_inode_info *ci, | ||
81 | char *val, size_t size) | ||
82 | { | ||
83 | return snprintf(val, size, "%lld", | ||
84 | (unsigned long long)ceph_file_layout_su(ci->i_layout)); | ||
85 | } | ||
86 | |||
87 | static size_t ceph_vxattrcb_layout_stripe_count(struct ceph_inode_info *ci, | ||
88 | char *val, size_t size) | ||
89 | { | ||
90 | return snprintf(val, size, "%lld", | ||
91 | (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout)); | ||
92 | } | ||
93 | |||
94 | static size_t ceph_vxattrcb_layout_object_size(struct ceph_inode_info *ci, | ||
95 | char *val, size_t size) | ||
96 | { | ||
97 | return snprintf(val, size, "%lld", | ||
98 | (unsigned long long)ceph_file_layout_object_size(ci->i_layout)); | ||
99 | } | ||
100 | |||
101 | static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci, | ||
102 | char *val, size_t size) | ||
103 | { | ||
104 | int ret; | ||
105 | struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb); | ||
106 | struct ceph_osd_client *osdc = &fsc->client->osdc; | ||
107 | s64 pool = ceph_file_layout_pg_pool(ci->i_layout); | ||
108 | const char *pool_name; | ||
109 | |||
110 | down_read(&osdc->map_sem); | ||
111 | pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool); | ||
112 | if (pool_name) | ||
113 | ret = snprintf(val, size, "%s", pool_name); | ||
114 | else | ||
115 | ret = snprintf(val, size, "%lld", (unsigned long long)pool); | ||
116 | up_read(&osdc->map_sem); | ||
117 | return ret; | ||
118 | } | ||
119 | |||
35 | /* directories */ | 120 | /* directories */ |
36 | 121 | ||
37 | static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val, | 122 | static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val, |
@@ -83,17 +168,43 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val, | |||
83 | (long)ci->i_rctime.tv_nsec); | 168 | (long)ci->i_rctime.tv_nsec); |
84 | } | 169 | } |
85 | 170 | ||
86 | #define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name | ||
87 | 171 | ||
88 | #define XATTR_NAME_CEPH(_type, _name) \ | 172 | #define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name |
89 | { \ | 173 | #define CEPH_XATTR_NAME2(_type, _name, _name2) \ |
90 | .name = CEPH_XATTR_NAME(_type, _name), \ | 174 | XATTR_CEPH_PREFIX #_type "." #_name "." #_name2 |
91 | .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \ | 175 | |
92 | .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \ | 176 | #define XATTR_NAME_CEPH(_type, _name) \ |
93 | .readonly = true, \ | 177 | { \ |
94 | } | 178 | .name = CEPH_XATTR_NAME(_type, _name), \ |
179 | .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \ | ||
180 | .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \ | ||
181 | .readonly = true, \ | ||
182 | .hidden = false, \ | ||
183 | .exists_cb = NULL, \ | ||
184 | } | ||
185 | #define XATTR_LAYOUT_FIELD(_type, _name, _field) \ | ||
186 | { \ | ||
187 | .name = CEPH_XATTR_NAME2(_type, _name, _field), \ | ||
188 | .name_size = sizeof (CEPH_XATTR_NAME2(_type, _name, _field)), \ | ||
189 | .getxattr_cb = ceph_vxattrcb_ ## _name ## _ ## _field, \ | ||
190 | .readonly = false, \ | ||
191 | .hidden = true, \ | ||
192 | .exists_cb = ceph_vxattrcb_layout_exists, \ | ||
193 | } | ||
95 | 194 | ||
96 | static struct ceph_vxattr ceph_dir_vxattrs[] = { | 195 | static struct ceph_vxattr ceph_dir_vxattrs[] = { |
196 | { | ||
197 | .name = "ceph.dir.layout", | ||
198 | .name_size = sizeof("ceph.dir.layout"), | ||
199 | .getxattr_cb = ceph_vxattrcb_layout, | ||
200 | .readonly = false, | ||
201 | .hidden = false, | ||
202 | .exists_cb = ceph_vxattrcb_layout_exists, | ||
203 | }, | ||
204 | XATTR_LAYOUT_FIELD(dir, layout, stripe_unit), | ||
205 | XATTR_LAYOUT_FIELD(dir, layout, stripe_count), | ||
206 | XATTR_LAYOUT_FIELD(dir, layout, object_size), | ||
207 | XATTR_LAYOUT_FIELD(dir, layout, pool), | ||
97 | XATTR_NAME_CEPH(dir, entries), | 208 | XATTR_NAME_CEPH(dir, entries), |
98 | XATTR_NAME_CEPH(dir, files), | 209 | XATTR_NAME_CEPH(dir, files), |
99 | XATTR_NAME_CEPH(dir, subdirs), | 210 | XATTR_NAME_CEPH(dir, subdirs), |
@@ -108,28 +219,19 @@ static size_t ceph_dir_vxattrs_name_size; /* total size of all names */ | |||
108 | 219 | ||
109 | /* files */ | 220 | /* files */ |
110 | 221 | ||
111 | static size_t ceph_vxattrcb_file_layout(struct ceph_inode_info *ci, char *val, | ||
112 | size_t size) | ||
113 | { | ||
114 | int ret; | ||
115 | |||
116 | ret = snprintf(val, size, | ||
117 | "chunk_bytes=%lld\nstripe_count=%lld\nobject_size=%lld\n", | ||
118 | (unsigned long long)ceph_file_layout_su(ci->i_layout), | ||
119 | (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), | ||
120 | (unsigned long long)ceph_file_layout_object_size(ci->i_layout)); | ||
121 | return ret; | ||
122 | } | ||
123 | |||
124 | static struct ceph_vxattr ceph_file_vxattrs[] = { | 222 | static struct ceph_vxattr ceph_file_vxattrs[] = { |
125 | XATTR_NAME_CEPH(file, layout), | ||
126 | /* The following extended attribute name is deprecated */ | ||
127 | { | 223 | { |
128 | .name = XATTR_CEPH_PREFIX "layout", | 224 | .name = "ceph.file.layout", |
129 | .name_size = sizeof (XATTR_CEPH_PREFIX "layout"), | 225 | .name_size = sizeof("ceph.file.layout"), |
130 | .getxattr_cb = ceph_vxattrcb_file_layout, | 226 | .getxattr_cb = ceph_vxattrcb_layout, |
131 | .readonly = true, | 227 | .readonly = false, |
228 | .hidden = false, | ||
229 | .exists_cb = ceph_vxattrcb_layout_exists, | ||
132 | }, | 230 | }, |
231 | XATTR_LAYOUT_FIELD(file, layout, stripe_unit), | ||
232 | XATTR_LAYOUT_FIELD(file, layout, stripe_count), | ||
233 | XATTR_LAYOUT_FIELD(file, layout, object_size), | ||
234 | XATTR_LAYOUT_FIELD(file, layout, pool), | ||
133 | { 0 } /* Required table terminator */ | 235 | { 0 } /* Required table terminator */ |
134 | }; | 236 | }; |
135 | static size_t ceph_file_vxattrs_name_size; /* total size of all names */ | 237 | static size_t ceph_file_vxattrs_name_size; /* total size of all names */ |
@@ -164,7 +266,8 @@ static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs) | |||
164 | size_t size = 0; | 266 | size_t size = 0; |
165 | 267 | ||
166 | for (vxattr = vxattrs; vxattr->name; vxattr++) | 268 | for (vxattr = vxattrs; vxattr->name; vxattr++) |
167 | size += vxattr->name_size; | 269 | if (!vxattr->hidden) |
270 | size += vxattr->name_size; | ||
168 | 271 | ||
169 | return size; | 272 | return size; |
170 | } | 273 | } |
@@ -572,13 +675,17 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value, | |||
572 | if (!ceph_is_valid_xattr(name)) | 675 | if (!ceph_is_valid_xattr(name)) |
573 | return -ENODATA; | 676 | return -ENODATA; |
574 | 677 | ||
575 | /* let's see if a virtual xattr was requested */ | ||
576 | vxattr = ceph_match_vxattr(inode, name); | ||
577 | |||
578 | spin_lock(&ci->i_ceph_lock); | 678 | spin_lock(&ci->i_ceph_lock); |
579 | dout("getxattr %p ver=%lld index_ver=%lld\n", inode, | 679 | dout("getxattr %p ver=%lld index_ver=%lld\n", inode, |
580 | ci->i_xattrs.version, ci->i_xattrs.index_version); | 680 | ci->i_xattrs.version, ci->i_xattrs.index_version); |
581 | 681 | ||
682 | /* let's see if a virtual xattr was requested */ | ||
683 | vxattr = ceph_match_vxattr(inode, name); | ||
684 | if (vxattr && !(vxattr->exists_cb && !vxattr->exists_cb(ci))) { | ||
685 | err = vxattr->getxattr_cb(ci, value, size); | ||
686 | goto out; | ||
687 | } | ||
688 | |||
582 | if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) && | 689 | if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) && |
583 | (ci->i_xattrs.index_version >= ci->i_xattrs.version)) { | 690 | (ci->i_xattrs.index_version >= ci->i_xattrs.version)) { |
584 | goto get_xattr; | 691 | goto get_xattr; |
@@ -592,11 +699,6 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value, | |||
592 | 699 | ||
593 | spin_lock(&ci->i_ceph_lock); | 700 | spin_lock(&ci->i_ceph_lock); |
594 | 701 | ||
595 | if (vxattr && vxattr->readonly) { | ||
596 | err = vxattr->getxattr_cb(ci, value, size); | ||
597 | goto out; | ||
598 | } | ||
599 | |||
600 | err = __build_xattrs(inode); | 702 | err = __build_xattrs(inode); |
601 | if (err < 0) | 703 | if (err < 0) |
602 | goto out; | 704 | goto out; |
@@ -604,11 +706,8 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value, | |||
604 | get_xattr: | 706 | get_xattr: |
605 | err = -ENODATA; /* == ENOATTR */ | 707 | err = -ENODATA; /* == ENOATTR */ |
606 | xattr = __get_xattr(ci, name); | 708 | xattr = __get_xattr(ci, name); |
607 | if (!xattr) { | 709 | if (!xattr) |
608 | if (vxattr) | ||
609 | err = vxattr->getxattr_cb(ci, value, size); | ||
610 | goto out; | 710 | goto out; |
611 | } | ||
612 | 711 | ||
613 | err = -ERANGE; | 712 | err = -ERANGE; |
614 | if (size && size < xattr->val_len) | 713 | if (size && size < xattr->val_len) |
@@ -664,23 +763,30 @@ list_xattr: | |||
664 | vir_namelen = ceph_vxattrs_name_size(vxattrs); | 763 | vir_namelen = ceph_vxattrs_name_size(vxattrs); |
665 | 764 | ||
666 | /* adding 1 byte per each variable due to the null termination */ | 765 | /* adding 1 byte per each variable due to the null termination */ |
667 | namelen = vir_namelen + ci->i_xattrs.names_size + ci->i_xattrs.count; | 766 | namelen = ci->i_xattrs.names_size + ci->i_xattrs.count; |
668 | err = -ERANGE; | 767 | err = -ERANGE; |
669 | if (size && namelen > size) | 768 | if (size && vir_namelen + namelen > size) |
670 | goto out; | 769 | goto out; |
671 | 770 | ||
672 | err = namelen; | 771 | err = namelen + vir_namelen; |
673 | if (size == 0) | 772 | if (size == 0) |
674 | goto out; | 773 | goto out; |
675 | 774 | ||
676 | names = __copy_xattr_names(ci, names); | 775 | names = __copy_xattr_names(ci, names); |
677 | 776 | ||
678 | /* virtual xattr names, too */ | 777 | /* virtual xattr names, too */ |
679 | if (vxattrs) | 778 | err = namelen; |
779 | if (vxattrs) { | ||
680 | for (i = 0; vxattrs[i].name; i++) { | 780 | for (i = 0; vxattrs[i].name; i++) { |
681 | len = sprintf(names, "%s", vxattrs[i].name); | 781 | if (!vxattrs[i].hidden && |
682 | names += len + 1; | 782 | !(vxattrs[i].exists_cb && |
783 | !vxattrs[i].exists_cb(ci))) { | ||
784 | len = sprintf(names, "%s", vxattrs[i].name); | ||
785 | names += len + 1; | ||
786 | err += len + 1; | ||
787 | } | ||
683 | } | 788 | } |
789 | } | ||
684 | 790 | ||
685 | out: | 791 | out: |
686 | spin_unlock(&ci->i_ceph_lock); | 792 | spin_unlock(&ci->i_ceph_lock); |
@@ -782,6 +888,10 @@ int ceph_setxattr(struct dentry *dentry, const char *name, | |||
782 | if (vxattr && vxattr->readonly) | 888 | if (vxattr && vxattr->readonly) |
783 | return -EOPNOTSUPP; | 889 | return -EOPNOTSUPP; |
784 | 890 | ||
891 | /* pass any unhandled ceph.* xattrs through to the MDS */ | ||
892 | if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN)) | ||
893 | goto do_sync_unlocked; | ||
894 | |||
785 | /* preallocate memory for xattr name, value, index node */ | 895 | /* preallocate memory for xattr name, value, index node */ |
786 | err = -ENOMEM; | 896 | err = -ENOMEM; |
787 | newname = kmemdup(name, name_len + 1, GFP_NOFS); | 897 | newname = kmemdup(name, name_len + 1, GFP_NOFS); |
@@ -838,6 +948,7 @@ retry: | |||
838 | 948 | ||
839 | do_sync: | 949 | do_sync: |
840 | spin_unlock(&ci->i_ceph_lock); | 950 | spin_unlock(&ci->i_ceph_lock); |
951 | do_sync_unlocked: | ||
841 | err = ceph_sync_setxattr(dentry, name, value, size, flags); | 952 | err = ceph_sync_setxattr(dentry, name, value, size, flags); |
842 | out: | 953 | out: |
843 | kfree(newname); | 954 | kfree(newname); |
@@ -892,6 +1003,10 @@ int ceph_removexattr(struct dentry *dentry, const char *name) | |||
892 | if (vxattr && vxattr->readonly) | 1003 | if (vxattr && vxattr->readonly) |
893 | return -EOPNOTSUPP; | 1004 | return -EOPNOTSUPP; |
894 | 1005 | ||
1006 | /* pass any unhandled ceph.* xattrs through to the MDS */ | ||
1007 | if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN)) | ||
1008 | goto do_sync_unlocked; | ||
1009 | |||
895 | err = -ENOMEM; | 1010 | err = -ENOMEM; |
896 | spin_lock(&ci->i_ceph_lock); | 1011 | spin_lock(&ci->i_ceph_lock); |
897 | retry: | 1012 | retry: |
@@ -931,6 +1046,7 @@ retry: | |||
931 | return err; | 1046 | return err; |
932 | do_sync: | 1047 | do_sync: |
933 | spin_unlock(&ci->i_ceph_lock); | 1048 | spin_unlock(&ci->i_ceph_lock); |
1049 | do_sync_unlocked: | ||
934 | err = ceph_send_removexattr(dentry, name); | 1050 | err = ceph_send_removexattr(dentry, name); |
935 | out: | 1051 | out: |
936 | return err; | 1052 | return err; |
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index dad579b0c0e6..2160aab482f6 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h | |||
@@ -14,13 +14,19 @@ | |||
14 | #define CEPH_FEATURE_DIRLAYOUTHASH (1<<7) | 14 | #define CEPH_FEATURE_DIRLAYOUTHASH (1<<7) |
15 | /* bits 8-17 defined by user-space; not supported yet here */ | 15 | /* bits 8-17 defined by user-space; not supported yet here */ |
16 | #define CEPH_FEATURE_CRUSH_TUNABLES (1<<18) | 16 | #define CEPH_FEATURE_CRUSH_TUNABLES (1<<18) |
17 | /* bits 19-24 defined by user-space; not supported yet here */ | ||
18 | #define CEPH_FEATURE_CRUSH_TUNABLES2 (1<<25) | ||
19 | /* bit 26 defined by user-space; not supported yet here */ | ||
20 | #define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27) | ||
17 | 21 | ||
18 | /* | 22 | /* |
19 | * Features supported. | 23 | * Features supported. |
20 | */ | 24 | */ |
21 | #define CEPH_FEATURES_SUPPORTED_DEFAULT \ | 25 | #define CEPH_FEATURES_SUPPORTED_DEFAULT \ |
22 | (CEPH_FEATURE_NOSRCADDR | \ | 26 | (CEPH_FEATURE_NOSRCADDR | \ |
23 | CEPH_FEATURE_CRUSH_TUNABLES) | 27 | CEPH_FEATURE_CRUSH_TUNABLES | \ |
28 | CEPH_FEATURE_CRUSH_TUNABLES2 | \ | ||
29 | CEPH_FEATURE_REPLY_CREATE_INODE) | ||
24 | 30 | ||
25 | #define CEPH_FEATURES_REQUIRED_DEFAULT \ | 31 | #define CEPH_FEATURES_REQUIRED_DEFAULT \ |
26 | (CEPH_FEATURE_NOSRCADDR) | 32 | (CEPH_FEATURE_NOSRCADDR) |
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index cf6f4d998a76..2ad7b860f062 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h | |||
@@ -21,16 +21,14 @@ | |||
21 | * internal cluster protocols separately from the public, | 21 | * internal cluster protocols separately from the public, |
22 | * client-facing protocol. | 22 | * client-facing protocol. |
23 | */ | 23 | */ |
24 | #define CEPH_OSD_PROTOCOL 8 /* cluster internal */ | ||
25 | #define CEPH_MDS_PROTOCOL 12 /* cluster internal */ | ||
26 | #define CEPH_MON_PROTOCOL 5 /* cluster internal */ | ||
27 | #define CEPH_OSDC_PROTOCOL 24 /* server/client */ | 24 | #define CEPH_OSDC_PROTOCOL 24 /* server/client */ |
28 | #define CEPH_MDSC_PROTOCOL 32 /* server/client */ | 25 | #define CEPH_MDSC_PROTOCOL 32 /* server/client */ |
29 | #define CEPH_MONC_PROTOCOL 15 /* server/client */ | 26 | #define CEPH_MONC_PROTOCOL 15 /* server/client */ |
30 | 27 | ||
31 | 28 | ||
32 | #define CEPH_INO_ROOT 1 | 29 | #define CEPH_INO_ROOT 1 |
33 | #define CEPH_INO_CEPH 2 /* hidden .ceph dir */ | 30 | #define CEPH_INO_CEPH 2 /* hidden .ceph dir */ |
31 | #define CEPH_INO_DOTDOT 3 /* used by ceph fuse for parent (..) */ | ||
34 | 32 | ||
35 | /* arbitrary limit on max # of monitors (cluster of 3 is typical) */ | 33 | /* arbitrary limit on max # of monitors (cluster of 3 is typical) */ |
36 | #define CEPH_MAX_MON 31 | 34 | #define CEPH_MAX_MON 31 |
@@ -51,7 +49,7 @@ struct ceph_file_layout { | |||
51 | __le32 fl_object_stripe_unit; /* UNUSED. for per-object parity, if any */ | 49 | __le32 fl_object_stripe_unit; /* UNUSED. for per-object parity, if any */ |
52 | 50 | ||
53 | /* object -> pg layout */ | 51 | /* object -> pg layout */ |
54 | __le32 fl_unused; /* unused; used to be preferred primary (-1) */ | 52 | __le32 fl_unused; /* unused; used to be preferred primary for pg (-1 for none) */ |
55 | __le32 fl_pg_pool; /* namespace, crush ruleset, rep level */ | 53 | __le32 fl_pg_pool; /* namespace, crush ruleset, rep level */ |
56 | } __attribute__ ((packed)); | 54 | } __attribute__ ((packed)); |
57 | 55 | ||
@@ -101,6 +99,8 @@ struct ceph_dir_layout { | |||
101 | #define CEPH_MSG_MON_SUBSCRIBE_ACK 16 | 99 | #define CEPH_MSG_MON_SUBSCRIBE_ACK 16 |
102 | #define CEPH_MSG_AUTH 17 | 100 | #define CEPH_MSG_AUTH 17 |
103 | #define CEPH_MSG_AUTH_REPLY 18 | 101 | #define CEPH_MSG_AUTH_REPLY 18 |
102 | #define CEPH_MSG_MON_GET_VERSION 19 | ||
103 | #define CEPH_MSG_MON_GET_VERSION_REPLY 20 | ||
104 | 104 | ||
105 | /* client <-> mds */ | 105 | /* client <-> mds */ |
106 | #define CEPH_MSG_MDS_MAP 21 | 106 | #define CEPH_MSG_MDS_MAP 21 |
@@ -221,6 +221,11 @@ struct ceph_mon_subscribe_ack { | |||
221 | } __attribute__ ((packed)); | 221 | } __attribute__ ((packed)); |
222 | 222 | ||
223 | /* | 223 | /* |
224 | * mdsmap flags | ||
225 | */ | ||
226 | #define CEPH_MDSMAP_DOWN (1<<0) /* cluster deliberately down */ | ||
227 | |||
228 | /* | ||
224 | * mds states | 229 | * mds states |
225 | * > 0 -> in | 230 | * > 0 -> in |
226 | * <= 0 -> out | 231 | * <= 0 -> out |
@@ -233,6 +238,7 @@ struct ceph_mon_subscribe_ack { | |||
233 | #define CEPH_MDS_STATE_CREATING -6 /* up, creating MDS instance. */ | 238 | #define CEPH_MDS_STATE_CREATING -6 /* up, creating MDS instance. */ |
234 | #define CEPH_MDS_STATE_STARTING -7 /* up, starting previously stopped mds */ | 239 | #define CEPH_MDS_STATE_STARTING -7 /* up, starting previously stopped mds */ |
235 | #define CEPH_MDS_STATE_STANDBY_REPLAY -8 /* up, tailing active node's journal */ | 240 | #define CEPH_MDS_STATE_STANDBY_REPLAY -8 /* up, tailing active node's journal */ |
241 | #define CEPH_MDS_STATE_REPLAYONCE -9 /* up, replaying an active node's journal */ | ||
236 | 242 | ||
237 | #define CEPH_MDS_STATE_REPLAY 8 /* up, replaying journal. */ | 243 | #define CEPH_MDS_STATE_REPLAY 8 /* up, replaying journal. */ |
238 | #define CEPH_MDS_STATE_RESOLVE 9 /* up, disambiguating distributed | 244 | #define CEPH_MDS_STATE_RESOLVE 9 /* up, disambiguating distributed |
@@ -264,6 +270,7 @@ extern const char *ceph_mds_state_name(int s); | |||
264 | #define CEPH_LOCK_IXATTR 2048 | 270 | #define CEPH_LOCK_IXATTR 2048 |
265 | #define CEPH_LOCK_IFLOCK 4096 /* advisory file locks */ | 271 | #define CEPH_LOCK_IFLOCK 4096 /* advisory file locks */ |
266 | #define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */ | 272 | #define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */ |
273 | #define CEPH_LOCK_IPOLICY 16384 /* policy lock on dirs. MDS internal */ | ||
267 | 274 | ||
268 | /* client_session ops */ | 275 | /* client_session ops */ |
269 | enum { | 276 | enum { |
@@ -338,6 +345,12 @@ extern const char *ceph_mds_op_name(int op); | |||
338 | #define CEPH_SETATTR_SIZE 32 | 345 | #define CEPH_SETATTR_SIZE 32 |
339 | #define CEPH_SETATTR_CTIME 64 | 346 | #define CEPH_SETATTR_CTIME 64 |
340 | 347 | ||
348 | /* | ||
349 | * Ceph setxattr request flags. | ||
350 | */ | ||
351 | #define CEPH_XATTR_CREATE 1 | ||
352 | #define CEPH_XATTR_REPLACE 2 | ||
353 | |||
341 | union ceph_mds_request_args { | 354 | union ceph_mds_request_args { |
342 | struct { | 355 | struct { |
343 | __le32 mask; /* CEPH_CAP_* */ | 356 | __le32 mask; /* CEPH_CAP_* */ |
@@ -522,14 +535,17 @@ int ceph_flags_to_mode(int flags); | |||
522 | #define CEPH_CAP_GWREXTEND 64 /* (file) client can extend EOF */ | 535 | #define CEPH_CAP_GWREXTEND 64 /* (file) client can extend EOF */ |
523 | #define CEPH_CAP_GLAZYIO 128 /* (file) client can perform lazy io */ | 536 | #define CEPH_CAP_GLAZYIO 128 /* (file) client can perform lazy io */ |
524 | 537 | ||
538 | #define CEPH_CAP_SIMPLE_BITS 2 | ||
539 | #define CEPH_CAP_FILE_BITS 8 | ||
540 | |||
525 | /* per-lock shift */ | 541 | /* per-lock shift */ |
526 | #define CEPH_CAP_SAUTH 2 | 542 | #define CEPH_CAP_SAUTH 2 |
527 | #define CEPH_CAP_SLINK 4 | 543 | #define CEPH_CAP_SLINK 4 |
528 | #define CEPH_CAP_SXATTR 6 | 544 | #define CEPH_CAP_SXATTR 6 |
529 | #define CEPH_CAP_SFILE 8 | 545 | #define CEPH_CAP_SFILE 8 |
530 | #define CEPH_CAP_SFLOCK 20 | 546 | #define CEPH_CAP_SFLOCK 20 |
531 | 547 | ||
532 | #define CEPH_CAP_BITS 22 | 548 | #define CEPH_CAP_BITS 22 |
533 | 549 | ||
534 | /* composed values */ | 550 | /* composed values */ |
535 | #define CEPH_CAP_AUTH_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SAUTH) | 551 | #define CEPH_CAP_AUTH_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SAUTH) |
diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h index 63d092822bad..360d9d08ca9e 100644 --- a/include/linux/ceph/decode.h +++ b/include/linux/ceph/decode.h | |||
@@ -52,10 +52,10 @@ static inline int ceph_has_room(void **p, void *end, size_t n) | |||
52 | return end >= *p && n <= end - *p; | 52 | return end >= *p && n <= end - *p; |
53 | } | 53 | } |
54 | 54 | ||
55 | #define ceph_decode_need(p, end, n, bad) \ | 55 | #define ceph_decode_need(p, end, n, bad) \ |
56 | do { \ | 56 | do { \ |
57 | if (!likely(ceph_has_room(p, end, n))) \ | 57 | if (!likely(ceph_has_room(p, end, n))) \ |
58 | goto bad; \ | 58 | goto bad; \ |
59 | } while (0) | 59 | } while (0) |
60 | 60 | ||
61 | #define ceph_decode_64_safe(p, end, v, bad) \ | 61 | #define ceph_decode_64_safe(p, end, v, bad) \ |
@@ -99,8 +99,8 @@ static inline int ceph_has_room(void **p, void *end, size_t n) | |||
99 | * | 99 | * |
100 | * There are two possible failures: | 100 | * There are two possible failures: |
101 | * - converting the string would require accessing memory at or | 101 | * - converting the string would require accessing memory at or |
102 | * beyond the "end" pointer provided (-E | 102 | * beyond the "end" pointer provided (-ERANGE) |
103 | * - memory could not be allocated for the result | 103 | * - memory could not be allocated for the result (-ENOMEM) |
104 | */ | 104 | */ |
105 | static inline char *ceph_extract_encoded_string(void **p, void *end, | 105 | static inline char *ceph_extract_encoded_string(void **p, void *end, |
106 | size_t *lenp, gfp_t gfp) | 106 | size_t *lenp, gfp_t gfp) |
@@ -217,10 +217,10 @@ static inline void ceph_encode_string(void **p, void *end, | |||
217 | *p += len; | 217 | *p += len; |
218 | } | 218 | } |
219 | 219 | ||
220 | #define ceph_encode_need(p, end, n, bad) \ | 220 | #define ceph_encode_need(p, end, n, bad) \ |
221 | do { \ | 221 | do { \ |
222 | if (!likely(ceph_has_room(p, end, n))) \ | 222 | if (!likely(ceph_has_room(p, end, n))) \ |
223 | goto bad; \ | 223 | goto bad; \ |
224 | } while (0) | 224 | } while (0) |
225 | 225 | ||
226 | #define ceph_encode_64_safe(p, end, v, bad) \ | 226 | #define ceph_encode_64_safe(p, end, v, bad) \ |
@@ -231,12 +231,17 @@ static inline void ceph_encode_string(void **p, void *end, | |||
231 | #define ceph_encode_32_safe(p, end, v, bad) \ | 231 | #define ceph_encode_32_safe(p, end, v, bad) \ |
232 | do { \ | 232 | do { \ |
233 | ceph_encode_need(p, end, sizeof(u32), bad); \ | 233 | ceph_encode_need(p, end, sizeof(u32), bad); \ |
234 | ceph_encode_32(p, v); \ | 234 | ceph_encode_32(p, v); \ |
235 | } while (0) | 235 | } while (0) |
236 | #define ceph_encode_16_safe(p, end, v, bad) \ | 236 | #define ceph_encode_16_safe(p, end, v, bad) \ |
237 | do { \ | 237 | do { \ |
238 | ceph_encode_need(p, end, sizeof(u16), bad); \ | 238 | ceph_encode_need(p, end, sizeof(u16), bad); \ |
239 | ceph_encode_16(p, v); \ | 239 | ceph_encode_16(p, v); \ |
240 | } while (0) | ||
241 | #define ceph_encode_8_safe(p, end, v, bad) \ | ||
242 | do { \ | ||
243 | ceph_encode_need(p, end, sizeof(u8), bad); \ | ||
244 | ceph_encode_8(p, v); \ | ||
240 | } while (0) | 245 | } while (0) |
241 | 246 | ||
242 | #define ceph_encode_copy_safe(p, end, pv, n, bad) \ | 247 | #define ceph_encode_copy_safe(p, end, pv, n, bad) \ |
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 084d3c622b12..29818fc3fa49 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h | |||
@@ -193,6 +193,8 @@ static inline int calc_pages_for(u64 off, u64 len) | |||
193 | } | 193 | } |
194 | 194 | ||
195 | /* ceph_common.c */ | 195 | /* ceph_common.c */ |
196 | extern bool libceph_compatible(void *data); | ||
197 | |||
196 | extern const char *ceph_msg_type_name(int type); | 198 | extern const char *ceph_msg_type_name(int type); |
197 | extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); | 199 | extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); |
198 | extern struct kmem_cache *ceph_inode_cachep; | 200 | extern struct kmem_cache *ceph_inode_cachep; |
@@ -220,7 +222,7 @@ extern int ceph_open_session(struct ceph_client *client); | |||
220 | /* pagevec.c */ | 222 | /* pagevec.c */ |
221 | extern void ceph_release_page_vector(struct page **pages, int num_pages); | 223 | extern void ceph_release_page_vector(struct page **pages, int num_pages); |
222 | 224 | ||
223 | extern struct page **ceph_get_direct_page_vector(const char __user *data, | 225 | extern struct page **ceph_get_direct_page_vector(const void __user *data, |
224 | int num_pages, | 226 | int num_pages, |
225 | bool write_page); | 227 | bool write_page); |
226 | extern void ceph_put_page_vector(struct page **pages, int num_pages, | 228 | extern void ceph_put_page_vector(struct page **pages, int num_pages, |
@@ -228,15 +230,15 @@ extern void ceph_put_page_vector(struct page **pages, int num_pages, | |||
228 | extern void ceph_release_page_vector(struct page **pages, int num_pages); | 230 | extern void ceph_release_page_vector(struct page **pages, int num_pages); |
229 | extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); | 231 | extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); |
230 | extern int ceph_copy_user_to_page_vector(struct page **pages, | 232 | extern int ceph_copy_user_to_page_vector(struct page **pages, |
231 | const char __user *data, | 233 | const void __user *data, |
232 | loff_t off, size_t len); | 234 | loff_t off, size_t len); |
233 | extern int ceph_copy_to_page_vector(struct page **pages, | 235 | extern void ceph_copy_to_page_vector(struct page **pages, |
234 | const char *data, | 236 | const void *data, |
235 | loff_t off, size_t len); | 237 | loff_t off, size_t len); |
236 | extern int ceph_copy_from_page_vector(struct page **pages, | 238 | extern void ceph_copy_from_page_vector(struct page **pages, |
237 | char *data, | 239 | void *data, |
238 | loff_t off, size_t len); | 240 | loff_t off, size_t len); |
239 | extern int ceph_copy_page_vector_to_user(struct page **pages, char __user *data, | 241 | extern int ceph_copy_page_vector_to_user(struct page **pages, void __user *data, |
240 | loff_t off, size_t len); | 242 | loff_t off, size_t len); |
241 | extern void ceph_zero_page_vector_range(int off, int len, struct page **pages); | 243 | extern void ceph_zero_page_vector_range(int off, int len, struct page **pages); |
242 | 244 | ||
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 14ba5ee738a9..60903e0f665c 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h | |||
@@ -83,9 +83,11 @@ struct ceph_msg { | |||
83 | struct list_head list_head; | 83 | struct list_head list_head; |
84 | 84 | ||
85 | struct kref kref; | 85 | struct kref kref; |
86 | #ifdef CONFIG_BLOCK | ||
86 | struct bio *bio; /* instead of pages/pagelist */ | 87 | struct bio *bio; /* instead of pages/pagelist */ |
87 | struct bio *bio_iter; /* bio iterator */ | 88 | struct bio *bio_iter; /* bio iterator */ |
88 | int bio_seg; /* current bio segment */ | 89 | int bio_seg; /* current bio segment */ |
90 | #endif /* CONFIG_BLOCK */ | ||
89 | struct ceph_pagelist *trail; /* the trailing part of the data */ | 91 | struct ceph_pagelist *trail; /* the trailing part of the data */ |
90 | bool front_is_vmalloc; | 92 | bool front_is_vmalloc; |
91 | bool more_to_follow; | 93 | bool more_to_follow; |
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index d9b880e977e6..388158ff0cbc 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/ceph/osdmap.h> | 10 | #include <linux/ceph/osdmap.h> |
11 | #include <linux/ceph/messenger.h> | 11 | #include <linux/ceph/messenger.h> |
12 | #include <linux/ceph/auth.h> | 12 | #include <linux/ceph/auth.h> |
13 | #include <linux/ceph/pagelist.h> | ||
13 | 14 | ||
14 | /* | 15 | /* |
15 | * Maximum object name size | 16 | * Maximum object name size |
@@ -22,7 +23,6 @@ struct ceph_snap_context; | |||
22 | struct ceph_osd_request; | 23 | struct ceph_osd_request; |
23 | struct ceph_osd_client; | 24 | struct ceph_osd_client; |
24 | struct ceph_authorizer; | 25 | struct ceph_authorizer; |
25 | struct ceph_pagelist; | ||
26 | 26 | ||
27 | /* | 27 | /* |
28 | * completion callback for async writepages | 28 | * completion callback for async writepages |
@@ -95,7 +95,7 @@ struct ceph_osd_request { | |||
95 | struct bio *r_bio; /* instead of pages */ | 95 | struct bio *r_bio; /* instead of pages */ |
96 | #endif | 96 | #endif |
97 | 97 | ||
98 | struct ceph_pagelist *r_trail; /* trailing part of the data */ | 98 | struct ceph_pagelist r_trail; /* trailing part of the data */ |
99 | }; | 99 | }; |
100 | 100 | ||
101 | struct ceph_osd_event { | 101 | struct ceph_osd_event { |
@@ -107,7 +107,6 @@ struct ceph_osd_event { | |||
107 | struct rb_node node; | 107 | struct rb_node node; |
108 | struct list_head osd_node; | 108 | struct list_head osd_node; |
109 | struct kref kref; | 109 | struct kref kref; |
110 | struct completion completion; | ||
111 | }; | 110 | }; |
112 | 111 | ||
113 | struct ceph_osd_event_work { | 112 | struct ceph_osd_event_work { |
@@ -157,7 +156,7 @@ struct ceph_osd_client { | |||
157 | 156 | ||
158 | struct ceph_osd_req_op { | 157 | struct ceph_osd_req_op { |
159 | u16 op; /* CEPH_OSD_OP_* */ | 158 | u16 op; /* CEPH_OSD_OP_* */ |
160 | u32 flags; /* CEPH_OSD_FLAG_* */ | 159 | u32 payload_len; |
161 | union { | 160 | union { |
162 | struct { | 161 | struct { |
163 | u64 offset, length; | 162 | u64 offset, length; |
@@ -166,23 +165,24 @@ struct ceph_osd_req_op { | |||
166 | } extent; | 165 | } extent; |
167 | struct { | 166 | struct { |
168 | const char *name; | 167 | const char *name; |
169 | u32 name_len; | ||
170 | const char *val; | 168 | const char *val; |
169 | u32 name_len; | ||
171 | u32 value_len; | 170 | u32 value_len; |
172 | __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */ | 171 | __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */ |
173 | __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */ | 172 | __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */ |
174 | } xattr; | 173 | } xattr; |
175 | struct { | 174 | struct { |
176 | const char *class_name; | 175 | const char *class_name; |
177 | __u8 class_len; | ||
178 | const char *method_name; | 176 | const char *method_name; |
179 | __u8 method_len; | ||
180 | __u8 argc; | ||
181 | const char *indata; | 177 | const char *indata; |
182 | u32 indata_len; | 178 | u32 indata_len; |
179 | __u8 class_len; | ||
180 | __u8 method_len; | ||
181 | __u8 argc; | ||
183 | } cls; | 182 | } cls; |
184 | struct { | 183 | struct { |
185 | u64 cookie, count; | 184 | u64 cookie; |
185 | u64 count; | ||
186 | } pgls; | 186 | } pgls; |
187 | struct { | 187 | struct { |
188 | u64 snapid; | 188 | u64 snapid; |
@@ -190,12 +190,11 @@ struct ceph_osd_req_op { | |||
190 | struct { | 190 | struct { |
191 | u64 cookie; | 191 | u64 cookie; |
192 | u64 ver; | 192 | u64 ver; |
193 | __u8 flag; | ||
194 | u32 prot_ver; | 193 | u32 prot_ver; |
195 | u32 timeout; | 194 | u32 timeout; |
195 | __u8 flag; | ||
196 | } watch; | 196 | } watch; |
197 | }; | 197 | }; |
198 | u32 payload_len; | ||
199 | }; | 198 | }; |
200 | 199 | ||
201 | extern int ceph_osdc_init(struct ceph_osd_client *osdc, | 200 | extern int ceph_osdc_init(struct ceph_osd_client *osdc, |
@@ -207,29 +206,19 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, | |||
207 | extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, | 206 | extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, |
208 | struct ceph_msg *msg); | 207 | struct ceph_msg *msg); |
209 | 208 | ||
210 | extern int ceph_calc_raw_layout(struct ceph_osd_client *osdc, | ||
211 | struct ceph_file_layout *layout, | ||
212 | u64 snapid, | ||
213 | u64 off, u64 *plen, u64 *bno, | ||
214 | struct ceph_osd_request *req, | ||
215 | struct ceph_osd_req_op *op); | ||
216 | |||
217 | extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, | 209 | extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, |
218 | int flags, | ||
219 | struct ceph_snap_context *snapc, | 210 | struct ceph_snap_context *snapc, |
220 | struct ceph_osd_req_op *ops, | 211 | unsigned int num_op, |
221 | bool use_mempool, | 212 | bool use_mempool, |
222 | gfp_t gfp_flags, | 213 | gfp_t gfp_flags); |
223 | struct page **pages, | ||
224 | struct bio *bio); | ||
225 | 214 | ||
226 | extern void ceph_osdc_build_request(struct ceph_osd_request *req, | 215 | extern void ceph_osdc_build_request(struct ceph_osd_request *req, |
227 | u64 off, u64 *plen, | 216 | u64 off, u64 len, |
217 | unsigned int num_op, | ||
228 | struct ceph_osd_req_op *src_ops, | 218 | struct ceph_osd_req_op *src_ops, |
229 | struct ceph_snap_context *snapc, | 219 | struct ceph_snap_context *snapc, |
230 | struct timespec *mtime, | 220 | u64 snap_id, |
231 | const char *oid, | 221 | struct timespec *mtime); |
232 | int oid_len); | ||
233 | 222 | ||
234 | extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, | 223 | extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, |
235 | struct ceph_file_layout *layout, | 224 | struct ceph_file_layout *layout, |
@@ -239,8 +228,7 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, | |||
239 | int do_sync, u32 truncate_seq, | 228 | int do_sync, u32 truncate_seq, |
240 | u64 truncate_size, | 229 | u64 truncate_size, |
241 | struct timespec *mtime, | 230 | struct timespec *mtime, |
242 | bool use_mempool, int num_reply, | 231 | bool use_mempool, int page_align); |
243 | int page_align); | ||
244 | 232 | ||
245 | extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, | 233 | extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, |
246 | struct ceph_osd_request *req); | 234 | struct ceph_osd_request *req); |
@@ -279,17 +267,13 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc, | |||
279 | u64 off, u64 len, | 267 | u64 off, u64 len, |
280 | u32 truncate_seq, u64 truncate_size, | 268 | u32 truncate_seq, u64 truncate_size, |
281 | struct timespec *mtime, | 269 | struct timespec *mtime, |
282 | struct page **pages, int nr_pages, | 270 | struct page **pages, int nr_pages); |
283 | int flags, int do_sync, bool nofail); | ||
284 | 271 | ||
285 | /* watch/notify events */ | 272 | /* watch/notify events */ |
286 | extern int ceph_osdc_create_event(struct ceph_osd_client *osdc, | 273 | extern int ceph_osdc_create_event(struct ceph_osd_client *osdc, |
287 | void (*event_cb)(u64, u64, u8, void *), | 274 | void (*event_cb)(u64, u64, u8, void *), |
288 | int one_shot, void *data, | 275 | void *data, struct ceph_osd_event **pevent); |
289 | struct ceph_osd_event **pevent); | ||
290 | extern void ceph_osdc_cancel_event(struct ceph_osd_event *event); | 276 | extern void ceph_osdc_cancel_event(struct ceph_osd_event *event); |
291 | extern int ceph_osdc_wait_event(struct ceph_osd_event *event, | ||
292 | unsigned long timeout); | ||
293 | extern void ceph_osdc_put_event(struct ceph_osd_event *event); | 277 | extern void ceph_osdc_put_event(struct ceph_osd_event *event); |
294 | #endif | 278 | #endif |
295 | 279 | ||
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 10a417f9f76f..c83a838f89f5 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h | |||
@@ -110,7 +110,7 @@ extern void ceph_osdmap_destroy(struct ceph_osdmap *map); | |||
110 | 110 | ||
111 | /* calculate mapping of a file extent to an object */ | 111 | /* calculate mapping of a file extent to an object */ |
112 | extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, | 112 | extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, |
113 | u64 off, u64 *plen, | 113 | u64 off, u64 len, |
114 | u64 *bno, u64 *oxoff, u64 *oxlen); | 114 | u64 *bno, u64 *oxoff, u64 *oxlen); |
115 | 115 | ||
116 | /* calculate mapping of object to a placement group */ | 116 | /* calculate mapping of object to a placement group */ |
diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 2c04afeead1c..b65182aba6f7 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h | |||
@@ -145,8 +145,12 @@ struct ceph_eversion { | |||
145 | */ | 145 | */ |
146 | 146 | ||
147 | /* status bits */ | 147 | /* status bits */ |
148 | #define CEPH_OSD_EXISTS 1 | 148 | #define CEPH_OSD_EXISTS (1<<0) |
149 | #define CEPH_OSD_UP 2 | 149 | #define CEPH_OSD_UP (1<<1) |
150 | #define CEPH_OSD_AUTOOUT (1<<2) /* osd was automatically marked out */ | ||
151 | #define CEPH_OSD_NEW (1<<3) /* osd is new, never marked in */ | ||
152 | |||
153 | extern const char *ceph_osd_state_name(int s); | ||
150 | 154 | ||
151 | /* osd weights. fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */ | 155 | /* osd weights. fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */ |
152 | #define CEPH_OSD_IN 0x10000 | 156 | #define CEPH_OSD_IN 0x10000 |
@@ -161,9 +165,25 @@ struct ceph_eversion { | |||
161 | #define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */ | 165 | #define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */ |
162 | #define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */ | 166 | #define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */ |
163 | #define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */ | 167 | #define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */ |
168 | #define CEPH_OSDMAP_NOUP (1<<5) /* block osd boot */ | ||
169 | #define CEPH_OSDMAP_NODOWN (1<<6) /* block osd mark-down/failure */ | ||
170 | #define CEPH_OSDMAP_NOOUT (1<<7) /* block osd auto mark-out */ | ||
171 | #define CEPH_OSDMAP_NOIN (1<<8) /* block osd auto mark-in */ | ||
172 | #define CEPH_OSDMAP_NOBACKFILL (1<<9) /* block osd backfill */ | ||
173 | #define CEPH_OSDMAP_NORECOVER (1<<10) /* block osd recovery and backfill */ | ||
174 | |||
175 | /* | ||
176 | * The error code to return when an OSD can't handle a write | ||
177 | * because it is too large. | ||
178 | */ | ||
179 | #define OSD_WRITETOOBIG EMSGSIZE | ||
164 | 180 | ||
165 | /* | 181 | /* |
166 | * osd ops | 182 | * osd ops |
183 | * | ||
184 | * WARNING: do not use these op codes directly. Use the helpers | ||
185 | * defined below instead. In certain cases, op code behavior was | ||
186 | * redefined, resulting in special-cases in the helpers. | ||
167 | */ | 187 | */ |
168 | #define CEPH_OSD_OP_MODE 0xf000 | 188 | #define CEPH_OSD_OP_MODE 0xf000 |
169 | #define CEPH_OSD_OP_MODE_RD 0x1000 | 189 | #define CEPH_OSD_OP_MODE_RD 0x1000 |
@@ -177,6 +197,7 @@ struct ceph_eversion { | |||
177 | #define CEPH_OSD_OP_TYPE_ATTR 0x0300 | 197 | #define CEPH_OSD_OP_TYPE_ATTR 0x0300 |
178 | #define CEPH_OSD_OP_TYPE_EXEC 0x0400 | 198 | #define CEPH_OSD_OP_TYPE_EXEC 0x0400 |
179 | #define CEPH_OSD_OP_TYPE_PG 0x0500 | 199 | #define CEPH_OSD_OP_TYPE_PG 0x0500 |
200 | #define CEPH_OSD_OP_TYPE_MULTI 0x0600 /* multiobject */ | ||
180 | 201 | ||
181 | enum { | 202 | enum { |
182 | /** data **/ | 203 | /** data **/ |
@@ -217,6 +238,23 @@ enum { | |||
217 | 238 | ||
218 | CEPH_OSD_OP_WATCH = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 15, | 239 | CEPH_OSD_OP_WATCH = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 15, |
219 | 240 | ||
241 | /* omap */ | ||
242 | CEPH_OSD_OP_OMAPGETKEYS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 17, | ||
243 | CEPH_OSD_OP_OMAPGETVALS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 18, | ||
244 | CEPH_OSD_OP_OMAPGETHEADER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 19, | ||
245 | CEPH_OSD_OP_OMAPGETVALSBYKEYS = | ||
246 | CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 20, | ||
247 | CEPH_OSD_OP_OMAPSETVALS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 21, | ||
248 | CEPH_OSD_OP_OMAPSETHEADER = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 22, | ||
249 | CEPH_OSD_OP_OMAPCLEAR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 23, | ||
250 | CEPH_OSD_OP_OMAPRMKEYS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 24, | ||
251 | CEPH_OSD_OP_OMAP_CMP = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 25, | ||
252 | |||
253 | /** multi **/ | ||
254 | CEPH_OSD_OP_CLONERANGE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_MULTI | 1, | ||
255 | CEPH_OSD_OP_ASSERT_SRC_VERSION = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 2, | ||
256 | CEPH_OSD_OP_SRC_CMPXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 3, | ||
257 | |||
220 | /** attrs **/ | 258 | /** attrs **/ |
221 | /* read */ | 259 | /* read */ |
222 | CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1, | 260 | CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1, |
@@ -238,6 +276,7 @@ enum { | |||
238 | CEPH_OSD_OP_SCRUB_RESERVE = CEPH_OSD_OP_MODE_SUB | 6, | 276 | CEPH_OSD_OP_SCRUB_RESERVE = CEPH_OSD_OP_MODE_SUB | 6, |
239 | CEPH_OSD_OP_SCRUB_UNRESERVE = CEPH_OSD_OP_MODE_SUB | 7, | 277 | CEPH_OSD_OP_SCRUB_UNRESERVE = CEPH_OSD_OP_MODE_SUB | 7, |
240 | CEPH_OSD_OP_SCRUB_STOP = CEPH_OSD_OP_MODE_SUB | 8, | 278 | CEPH_OSD_OP_SCRUB_STOP = CEPH_OSD_OP_MODE_SUB | 8, |
279 | CEPH_OSD_OP_SCRUB_MAP = CEPH_OSD_OP_MODE_SUB | 9, | ||
241 | 280 | ||
242 | /** lock **/ | 281 | /** lock **/ |
243 | CEPH_OSD_OP_WRLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 1, | 282 | CEPH_OSD_OP_WRLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 1, |
@@ -248,10 +287,12 @@ enum { | |||
248 | CEPH_OSD_OP_DNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 6, | 287 | CEPH_OSD_OP_DNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 6, |
249 | 288 | ||
250 | /** exec **/ | 289 | /** exec **/ |
290 | /* note: the RD bit here is wrong; see special-case below in helper */ | ||
251 | CEPH_OSD_OP_CALL = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_EXEC | 1, | 291 | CEPH_OSD_OP_CALL = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_EXEC | 1, |
252 | 292 | ||
253 | /** pg **/ | 293 | /** pg **/ |
254 | CEPH_OSD_OP_PGLS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 1, | 294 | CEPH_OSD_OP_PGLS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 1, |
295 | CEPH_OSD_OP_PGLS_FILTER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 2, | ||
255 | }; | 296 | }; |
256 | 297 | ||
257 | static inline int ceph_osd_op_type_lock(int op) | 298 | static inline int ceph_osd_op_type_lock(int op) |
@@ -274,6 +315,10 @@ static inline int ceph_osd_op_type_pg(int op) | |||
274 | { | 315 | { |
275 | return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_PG; | 316 | return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_PG; |
276 | } | 317 | } |
318 | static inline int ceph_osd_op_type_multi(int op) | ||
319 | { | ||
320 | return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_MULTI; | ||
321 | } | ||
277 | 322 | ||
278 | static inline int ceph_osd_op_mode_subop(int op) | 323 | static inline int ceph_osd_op_mode_subop(int op) |
279 | { | 324 | { |
@@ -281,11 +326,12 @@ static inline int ceph_osd_op_mode_subop(int op) | |||
281 | } | 326 | } |
282 | static inline int ceph_osd_op_mode_read(int op) | 327 | static inline int ceph_osd_op_mode_read(int op) |
283 | { | 328 | { |
284 | return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_RD; | 329 | return (op & CEPH_OSD_OP_MODE_RD) && |
330 | op != CEPH_OSD_OP_CALL; | ||
285 | } | 331 | } |
286 | static inline int ceph_osd_op_mode_modify(int op) | 332 | static inline int ceph_osd_op_mode_modify(int op) |
287 | { | 333 | { |
288 | return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_WR; | 334 | return op & CEPH_OSD_OP_MODE_WR; |
289 | } | 335 | } |
290 | 336 | ||
291 | /* | 337 | /* |
@@ -294,34 +340,38 @@ static inline int ceph_osd_op_mode_modify(int op) | |||
294 | */ | 340 | */ |
295 | #define CEPH_OSD_TMAP_HDR 'h' | 341 | #define CEPH_OSD_TMAP_HDR 'h' |
296 | #define CEPH_OSD_TMAP_SET 's' | 342 | #define CEPH_OSD_TMAP_SET 's' |
343 | #define CEPH_OSD_TMAP_CREATE 'c' /* create key */ | ||
297 | #define CEPH_OSD_TMAP_RM 'r' | 344 | #define CEPH_OSD_TMAP_RM 'r' |
345 | #define CEPH_OSD_TMAP_RMSLOPPY 'R' | ||
298 | 346 | ||
299 | extern const char *ceph_osd_op_name(int op); | 347 | extern const char *ceph_osd_op_name(int op); |
300 | 348 | ||
301 | |||
302 | /* | 349 | /* |
303 | * osd op flags | 350 | * osd op flags |
304 | * | 351 | * |
305 | * An op may be READ, WRITE, or READ|WRITE. | 352 | * An op may be READ, WRITE, or READ|WRITE. |
306 | */ | 353 | */ |
307 | enum { | 354 | enum { |
308 | CEPH_OSD_FLAG_ACK = 1, /* want (or is) "ack" ack */ | 355 | CEPH_OSD_FLAG_ACK = 0x0001, /* want (or is) "ack" ack */ |
309 | CEPH_OSD_FLAG_ONNVRAM = 2, /* want (or is) "onnvram" ack */ | 356 | CEPH_OSD_FLAG_ONNVRAM = 0x0002, /* want (or is) "onnvram" ack */ |
310 | CEPH_OSD_FLAG_ONDISK = 4, /* want (or is) "ondisk" ack */ | 357 | CEPH_OSD_FLAG_ONDISK = 0x0004, /* want (or is) "ondisk" ack */ |
311 | CEPH_OSD_FLAG_RETRY = 8, /* resend attempt */ | 358 | CEPH_OSD_FLAG_RETRY = 0x0008, /* resend attempt */ |
312 | CEPH_OSD_FLAG_READ = 16, /* op may read */ | 359 | CEPH_OSD_FLAG_READ = 0x0010, /* op may read */ |
313 | CEPH_OSD_FLAG_WRITE = 32, /* op may write */ | 360 | CEPH_OSD_FLAG_WRITE = 0x0020, /* op may write */ |
314 | CEPH_OSD_FLAG_ORDERSNAP = 64, /* EOLDSNAP if snapc is out of order */ | 361 | CEPH_OSD_FLAG_ORDERSNAP = 0x0040, /* EOLDSNAP if snapc is out of order */ |
315 | CEPH_OSD_FLAG_PEERSTAT = 128, /* msg includes osd_peer_stat */ | 362 | CEPH_OSD_FLAG_PEERSTAT_OLD = 0x0080, /* DEPRECATED msg includes osd_peer_stat */ |
316 | CEPH_OSD_FLAG_BALANCE_READS = 256, | 363 | CEPH_OSD_FLAG_BALANCE_READS = 0x0100, |
317 | CEPH_OSD_FLAG_PARALLELEXEC = 512, /* execute op in parallel */ | 364 | CEPH_OSD_FLAG_PARALLELEXEC = 0x0200, /* execute op in parallel */ |
318 | CEPH_OSD_FLAG_PGOP = 1024, /* pg op, no object */ | 365 | CEPH_OSD_FLAG_PGOP = 0x0400, /* pg op, no object */ |
319 | CEPH_OSD_FLAG_EXEC = 2048, /* op may exec */ | 366 | CEPH_OSD_FLAG_EXEC = 0x0800, /* op may exec */ |
320 | CEPH_OSD_FLAG_EXEC_PUBLIC = 4096, /* op may exec (public) */ | 367 | CEPH_OSD_FLAG_EXEC_PUBLIC = 0x1000, /* DEPRECATED op may exec (public) */ |
368 | CEPH_OSD_FLAG_LOCALIZE_READS = 0x2000, /* read from nearby replica, if any */ | ||
369 | CEPH_OSD_FLAG_RWORDERED = 0x4000, /* order wrt concurrent reads */ | ||
321 | }; | 370 | }; |
322 | 371 | ||
323 | enum { | 372 | enum { |
324 | CEPH_OSD_OP_FLAG_EXCL = 1, /* EXCL object create */ | 373 | CEPH_OSD_OP_FLAG_EXCL = 1, /* EXCL object create */ |
374 | CEPH_OSD_OP_FLAG_FAILOK = 2, /* continue despite failure */ | ||
325 | }; | 375 | }; |
326 | 376 | ||
327 | #define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ | 377 | #define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ |
@@ -381,7 +431,11 @@ struct ceph_osd_op { | |||
381 | __le64 ver; | 431 | __le64 ver; |
382 | __u8 flag; /* 0 = unwatch, 1 = watch */ | 432 | __u8 flag; /* 0 = unwatch, 1 = watch */ |
383 | } __attribute__ ((packed)) watch; | 433 | } __attribute__ ((packed)) watch; |
384 | }; | 434 | struct { |
435 | __le64 offset, length; | ||
436 | __le64 src_offset; | ||
437 | } __attribute__ ((packed)) clonerange; | ||
438 | }; | ||
385 | __le32 payload_len; | 439 | __le32 payload_len; |
386 | } __attribute__ ((packed)); | 440 | } __attribute__ ((packed)); |
387 | 441 | ||
@@ -424,5 +478,4 @@ struct ceph_osd_reply_head { | |||
424 | } __attribute__ ((packed)); | 478 | } __attribute__ ((packed)); |
425 | 479 | ||
426 | 480 | ||
427 | |||
428 | #endif | 481 | #endif |
diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 25baa287cff7..6a1101f24cfb 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h | |||
@@ -162,6 +162,8 @@ struct crush_map { | |||
162 | __u32 choose_local_fallback_tries; | 162 | __u32 choose_local_fallback_tries; |
163 | /* choose attempts before giving up */ | 163 | /* choose attempts before giving up */ |
164 | __u32 choose_total_tries; | 164 | __u32 choose_total_tries; |
165 | /* attempt chooseleaf inner descent once; on failure retry outer descent */ | ||
166 | __u32 chooseleaf_descend_once; | ||
165 | }; | 167 | }; |
166 | 168 | ||
167 | 169 | ||
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index ee71ea26777a..c236c235c4a2 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c | |||
@@ -26,6 +26,22 @@ | |||
26 | #include "crypto.h" | 26 | #include "crypto.h" |
27 | 27 | ||
28 | 28 | ||
29 | /* | ||
30 | * Module compatibility interface. For now it doesn't do anything, | ||
31 | * but its existence signals a certain level of functionality. | ||
32 | * | ||
33 | * The data buffer is used to pass information both to and from | ||
34 | * libceph. The return value indicates whether libceph determines | ||
35 | * it is compatible with the caller (from another kernel module), | ||
36 | * given the provided data. | ||
37 | * | ||
38 | * The data pointer can be null. | ||
39 | */ | ||
40 | bool libceph_compatible(void *data) | ||
41 | { | ||
42 | return true; | ||
43 | } | ||
44 | EXPORT_SYMBOL(libceph_compatible); | ||
29 | 45 | ||
30 | /* | 46 | /* |
31 | * find filename portion of a path (/foo/bar/baz -> baz) | 47 | * find filename portion of a path (/foo/bar/baz -> baz) |
diff --git a/net/ceph/ceph_strings.c b/net/ceph/ceph_strings.c index 3fbda04de29c..1348df96fe15 100644 --- a/net/ceph/ceph_strings.c +++ b/net/ceph/ceph_strings.c | |||
@@ -21,9 +21,15 @@ const char *ceph_osd_op_name(int op) | |||
21 | switch (op) { | 21 | switch (op) { |
22 | case CEPH_OSD_OP_READ: return "read"; | 22 | case CEPH_OSD_OP_READ: return "read"; |
23 | case CEPH_OSD_OP_STAT: return "stat"; | 23 | case CEPH_OSD_OP_STAT: return "stat"; |
24 | case CEPH_OSD_OP_MAPEXT: return "mapext"; | ||
25 | case CEPH_OSD_OP_SPARSE_READ: return "sparse-read"; | ||
26 | case CEPH_OSD_OP_NOTIFY: return "notify"; | ||
27 | case CEPH_OSD_OP_NOTIFY_ACK: return "notify-ack"; | ||
28 | case CEPH_OSD_OP_ASSERT_VER: return "assert-version"; | ||
24 | 29 | ||
25 | case CEPH_OSD_OP_MASKTRUNC: return "masktrunc"; | 30 | case CEPH_OSD_OP_MASKTRUNC: return "masktrunc"; |
26 | 31 | ||
32 | case CEPH_OSD_OP_CREATE: return "create"; | ||
27 | case CEPH_OSD_OP_WRITE: return "write"; | 33 | case CEPH_OSD_OP_WRITE: return "write"; |
28 | case CEPH_OSD_OP_DELETE: return "delete"; | 34 | case CEPH_OSD_OP_DELETE: return "delete"; |
29 | case CEPH_OSD_OP_TRUNCATE: return "truncate"; | 35 | case CEPH_OSD_OP_TRUNCATE: return "truncate"; |
@@ -39,6 +45,11 @@ const char *ceph_osd_op_name(int op) | |||
39 | case CEPH_OSD_OP_TMAPUP: return "tmapup"; | 45 | case CEPH_OSD_OP_TMAPUP: return "tmapup"; |
40 | case CEPH_OSD_OP_TMAPGET: return "tmapget"; | 46 | case CEPH_OSD_OP_TMAPGET: return "tmapget"; |
41 | case CEPH_OSD_OP_TMAPPUT: return "tmapput"; | 47 | case CEPH_OSD_OP_TMAPPUT: return "tmapput"; |
48 | case CEPH_OSD_OP_WATCH: return "watch"; | ||
49 | |||
50 | case CEPH_OSD_OP_CLONERANGE: return "clonerange"; | ||
51 | case CEPH_OSD_OP_ASSERT_SRC_VERSION: return "assert-src-version"; | ||
52 | case CEPH_OSD_OP_SRC_CMPXATTR: return "src-cmpxattr"; | ||
42 | 53 | ||
43 | case CEPH_OSD_OP_GETXATTR: return "getxattr"; | 54 | case CEPH_OSD_OP_GETXATTR: return "getxattr"; |
44 | case CEPH_OSD_OP_GETXATTRS: return "getxattrs"; | 55 | case CEPH_OSD_OP_GETXATTRS: return "getxattrs"; |
@@ -53,6 +64,10 @@ const char *ceph_osd_op_name(int op) | |||
53 | case CEPH_OSD_OP_BALANCEREADS: return "balance-reads"; | 64 | case CEPH_OSD_OP_BALANCEREADS: return "balance-reads"; |
54 | case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads"; | 65 | case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads"; |
55 | case CEPH_OSD_OP_SCRUB: return "scrub"; | 66 | case CEPH_OSD_OP_SCRUB: return "scrub"; |
67 | case CEPH_OSD_OP_SCRUB_RESERVE: return "scrub-reserve"; | ||
68 | case CEPH_OSD_OP_SCRUB_UNRESERVE: return "scrub-unreserve"; | ||
69 | case CEPH_OSD_OP_SCRUB_STOP: return "scrub-stop"; | ||
70 | case CEPH_OSD_OP_SCRUB_MAP: return "scrub-map"; | ||
56 | 71 | ||
57 | case CEPH_OSD_OP_WRLOCK: return "wrlock"; | 72 | case CEPH_OSD_OP_WRLOCK: return "wrlock"; |
58 | case CEPH_OSD_OP_WRUNLOCK: return "wrunlock"; | 73 | case CEPH_OSD_OP_WRUNLOCK: return "wrunlock"; |
@@ -64,10 +79,34 @@ const char *ceph_osd_op_name(int op) | |||
64 | case CEPH_OSD_OP_CALL: return "call"; | 79 | case CEPH_OSD_OP_CALL: return "call"; |
65 | 80 | ||
66 | case CEPH_OSD_OP_PGLS: return "pgls"; | 81 | case CEPH_OSD_OP_PGLS: return "pgls"; |
82 | case CEPH_OSD_OP_PGLS_FILTER: return "pgls-filter"; | ||
83 | case CEPH_OSD_OP_OMAPGETKEYS: return "omap-get-keys"; | ||
84 | case CEPH_OSD_OP_OMAPGETVALS: return "omap-get-vals"; | ||
85 | case CEPH_OSD_OP_OMAPGETHEADER: return "omap-get-header"; | ||
86 | case CEPH_OSD_OP_OMAPGETVALSBYKEYS: return "omap-get-vals-by-keys"; | ||
87 | case CEPH_OSD_OP_OMAPSETVALS: return "omap-set-vals"; | ||
88 | case CEPH_OSD_OP_OMAPSETHEADER: return "omap-set-header"; | ||
89 | case CEPH_OSD_OP_OMAPCLEAR: return "omap-clear"; | ||
90 | case CEPH_OSD_OP_OMAPRMKEYS: return "omap-rm-keys"; | ||
67 | } | 91 | } |
68 | return "???"; | 92 | return "???"; |
69 | } | 93 | } |
70 | 94 | ||
95 | const char *ceph_osd_state_name(int s) | ||
96 | { | ||
97 | switch (s) { | ||
98 | case CEPH_OSD_EXISTS: | ||
99 | return "exists"; | ||
100 | case CEPH_OSD_UP: | ||
101 | return "up"; | ||
102 | case CEPH_OSD_AUTOOUT: | ||
103 | return "autoout"; | ||
104 | case CEPH_OSD_NEW: | ||
105 | return "new"; | ||
106 | default: | ||
107 | return "???"; | ||
108 | } | ||
109 | } | ||
71 | 110 | ||
72 | const char *ceph_pool_op_name(int op) | 111 | const char *ceph_pool_op_name(int op) |
73 | { | 112 | { |
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index 35fce755ce10..cbd06a91941c 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c | |||
@@ -287,6 +287,7 @@ static int is_out(const struct crush_map *map, const __u32 *weight, int item, in | |||
287 | * @outpos: our position in that vector | 287 | * @outpos: our position in that vector |
288 | * @firstn: true if choosing "first n" items, false if choosing "indep" | 288 | * @firstn: true if choosing "first n" items, false if choosing "indep" |
289 | * @recurse_to_leaf: true if we want one device under each item of given type | 289 | * @recurse_to_leaf: true if we want one device under each item of given type |
290 | * @descend_once: true if we should only try one descent before giving up | ||
290 | * @out2: second output vector for leaf items (if @recurse_to_leaf) | 291 | * @out2: second output vector for leaf items (if @recurse_to_leaf) |
291 | */ | 292 | */ |
292 | static int crush_choose(const struct crush_map *map, | 293 | static int crush_choose(const struct crush_map *map, |
@@ -295,7 +296,7 @@ static int crush_choose(const struct crush_map *map, | |||
295 | int x, int numrep, int type, | 296 | int x, int numrep, int type, |
296 | int *out, int outpos, | 297 | int *out, int outpos, |
297 | int firstn, int recurse_to_leaf, | 298 | int firstn, int recurse_to_leaf, |
298 | int *out2) | 299 | int descend_once, int *out2) |
299 | { | 300 | { |
300 | int rep; | 301 | int rep; |
301 | unsigned int ftotal, flocal; | 302 | unsigned int ftotal, flocal; |
@@ -391,7 +392,7 @@ static int crush_choose(const struct crush_map *map, | |||
391 | } | 392 | } |
392 | 393 | ||
393 | reject = 0; | 394 | reject = 0; |
394 | if (recurse_to_leaf) { | 395 | if (!collide && recurse_to_leaf) { |
395 | if (item < 0) { | 396 | if (item < 0) { |
396 | if (crush_choose(map, | 397 | if (crush_choose(map, |
397 | map->buckets[-1-item], | 398 | map->buckets[-1-item], |
@@ -399,6 +400,7 @@ static int crush_choose(const struct crush_map *map, | |||
399 | x, outpos+1, 0, | 400 | x, outpos+1, 0, |
400 | out2, outpos, | 401 | out2, outpos, |
401 | firstn, 0, | 402 | firstn, 0, |
403 | map->chooseleaf_descend_once, | ||
402 | NULL) <= outpos) | 404 | NULL) <= outpos) |
403 | /* didn't get leaf */ | 405 | /* didn't get leaf */ |
404 | reject = 1; | 406 | reject = 1; |
@@ -422,7 +424,10 @@ reject: | |||
422 | ftotal++; | 424 | ftotal++; |
423 | flocal++; | 425 | flocal++; |
424 | 426 | ||
425 | if (collide && flocal <= map->choose_local_tries) | 427 | if (reject && descend_once) |
428 | /* let outer call try again */ | ||
429 | skip_rep = 1; | ||
430 | else if (collide && flocal <= map->choose_local_tries) | ||
426 | /* retry locally a few times */ | 431 | /* retry locally a few times */ |
427 | retry_bucket = 1; | 432 | retry_bucket = 1; |
428 | else if (map->choose_local_fallback_tries > 0 && | 433 | else if (map->choose_local_fallback_tries > 0 && |
@@ -485,6 +490,7 @@ int crush_do_rule(const struct crush_map *map, | |||
485 | int i, j; | 490 | int i, j; |
486 | int numrep; | 491 | int numrep; |
487 | int firstn; | 492 | int firstn; |
493 | const int descend_once = 0; | ||
488 | 494 | ||
489 | if ((__u32)ruleno >= map->max_rules) { | 495 | if ((__u32)ruleno >= map->max_rules) { |
490 | dprintk(" bad ruleno %d\n", ruleno); | 496 | dprintk(" bad ruleno %d\n", ruleno); |
@@ -544,7 +550,8 @@ int crush_do_rule(const struct crush_map *map, | |||
544 | curstep->arg2, | 550 | curstep->arg2, |
545 | o+osize, j, | 551 | o+osize, j, |
546 | firstn, | 552 | firstn, |
547 | recurse_to_leaf, c+osize); | 553 | recurse_to_leaf, |
554 | descend_once, c+osize); | ||
548 | } | 555 | } |
549 | 556 | ||
550 | if (recurse_to_leaf) | 557 | if (recurse_to_leaf) |
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 5ccf87ed8d68..8a62a559a2aa 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c | |||
@@ -9,8 +9,9 @@ | |||
9 | #include <linux/slab.h> | 9 | #include <linux/slab.h> |
10 | #include <linux/socket.h> | 10 | #include <linux/socket.h> |
11 | #include <linux/string.h> | 11 | #include <linux/string.h> |
12 | #ifdef CONFIG_BLOCK | ||
12 | #include <linux/bio.h> | 13 | #include <linux/bio.h> |
13 | #include <linux/blkdev.h> | 14 | #endif /* CONFIG_BLOCK */ |
14 | #include <linux/dns_resolver.h> | 15 | #include <linux/dns_resolver.h> |
15 | #include <net/tcp.h> | 16 | #include <net/tcp.h> |
16 | 17 | ||
@@ -2651,9 +2652,11 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, | |||
2651 | m->page_alignment = 0; | 2652 | m->page_alignment = 0; |
2652 | m->pages = NULL; | 2653 | m->pages = NULL; |
2653 | m->pagelist = NULL; | 2654 | m->pagelist = NULL; |
2655 | #ifdef CONFIG_BLOCK | ||
2654 | m->bio = NULL; | 2656 | m->bio = NULL; |
2655 | m->bio_iter = NULL; | 2657 | m->bio_iter = NULL; |
2656 | m->bio_seg = 0; | 2658 | m->bio_seg = 0; |
2659 | #endif /* CONFIG_BLOCK */ | ||
2657 | m->trail = NULL; | 2660 | m->trail = NULL; |
2658 | 2661 | ||
2659 | /* front */ | 2662 | /* front */ |
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index eb9a44478764..39629b66f3b1 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c | |||
@@ -23,7 +23,7 @@ | |||
23 | 23 | ||
24 | static const struct ceph_connection_operations osd_con_ops; | 24 | static const struct ceph_connection_operations osd_con_ops; |
25 | 25 | ||
26 | static void send_queued(struct ceph_osd_client *osdc); | 26 | static void __send_queued(struct ceph_osd_client *osdc); |
27 | static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd); | 27 | static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd); |
28 | static void __register_request(struct ceph_osd_client *osdc, | 28 | static void __register_request(struct ceph_osd_client *osdc, |
29 | struct ceph_osd_request *req); | 29 | struct ceph_osd_request *req); |
@@ -32,64 +32,12 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc, | |||
32 | static void __send_request(struct ceph_osd_client *osdc, | 32 | static void __send_request(struct ceph_osd_client *osdc, |
33 | struct ceph_osd_request *req); | 33 | struct ceph_osd_request *req); |
34 | 34 | ||
35 | static int op_needs_trail(int op) | ||
36 | { | ||
37 | switch (op) { | ||
38 | case CEPH_OSD_OP_GETXATTR: | ||
39 | case CEPH_OSD_OP_SETXATTR: | ||
40 | case CEPH_OSD_OP_CMPXATTR: | ||
41 | case CEPH_OSD_OP_CALL: | ||
42 | case CEPH_OSD_OP_NOTIFY: | ||
43 | return 1; | ||
44 | default: | ||
45 | return 0; | ||
46 | } | ||
47 | } | ||
48 | |||
49 | static int op_has_extent(int op) | 35 | static int op_has_extent(int op) |
50 | { | 36 | { |
51 | return (op == CEPH_OSD_OP_READ || | 37 | return (op == CEPH_OSD_OP_READ || |
52 | op == CEPH_OSD_OP_WRITE); | 38 | op == CEPH_OSD_OP_WRITE); |
53 | } | 39 | } |
54 | 40 | ||
55 | int ceph_calc_raw_layout(struct ceph_osd_client *osdc, | ||
56 | struct ceph_file_layout *layout, | ||
57 | u64 snapid, | ||
58 | u64 off, u64 *plen, u64 *bno, | ||
59 | struct ceph_osd_request *req, | ||
60 | struct ceph_osd_req_op *op) | ||
61 | { | ||
62 | struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; | ||
63 | u64 orig_len = *plen; | ||
64 | u64 objoff, objlen; /* extent in object */ | ||
65 | int r; | ||
66 | |||
67 | reqhead->snapid = cpu_to_le64(snapid); | ||
68 | |||
69 | /* object extent? */ | ||
70 | r = ceph_calc_file_object_mapping(layout, off, plen, bno, | ||
71 | &objoff, &objlen); | ||
72 | if (r < 0) | ||
73 | return r; | ||
74 | if (*plen < orig_len) | ||
75 | dout(" skipping last %llu, final file extent %llu~%llu\n", | ||
76 | orig_len - *plen, off, *plen); | ||
77 | |||
78 | if (op_has_extent(op->op)) { | ||
79 | op->extent.offset = objoff; | ||
80 | op->extent.length = objlen; | ||
81 | } | ||
82 | req->r_num_pages = calc_pages_for(off, *plen); | ||
83 | req->r_page_alignment = off & ~PAGE_MASK; | ||
84 | if (op->op == CEPH_OSD_OP_WRITE) | ||
85 | op->payload_len = *plen; | ||
86 | |||
87 | dout("calc_layout bno=%llx %llu~%llu (%d pages)\n", | ||
88 | *bno, objoff, objlen, req->r_num_pages); | ||
89 | return 0; | ||
90 | } | ||
91 | EXPORT_SYMBOL(ceph_calc_raw_layout); | ||
92 | |||
93 | /* | 41 | /* |
94 | * Implement client access to distributed object storage cluster. | 42 | * Implement client access to distributed object storage cluster. |
95 | * | 43 | * |
@@ -115,20 +63,48 @@ EXPORT_SYMBOL(ceph_calc_raw_layout); | |||
115 | * | 63 | * |
116 | * fill osd op in request message. | 64 | * fill osd op in request message. |
117 | */ | 65 | */ |
118 | static int calc_layout(struct ceph_osd_client *osdc, | 66 | static int calc_layout(struct ceph_vino vino, |
119 | struct ceph_vino vino, | ||
120 | struct ceph_file_layout *layout, | 67 | struct ceph_file_layout *layout, |
121 | u64 off, u64 *plen, | 68 | u64 off, u64 *plen, |
122 | struct ceph_osd_request *req, | 69 | struct ceph_osd_request *req, |
123 | struct ceph_osd_req_op *op) | 70 | struct ceph_osd_req_op *op) |
124 | { | 71 | { |
125 | u64 bno; | 72 | u64 orig_len = *plen; |
73 | u64 bno = 0; | ||
74 | u64 objoff = 0; | ||
75 | u64 objlen = 0; | ||
126 | int r; | 76 | int r; |
127 | 77 | ||
128 | r = ceph_calc_raw_layout(osdc, layout, vino.snap, off, | 78 | /* object extent? */ |
129 | plen, &bno, req, op); | 79 | r = ceph_calc_file_object_mapping(layout, off, orig_len, &bno, |
80 | &objoff, &objlen); | ||
130 | if (r < 0) | 81 | if (r < 0) |
131 | return r; | 82 | return r; |
83 | if (objlen < orig_len) { | ||
84 | *plen = objlen; | ||
85 | dout(" skipping last %llu, final file extent %llu~%llu\n", | ||
86 | orig_len - *plen, off, *plen); | ||
87 | } | ||
88 | |||
89 | if (op_has_extent(op->op)) { | ||
90 | u32 osize = le32_to_cpu(layout->fl_object_size); | ||
91 | op->extent.offset = objoff; | ||
92 | op->extent.length = objlen; | ||
93 | if (op->extent.truncate_size <= off - objoff) { | ||
94 | op->extent.truncate_size = 0; | ||
95 | } else { | ||
96 | op->extent.truncate_size -= off - objoff; | ||
97 | if (op->extent.truncate_size > osize) | ||
98 | op->extent.truncate_size = osize; | ||
99 | } | ||
100 | } | ||
101 | req->r_num_pages = calc_pages_for(off, *plen); | ||
102 | req->r_page_alignment = off & ~PAGE_MASK; | ||
103 | if (op->op == CEPH_OSD_OP_WRITE) | ||
104 | op->payload_len = *plen; | ||
105 | |||
106 | dout("calc_layout bno=%llx %llu~%llu (%d pages)\n", | ||
107 | bno, objoff, objlen, req->r_num_pages); | ||
132 | 108 | ||
133 | snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", vino.ino, bno); | 109 | snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", vino.ino, bno); |
134 | req->r_oid_len = strlen(req->r_oid); | 110 | req->r_oid_len = strlen(req->r_oid); |
@@ -148,25 +124,19 @@ void ceph_osdc_release_request(struct kref *kref) | |||
148 | if (req->r_request) | 124 | if (req->r_request) |
149 | ceph_msg_put(req->r_request); | 125 | ceph_msg_put(req->r_request); |
150 | if (req->r_con_filling_msg) { | 126 | if (req->r_con_filling_msg) { |
151 | dout("%s revoking pages %p from con %p\n", __func__, | 127 | dout("%s revoking msg %p from con %p\n", __func__, |
152 | req->r_pages, req->r_con_filling_msg); | 128 | req->r_reply, req->r_con_filling_msg); |
153 | ceph_msg_revoke_incoming(req->r_reply); | 129 | ceph_msg_revoke_incoming(req->r_reply); |
154 | req->r_con_filling_msg->ops->put(req->r_con_filling_msg); | 130 | req->r_con_filling_msg->ops->put(req->r_con_filling_msg); |
131 | req->r_con_filling_msg = NULL; | ||
155 | } | 132 | } |
156 | if (req->r_reply) | 133 | if (req->r_reply) |
157 | ceph_msg_put(req->r_reply); | 134 | ceph_msg_put(req->r_reply); |
158 | if (req->r_own_pages) | 135 | if (req->r_own_pages) |
159 | ceph_release_page_vector(req->r_pages, | 136 | ceph_release_page_vector(req->r_pages, |
160 | req->r_num_pages); | 137 | req->r_num_pages); |
161 | #ifdef CONFIG_BLOCK | ||
162 | if (req->r_bio) | ||
163 | bio_put(req->r_bio); | ||
164 | #endif | ||
165 | ceph_put_snap_context(req->r_snapc); | 138 | ceph_put_snap_context(req->r_snapc); |
166 | if (req->r_trail) { | 139 | ceph_pagelist_release(&req->r_trail); |
167 | ceph_pagelist_release(req->r_trail); | ||
168 | kfree(req->r_trail); | ||
169 | } | ||
170 | if (req->r_mempool) | 140 | if (req->r_mempool) |
171 | mempool_free(req, req->r_osdc->req_mempool); | 141 | mempool_free(req, req->r_osdc->req_mempool); |
172 | else | 142 | else |
@@ -174,34 +144,14 @@ void ceph_osdc_release_request(struct kref *kref) | |||
174 | } | 144 | } |
175 | EXPORT_SYMBOL(ceph_osdc_release_request); | 145 | EXPORT_SYMBOL(ceph_osdc_release_request); |
176 | 146 | ||
177 | static int get_num_ops(struct ceph_osd_req_op *ops, int *needs_trail) | ||
178 | { | ||
179 | int i = 0; | ||
180 | |||
181 | if (needs_trail) | ||
182 | *needs_trail = 0; | ||
183 | while (ops[i].op) { | ||
184 | if (needs_trail && op_needs_trail(ops[i].op)) | ||
185 | *needs_trail = 1; | ||
186 | i++; | ||
187 | } | ||
188 | |||
189 | return i; | ||
190 | } | ||
191 | |||
192 | struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, | 147 | struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, |
193 | int flags, | ||
194 | struct ceph_snap_context *snapc, | 148 | struct ceph_snap_context *snapc, |
195 | struct ceph_osd_req_op *ops, | 149 | unsigned int num_op, |
196 | bool use_mempool, | 150 | bool use_mempool, |
197 | gfp_t gfp_flags, | 151 | gfp_t gfp_flags) |
198 | struct page **pages, | ||
199 | struct bio *bio) | ||
200 | { | 152 | { |
201 | struct ceph_osd_request *req; | 153 | struct ceph_osd_request *req; |
202 | struct ceph_msg *msg; | 154 | struct ceph_msg *msg; |
203 | int needs_trail; | ||
204 | int num_op = get_num_ops(ops, &needs_trail); | ||
205 | size_t msg_size = sizeof(struct ceph_osd_request_head); | 155 | size_t msg_size = sizeof(struct ceph_osd_request_head); |
206 | 156 | ||
207 | msg_size += num_op*sizeof(struct ceph_osd_op); | 157 | msg_size += num_op*sizeof(struct ceph_osd_op); |
@@ -228,10 +178,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, | |||
228 | INIT_LIST_HEAD(&req->r_req_lru_item); | 178 | INIT_LIST_HEAD(&req->r_req_lru_item); |
229 | INIT_LIST_HEAD(&req->r_osd_item); | 179 | INIT_LIST_HEAD(&req->r_osd_item); |
230 | 180 | ||
231 | req->r_flags = flags; | ||
232 | |||
233 | WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0); | ||
234 | |||
235 | /* create reply message */ | 181 | /* create reply message */ |
236 | if (use_mempool) | 182 | if (use_mempool) |
237 | msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); | 183 | msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); |
@@ -244,15 +190,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, | |||
244 | } | 190 | } |
245 | req->r_reply = msg; | 191 | req->r_reply = msg; |
246 | 192 | ||
247 | /* allocate space for the trailing data */ | 193 | ceph_pagelist_init(&req->r_trail); |
248 | if (needs_trail) { | ||
249 | req->r_trail = kmalloc(sizeof(struct ceph_pagelist), gfp_flags); | ||
250 | if (!req->r_trail) { | ||
251 | ceph_osdc_put_request(req); | ||
252 | return NULL; | ||
253 | } | ||
254 | ceph_pagelist_init(req->r_trail); | ||
255 | } | ||
256 | 194 | ||
257 | /* create request message; allow space for oid */ | 195 | /* create request message; allow space for oid */ |
258 | msg_size += MAX_OBJ_NAME_SIZE; | 196 | msg_size += MAX_OBJ_NAME_SIZE; |
@@ -270,13 +208,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, | |||
270 | memset(msg->front.iov_base, 0, msg->front.iov_len); | 208 | memset(msg->front.iov_base, 0, msg->front.iov_len); |
271 | 209 | ||
272 | req->r_request = msg; | 210 | req->r_request = msg; |
273 | req->r_pages = pages; | ||
274 | #ifdef CONFIG_BLOCK | ||
275 | if (bio) { | ||
276 | req->r_bio = bio; | ||
277 | bio_get(req->r_bio); | ||
278 | } | ||
279 | #endif | ||
280 | 211 | ||
281 | return req; | 212 | return req; |
282 | } | 213 | } |
@@ -289,6 +220,8 @@ static void osd_req_encode_op(struct ceph_osd_request *req, | |||
289 | dst->op = cpu_to_le16(src->op); | 220 | dst->op = cpu_to_le16(src->op); |
290 | 221 | ||
291 | switch (src->op) { | 222 | switch (src->op) { |
223 | case CEPH_OSD_OP_STAT: | ||
224 | break; | ||
292 | case CEPH_OSD_OP_READ: | 225 | case CEPH_OSD_OP_READ: |
293 | case CEPH_OSD_OP_WRITE: | 226 | case CEPH_OSD_OP_WRITE: |
294 | dst->extent.offset = | 227 | dst->extent.offset = |
@@ -300,52 +233,20 @@ static void osd_req_encode_op(struct ceph_osd_request *req, | |||
300 | dst->extent.truncate_seq = | 233 | dst->extent.truncate_seq = |
301 | cpu_to_le32(src->extent.truncate_seq); | 234 | cpu_to_le32(src->extent.truncate_seq); |
302 | break; | 235 | break; |
303 | |||
304 | case CEPH_OSD_OP_GETXATTR: | ||
305 | case CEPH_OSD_OP_SETXATTR: | ||
306 | case CEPH_OSD_OP_CMPXATTR: | ||
307 | BUG_ON(!req->r_trail); | ||
308 | |||
309 | dst->xattr.name_len = cpu_to_le32(src->xattr.name_len); | ||
310 | dst->xattr.value_len = cpu_to_le32(src->xattr.value_len); | ||
311 | dst->xattr.cmp_op = src->xattr.cmp_op; | ||
312 | dst->xattr.cmp_mode = src->xattr.cmp_mode; | ||
313 | ceph_pagelist_append(req->r_trail, src->xattr.name, | ||
314 | src->xattr.name_len); | ||
315 | ceph_pagelist_append(req->r_trail, src->xattr.val, | ||
316 | src->xattr.value_len); | ||
317 | break; | ||
318 | case CEPH_OSD_OP_CALL: | 236 | case CEPH_OSD_OP_CALL: |
319 | BUG_ON(!req->r_trail); | ||
320 | |||
321 | dst->cls.class_len = src->cls.class_len; | 237 | dst->cls.class_len = src->cls.class_len; |
322 | dst->cls.method_len = src->cls.method_len; | 238 | dst->cls.method_len = src->cls.method_len; |
323 | dst->cls.indata_len = cpu_to_le32(src->cls.indata_len); | 239 | dst->cls.indata_len = cpu_to_le32(src->cls.indata_len); |
324 | 240 | ||
325 | ceph_pagelist_append(req->r_trail, src->cls.class_name, | 241 | ceph_pagelist_append(&req->r_trail, src->cls.class_name, |
326 | src->cls.class_len); | 242 | src->cls.class_len); |
327 | ceph_pagelist_append(req->r_trail, src->cls.method_name, | 243 | ceph_pagelist_append(&req->r_trail, src->cls.method_name, |
328 | src->cls.method_len); | 244 | src->cls.method_len); |
329 | ceph_pagelist_append(req->r_trail, src->cls.indata, | 245 | ceph_pagelist_append(&req->r_trail, src->cls.indata, |
330 | src->cls.indata_len); | 246 | src->cls.indata_len); |
331 | break; | 247 | break; |
332 | case CEPH_OSD_OP_ROLLBACK: | ||
333 | dst->snap.snapid = cpu_to_le64(src->snap.snapid); | ||
334 | break; | ||
335 | case CEPH_OSD_OP_STARTSYNC: | 248 | case CEPH_OSD_OP_STARTSYNC: |
336 | break; | 249 | break; |
337 | case CEPH_OSD_OP_NOTIFY: | ||
338 | { | ||
339 | __le32 prot_ver = cpu_to_le32(src->watch.prot_ver); | ||
340 | __le32 timeout = cpu_to_le32(src->watch.timeout); | ||
341 | |||
342 | BUG_ON(!req->r_trail); | ||
343 | |||
344 | ceph_pagelist_append(req->r_trail, | ||
345 | &prot_ver, sizeof(prot_ver)); | ||
346 | ceph_pagelist_append(req->r_trail, | ||
347 | &timeout, sizeof(timeout)); | ||
348 | } | ||
349 | case CEPH_OSD_OP_NOTIFY_ACK: | 250 | case CEPH_OSD_OP_NOTIFY_ACK: |
350 | case CEPH_OSD_OP_WATCH: | 251 | case CEPH_OSD_OP_WATCH: |
351 | dst->watch.cookie = cpu_to_le64(src->watch.cookie); | 252 | dst->watch.cookie = cpu_to_le64(src->watch.cookie); |
@@ -356,6 +257,64 @@ static void osd_req_encode_op(struct ceph_osd_request *req, | |||
356 | pr_err("unrecognized osd opcode %d\n", dst->op); | 257 | pr_err("unrecognized osd opcode %d\n", dst->op); |
357 | WARN_ON(1); | 258 | WARN_ON(1); |
358 | break; | 259 | break; |
260 | case CEPH_OSD_OP_MAPEXT: | ||
261 | case CEPH_OSD_OP_MASKTRUNC: | ||
262 | case CEPH_OSD_OP_SPARSE_READ: | ||
263 | case CEPH_OSD_OP_NOTIFY: | ||
264 | case CEPH_OSD_OP_ASSERT_VER: | ||
265 | case CEPH_OSD_OP_WRITEFULL: | ||
266 | case CEPH_OSD_OP_TRUNCATE: | ||
267 | case CEPH_OSD_OP_ZERO: | ||
268 | case CEPH_OSD_OP_DELETE: | ||
269 | case CEPH_OSD_OP_APPEND: | ||
270 | case CEPH_OSD_OP_SETTRUNC: | ||
271 | case CEPH_OSD_OP_TRIMTRUNC: | ||
272 | case CEPH_OSD_OP_TMAPUP: | ||
273 | case CEPH_OSD_OP_TMAPPUT: | ||
274 | case CEPH_OSD_OP_TMAPGET: | ||
275 | case CEPH_OSD_OP_CREATE: | ||
276 | case CEPH_OSD_OP_ROLLBACK: | ||
277 | case CEPH_OSD_OP_OMAPGETKEYS: | ||
278 | case CEPH_OSD_OP_OMAPGETVALS: | ||
279 | case CEPH_OSD_OP_OMAPGETHEADER: | ||
280 | case CEPH_OSD_OP_OMAPGETVALSBYKEYS: | ||
281 | case CEPH_OSD_OP_MODE_RD: | ||
282 | case CEPH_OSD_OP_OMAPSETVALS: | ||
283 | case CEPH_OSD_OP_OMAPSETHEADER: | ||
284 | case CEPH_OSD_OP_OMAPCLEAR: | ||
285 | case CEPH_OSD_OP_OMAPRMKEYS: | ||
286 | case CEPH_OSD_OP_OMAP_CMP: | ||
287 | case CEPH_OSD_OP_CLONERANGE: | ||
288 | case CEPH_OSD_OP_ASSERT_SRC_VERSION: | ||
289 | case CEPH_OSD_OP_SRC_CMPXATTR: | ||
290 | case CEPH_OSD_OP_GETXATTR: | ||
291 | case CEPH_OSD_OP_GETXATTRS: | ||
292 | case CEPH_OSD_OP_CMPXATTR: | ||
293 | case CEPH_OSD_OP_SETXATTR: | ||
294 | case CEPH_OSD_OP_SETXATTRS: | ||
295 | case CEPH_OSD_OP_RESETXATTRS: | ||
296 | case CEPH_OSD_OP_RMXATTR: | ||
297 | case CEPH_OSD_OP_PULL: | ||
298 | case CEPH_OSD_OP_PUSH: | ||
299 | case CEPH_OSD_OP_BALANCEREADS: | ||
300 | case CEPH_OSD_OP_UNBALANCEREADS: | ||
301 | case CEPH_OSD_OP_SCRUB: | ||
302 | case CEPH_OSD_OP_SCRUB_RESERVE: | ||
303 | case CEPH_OSD_OP_SCRUB_UNRESERVE: | ||
304 | case CEPH_OSD_OP_SCRUB_STOP: | ||
305 | case CEPH_OSD_OP_SCRUB_MAP: | ||
306 | case CEPH_OSD_OP_WRLOCK: | ||
307 | case CEPH_OSD_OP_WRUNLOCK: | ||
308 | case CEPH_OSD_OP_RDLOCK: | ||
309 | case CEPH_OSD_OP_RDUNLOCK: | ||
310 | case CEPH_OSD_OP_UPLOCK: | ||
311 | case CEPH_OSD_OP_DNLOCK: | ||
312 | case CEPH_OSD_OP_PGLS: | ||
313 | case CEPH_OSD_OP_PGLS_FILTER: | ||
314 | pr_err("unsupported osd opcode %s\n", | ||
315 | ceph_osd_op_name(dst->op)); | ||
316 | WARN_ON(1); | ||
317 | break; | ||
359 | } | 318 | } |
360 | dst->payload_len = cpu_to_le32(src->payload_len); | 319 | dst->payload_len = cpu_to_le32(src->payload_len); |
361 | } | 320 | } |
@@ -365,25 +324,25 @@ static void osd_req_encode_op(struct ceph_osd_request *req, | |||
365 | * | 324 | * |
366 | */ | 325 | */ |
367 | void ceph_osdc_build_request(struct ceph_osd_request *req, | 326 | void ceph_osdc_build_request(struct ceph_osd_request *req, |
368 | u64 off, u64 *plen, | 327 | u64 off, u64 len, unsigned int num_op, |
369 | struct ceph_osd_req_op *src_ops, | 328 | struct ceph_osd_req_op *src_ops, |
370 | struct ceph_snap_context *snapc, | 329 | struct ceph_snap_context *snapc, u64 snap_id, |
371 | struct timespec *mtime, | 330 | struct timespec *mtime) |
372 | const char *oid, | ||
373 | int oid_len) | ||
374 | { | 331 | { |
375 | struct ceph_msg *msg = req->r_request; | 332 | struct ceph_msg *msg = req->r_request; |
376 | struct ceph_osd_request_head *head; | 333 | struct ceph_osd_request_head *head; |
377 | struct ceph_osd_req_op *src_op; | 334 | struct ceph_osd_req_op *src_op; |
378 | struct ceph_osd_op *op; | 335 | struct ceph_osd_op *op; |
379 | void *p; | 336 | void *p; |
380 | int num_op = get_num_ops(src_ops, NULL); | ||
381 | size_t msg_size = sizeof(*head) + num_op*sizeof(*op); | 337 | size_t msg_size = sizeof(*head) + num_op*sizeof(*op); |
382 | int flags = req->r_flags; | 338 | int flags = req->r_flags; |
383 | u64 data_len = 0; | 339 | u64 data_len; |
384 | int i; | 340 | int i; |
385 | 341 | ||
342 | WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0); | ||
343 | |||
386 | head = msg->front.iov_base; | 344 | head = msg->front.iov_base; |
345 | head->snapid = cpu_to_le64(snap_id); | ||
387 | op = (void *)(head + 1); | 346 | op = (void *)(head + 1); |
388 | p = (void *)(op + num_op); | 347 | p = (void *)(op + num_op); |
389 | 348 | ||
@@ -393,23 +352,17 @@ void ceph_osdc_build_request(struct ceph_osd_request *req, | |||
393 | head->flags = cpu_to_le32(flags); | 352 | head->flags = cpu_to_le32(flags); |
394 | if (flags & CEPH_OSD_FLAG_WRITE) | 353 | if (flags & CEPH_OSD_FLAG_WRITE) |
395 | ceph_encode_timespec(&head->mtime, mtime); | 354 | ceph_encode_timespec(&head->mtime, mtime); |
355 | BUG_ON(num_op > (unsigned int) ((u16) -1)); | ||
396 | head->num_ops = cpu_to_le16(num_op); | 356 | head->num_ops = cpu_to_le16(num_op); |
397 | 357 | ||
398 | |||
399 | /* fill in oid */ | 358 | /* fill in oid */ |
400 | head->object_len = cpu_to_le32(oid_len); | 359 | head->object_len = cpu_to_le32(req->r_oid_len); |
401 | memcpy(p, oid, oid_len); | 360 | memcpy(p, req->r_oid, req->r_oid_len); |
402 | p += oid_len; | 361 | p += req->r_oid_len; |
403 | 362 | ||
404 | src_op = src_ops; | 363 | src_op = src_ops; |
405 | while (src_op->op) { | 364 | while (num_op--) |
406 | osd_req_encode_op(req, op, src_op); | 365 | osd_req_encode_op(req, op++, src_op++); |
407 | src_op++; | ||
408 | op++; | ||
409 | } | ||
410 | |||
411 | if (req->r_trail) | ||
412 | data_len += req->r_trail->length; | ||
413 | 366 | ||
414 | if (snapc) { | 367 | if (snapc) { |
415 | head->snap_seq = cpu_to_le64(snapc->seq); | 368 | head->snap_seq = cpu_to_le64(snapc->seq); |
@@ -420,14 +373,12 @@ void ceph_osdc_build_request(struct ceph_osd_request *req, | |||
420 | } | 373 | } |
421 | } | 374 | } |
422 | 375 | ||
376 | data_len = req->r_trail.length; | ||
423 | if (flags & CEPH_OSD_FLAG_WRITE) { | 377 | if (flags & CEPH_OSD_FLAG_WRITE) { |
424 | req->r_request->hdr.data_off = cpu_to_le16(off); | 378 | req->r_request->hdr.data_off = cpu_to_le16(off); |
425 | req->r_request->hdr.data_len = cpu_to_le32(*plen + data_len); | 379 | data_len += len; |
426 | } else if (data_len) { | ||
427 | req->r_request->hdr.data_off = 0; | ||
428 | req->r_request->hdr.data_len = cpu_to_le32(data_len); | ||
429 | } | 380 | } |
430 | 381 | req->r_request->hdr.data_len = cpu_to_le32(data_len); | |
431 | req->r_request->page_alignment = req->r_page_alignment; | 382 | req->r_request->page_alignment = req->r_page_alignment; |
432 | 383 | ||
433 | BUG_ON(p > msg->front.iov_base + msg->front.iov_len); | 384 | BUG_ON(p > msg->front.iov_base + msg->front.iov_len); |
@@ -459,34 +410,33 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
459 | u32 truncate_seq, | 410 | u32 truncate_seq, |
460 | u64 truncate_size, | 411 | u64 truncate_size, |
461 | struct timespec *mtime, | 412 | struct timespec *mtime, |
462 | bool use_mempool, int num_reply, | 413 | bool use_mempool, |
463 | int page_align) | 414 | int page_align) |
464 | { | 415 | { |
465 | struct ceph_osd_req_op ops[3]; | 416 | struct ceph_osd_req_op ops[2]; |
466 | struct ceph_osd_request *req; | 417 | struct ceph_osd_request *req; |
418 | unsigned int num_op = 1; | ||
467 | int r; | 419 | int r; |
468 | 420 | ||
421 | memset(&ops, 0, sizeof ops); | ||
422 | |||
469 | ops[0].op = opcode; | 423 | ops[0].op = opcode; |
470 | ops[0].extent.truncate_seq = truncate_seq; | 424 | ops[0].extent.truncate_seq = truncate_seq; |
471 | ops[0].extent.truncate_size = truncate_size; | 425 | ops[0].extent.truncate_size = truncate_size; |
472 | ops[0].payload_len = 0; | ||
473 | 426 | ||
474 | if (do_sync) { | 427 | if (do_sync) { |
475 | ops[1].op = CEPH_OSD_OP_STARTSYNC; | 428 | ops[1].op = CEPH_OSD_OP_STARTSYNC; |
476 | ops[1].payload_len = 0; | 429 | num_op++; |
477 | ops[2].op = 0; | 430 | } |
478 | } else | 431 | |
479 | ops[1].op = 0; | 432 | req = ceph_osdc_alloc_request(osdc, snapc, num_op, use_mempool, |
480 | 433 | GFP_NOFS); | |
481 | req = ceph_osdc_alloc_request(osdc, flags, | ||
482 | snapc, ops, | ||
483 | use_mempool, | ||
484 | GFP_NOFS, NULL, NULL); | ||
485 | if (!req) | 434 | if (!req) |
486 | return ERR_PTR(-ENOMEM); | 435 | return ERR_PTR(-ENOMEM); |
436 | req->r_flags = flags; | ||
487 | 437 | ||
488 | /* calculate max write size */ | 438 | /* calculate max write size */ |
489 | r = calc_layout(osdc, vino, layout, off, plen, req, ops); | 439 | r = calc_layout(vino, layout, off, plen, req, ops); |
490 | if (r < 0) | 440 | if (r < 0) |
491 | return ERR_PTR(r); | 441 | return ERR_PTR(r); |
492 | req->r_file_layout = *layout; /* keep a copy */ | 442 | req->r_file_layout = *layout; /* keep a copy */ |
@@ -496,10 +446,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
496 | req->r_num_pages = calc_pages_for(page_align, *plen); | 446 | req->r_num_pages = calc_pages_for(page_align, *plen); |
497 | req->r_page_alignment = page_align; | 447 | req->r_page_alignment = page_align; |
498 | 448 | ||
499 | ceph_osdc_build_request(req, off, plen, ops, | 449 | ceph_osdc_build_request(req, off, *plen, num_op, ops, |
500 | snapc, | 450 | snapc, vino.snap, mtime); |
501 | mtime, | ||
502 | req->r_oid, req->r_oid_len); | ||
503 | 451 | ||
504 | return req; | 452 | return req; |
505 | } | 453 | } |
@@ -623,8 +571,8 @@ static void osd_reset(struct ceph_connection *con) | |||
623 | down_read(&osdc->map_sem); | 571 | down_read(&osdc->map_sem); |
624 | mutex_lock(&osdc->request_mutex); | 572 | mutex_lock(&osdc->request_mutex); |
625 | __kick_osd_requests(osdc, osd); | 573 | __kick_osd_requests(osdc, osd); |
574 | __send_queued(osdc); | ||
626 | mutex_unlock(&osdc->request_mutex); | 575 | mutex_unlock(&osdc->request_mutex); |
627 | send_queued(osdc); | ||
628 | up_read(&osdc->map_sem); | 576 | up_read(&osdc->map_sem); |
629 | } | 577 | } |
630 | 578 | ||
@@ -739,31 +687,35 @@ static void remove_old_osds(struct ceph_osd_client *osdc) | |||
739 | */ | 687 | */ |
740 | static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) | 688 | static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) |
741 | { | 689 | { |
742 | struct ceph_osd_request *req; | 690 | struct ceph_entity_addr *peer_addr; |
743 | int ret = 0; | ||
744 | 691 | ||
745 | dout("__reset_osd %p osd%d\n", osd, osd->o_osd); | 692 | dout("__reset_osd %p osd%d\n", osd, osd->o_osd); |
746 | if (list_empty(&osd->o_requests) && | 693 | if (list_empty(&osd->o_requests) && |
747 | list_empty(&osd->o_linger_requests)) { | 694 | list_empty(&osd->o_linger_requests)) { |
748 | __remove_osd(osdc, osd); | 695 | __remove_osd(osdc, osd); |
749 | ret = -ENODEV; | 696 | |
750 | } else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd], | 697 | return -ENODEV; |
751 | &osd->o_con.peer_addr, | 698 | } |
752 | sizeof(osd->o_con.peer_addr)) == 0 && | 699 | |
753 | !ceph_con_opened(&osd->o_con)) { | 700 | peer_addr = &osdc->osdmap->osd_addr[osd->o_osd]; |
701 | if (!memcmp(peer_addr, &osd->o_con.peer_addr, sizeof (*peer_addr)) && | ||
702 | !ceph_con_opened(&osd->o_con)) { | ||
703 | struct ceph_osd_request *req; | ||
704 | |||
754 | dout(" osd addr hasn't changed and connection never opened," | 705 | dout(" osd addr hasn't changed and connection never opened," |
755 | " letting msgr retry"); | 706 | " letting msgr retry"); |
756 | /* touch each r_stamp for handle_timeout()'s benfit */ | 707 | /* touch each r_stamp for handle_timeout()'s benfit */ |
757 | list_for_each_entry(req, &osd->o_requests, r_osd_item) | 708 | list_for_each_entry(req, &osd->o_requests, r_osd_item) |
758 | req->r_stamp = jiffies; | 709 | req->r_stamp = jiffies; |
759 | ret = -EAGAIN; | 710 | |
760 | } else { | 711 | return -EAGAIN; |
761 | ceph_con_close(&osd->o_con); | ||
762 | ceph_con_open(&osd->o_con, CEPH_ENTITY_TYPE_OSD, osd->o_osd, | ||
763 | &osdc->osdmap->osd_addr[osd->o_osd]); | ||
764 | osd->o_incarnation++; | ||
765 | } | 712 | } |
766 | return ret; | 713 | |
714 | ceph_con_close(&osd->o_con); | ||
715 | ceph_con_open(&osd->o_con, CEPH_ENTITY_TYPE_OSD, osd->o_osd, peer_addr); | ||
716 | osd->o_incarnation++; | ||
717 | |||
718 | return 0; | ||
767 | } | 719 | } |
768 | 720 | ||
769 | static void __insert_osd(struct ceph_osd_client *osdc, struct ceph_osd *new) | 721 | static void __insert_osd(struct ceph_osd_client *osdc, struct ceph_osd *new) |
@@ -1062,16 +1014,13 @@ static void __send_request(struct ceph_osd_client *osdc, | |||
1062 | /* | 1014 | /* |
1063 | * Send any requests in the queue (req_unsent). | 1015 | * Send any requests in the queue (req_unsent). |
1064 | */ | 1016 | */ |
1065 | static void send_queued(struct ceph_osd_client *osdc) | 1017 | static void __send_queued(struct ceph_osd_client *osdc) |
1066 | { | 1018 | { |
1067 | struct ceph_osd_request *req, *tmp; | 1019 | struct ceph_osd_request *req, *tmp; |
1068 | 1020 | ||
1069 | dout("send_queued\n"); | 1021 | dout("__send_queued\n"); |
1070 | mutex_lock(&osdc->request_mutex); | 1022 | list_for_each_entry_safe(req, tmp, &osdc->req_unsent, r_req_lru_item) |
1071 | list_for_each_entry_safe(req, tmp, &osdc->req_unsent, r_req_lru_item) { | ||
1072 | __send_request(osdc, req); | 1023 | __send_request(osdc, req); |
1073 | } | ||
1074 | mutex_unlock(&osdc->request_mutex); | ||
1075 | } | 1024 | } |
1076 | 1025 | ||
1077 | /* | 1026 | /* |
@@ -1123,8 +1072,8 @@ static void handle_timeout(struct work_struct *work) | |||
1123 | } | 1072 | } |
1124 | 1073 | ||
1125 | __schedule_osd_timeout(osdc); | 1074 | __schedule_osd_timeout(osdc); |
1075 | __send_queued(osdc); | ||
1126 | mutex_unlock(&osdc->request_mutex); | 1076 | mutex_unlock(&osdc->request_mutex); |
1127 | send_queued(osdc); | ||
1128 | up_read(&osdc->map_sem); | 1077 | up_read(&osdc->map_sem); |
1129 | } | 1078 | } |
1130 | 1079 | ||
@@ -1462,7 +1411,9 @@ done: | |||
1462 | if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) | 1411 | if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) |
1463 | ceph_monc_request_next_osdmap(&osdc->client->monc); | 1412 | ceph_monc_request_next_osdmap(&osdc->client->monc); |
1464 | 1413 | ||
1465 | send_queued(osdc); | 1414 | mutex_lock(&osdc->request_mutex); |
1415 | __send_queued(osdc); | ||
1416 | mutex_unlock(&osdc->request_mutex); | ||
1466 | up_read(&osdc->map_sem); | 1417 | up_read(&osdc->map_sem); |
1467 | wake_up_all(&osdc->client->auth_wq); | 1418 | wake_up_all(&osdc->client->auth_wq); |
1468 | return; | 1419 | return; |
@@ -1556,8 +1507,7 @@ static void __remove_event(struct ceph_osd_event *event) | |||
1556 | 1507 | ||
1557 | int ceph_osdc_create_event(struct ceph_osd_client *osdc, | 1508 | int ceph_osdc_create_event(struct ceph_osd_client *osdc, |
1558 | void (*event_cb)(u64, u64, u8, void *), | 1509 | void (*event_cb)(u64, u64, u8, void *), |
1559 | int one_shot, void *data, | 1510 | void *data, struct ceph_osd_event **pevent) |
1560 | struct ceph_osd_event **pevent) | ||
1561 | { | 1511 | { |
1562 | struct ceph_osd_event *event; | 1512 | struct ceph_osd_event *event; |
1563 | 1513 | ||
@@ -1567,14 +1517,13 @@ int ceph_osdc_create_event(struct ceph_osd_client *osdc, | |||
1567 | 1517 | ||
1568 | dout("create_event %p\n", event); | 1518 | dout("create_event %p\n", event); |
1569 | event->cb = event_cb; | 1519 | event->cb = event_cb; |
1570 | event->one_shot = one_shot; | 1520 | event->one_shot = 0; |
1571 | event->data = data; | 1521 | event->data = data; |
1572 | event->osdc = osdc; | 1522 | event->osdc = osdc; |
1573 | INIT_LIST_HEAD(&event->osd_node); | 1523 | INIT_LIST_HEAD(&event->osd_node); |
1574 | RB_CLEAR_NODE(&event->node); | 1524 | RB_CLEAR_NODE(&event->node); |
1575 | kref_init(&event->kref); /* one ref for us */ | 1525 | kref_init(&event->kref); /* one ref for us */ |
1576 | kref_get(&event->kref); /* one ref for the caller */ | 1526 | kref_get(&event->kref); /* one ref for the caller */ |
1577 | init_completion(&event->completion); | ||
1578 | 1527 | ||
1579 | spin_lock(&osdc->event_lock); | 1528 | spin_lock(&osdc->event_lock); |
1580 | event->cookie = ++osdc->event_count; | 1529 | event->cookie = ++osdc->event_count; |
@@ -1610,7 +1559,6 @@ static void do_event_work(struct work_struct *work) | |||
1610 | 1559 | ||
1611 | dout("do_event_work completing %p\n", event); | 1560 | dout("do_event_work completing %p\n", event); |
1612 | event->cb(ver, notify_id, opcode, event->data); | 1561 | event->cb(ver, notify_id, opcode, event->data); |
1613 | complete(&event->completion); | ||
1614 | dout("do_event_work completed %p\n", event); | 1562 | dout("do_event_work completed %p\n", event); |
1615 | ceph_osdc_put_event(event); | 1563 | ceph_osdc_put_event(event); |
1616 | kfree(event_work); | 1564 | kfree(event_work); |
@@ -1620,7 +1568,8 @@ static void do_event_work(struct work_struct *work) | |||
1620 | /* | 1568 | /* |
1621 | * Process osd watch notifications | 1569 | * Process osd watch notifications |
1622 | */ | 1570 | */ |
1623 | void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg) | 1571 | static void handle_watch_notify(struct ceph_osd_client *osdc, |
1572 | struct ceph_msg *msg) | ||
1624 | { | 1573 | { |
1625 | void *p, *end; | 1574 | void *p, *end; |
1626 | u8 proto_ver; | 1575 | u8 proto_ver; |
@@ -1641,9 +1590,8 @@ void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg) | |||
1641 | spin_lock(&osdc->event_lock); | 1590 | spin_lock(&osdc->event_lock); |
1642 | event = __find_event(osdc, cookie); | 1591 | event = __find_event(osdc, cookie); |
1643 | if (event) { | 1592 | if (event) { |
1593 | BUG_ON(event->one_shot); | ||
1644 | get_event(event); | 1594 | get_event(event); |
1645 | if (event->one_shot) | ||
1646 | __remove_event(event); | ||
1647 | } | 1595 | } |
1648 | spin_unlock(&osdc->event_lock); | 1596 | spin_unlock(&osdc->event_lock); |
1649 | dout("handle_watch_notify cookie %lld ver %lld event %p\n", | 1597 | dout("handle_watch_notify cookie %lld ver %lld event %p\n", |
@@ -1668,7 +1616,6 @@ void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg) | |||
1668 | return; | 1616 | return; |
1669 | 1617 | ||
1670 | done_err: | 1618 | done_err: |
1671 | complete(&event->completion); | ||
1672 | ceph_osdc_put_event(event); | 1619 | ceph_osdc_put_event(event); |
1673 | return; | 1620 | return; |
1674 | 1621 | ||
@@ -1677,21 +1624,6 @@ bad: | |||
1677 | return; | 1624 | return; |
1678 | } | 1625 | } |
1679 | 1626 | ||
1680 | int ceph_osdc_wait_event(struct ceph_osd_event *event, unsigned long timeout) | ||
1681 | { | ||
1682 | int err; | ||
1683 | |||
1684 | dout("wait_event %p\n", event); | ||
1685 | err = wait_for_completion_interruptible_timeout(&event->completion, | ||
1686 | timeout * HZ); | ||
1687 | ceph_osdc_put_event(event); | ||
1688 | if (err > 0) | ||
1689 | err = 0; | ||
1690 | dout("wait_event %p returns %d\n", event, err); | ||
1691 | return err; | ||
1692 | } | ||
1693 | EXPORT_SYMBOL(ceph_osdc_wait_event); | ||
1694 | |||
1695 | /* | 1627 | /* |
1696 | * Register request, send initial attempt. | 1628 | * Register request, send initial attempt. |
1697 | */ | 1629 | */ |
@@ -1706,7 +1638,7 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, | |||
1706 | #ifdef CONFIG_BLOCK | 1638 | #ifdef CONFIG_BLOCK |
1707 | req->r_request->bio = req->r_bio; | 1639 | req->r_request->bio = req->r_bio; |
1708 | #endif | 1640 | #endif |
1709 | req->r_request->trail = req->r_trail; | 1641 | req->r_request->trail = &req->r_trail; |
1710 | 1642 | ||
1711 | register_request(osdc, req); | 1643 | register_request(osdc, req); |
1712 | 1644 | ||
@@ -1865,7 +1797,6 @@ out_mempool: | |||
1865 | out: | 1797 | out: |
1866 | return err; | 1798 | return err; |
1867 | } | 1799 | } |
1868 | EXPORT_SYMBOL(ceph_osdc_init); | ||
1869 | 1800 | ||
1870 | void ceph_osdc_stop(struct ceph_osd_client *osdc) | 1801 | void ceph_osdc_stop(struct ceph_osd_client *osdc) |
1871 | { | 1802 | { |
@@ -1882,7 +1813,6 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc) | |||
1882 | ceph_msgpool_destroy(&osdc->msgpool_op); | 1813 | ceph_msgpool_destroy(&osdc->msgpool_op); |
1883 | ceph_msgpool_destroy(&osdc->msgpool_op_reply); | 1814 | ceph_msgpool_destroy(&osdc->msgpool_op_reply); |
1884 | } | 1815 | } |
1885 | EXPORT_SYMBOL(ceph_osdc_stop); | ||
1886 | 1816 | ||
1887 | /* | 1817 | /* |
1888 | * Read some contiguous pages. If we cross a stripe boundary, shorten | 1818 | * Read some contiguous pages. If we cross a stripe boundary, shorten |
@@ -1902,7 +1832,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, | |||
1902 | req = ceph_osdc_new_request(osdc, layout, vino, off, plen, | 1832 | req = ceph_osdc_new_request(osdc, layout, vino, off, plen, |
1903 | CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, | 1833 | CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, |
1904 | NULL, 0, truncate_seq, truncate_size, NULL, | 1834 | NULL, 0, truncate_seq, truncate_size, NULL, |
1905 | false, 1, page_align); | 1835 | false, page_align); |
1906 | if (IS_ERR(req)) | 1836 | if (IS_ERR(req)) |
1907 | return PTR_ERR(req); | 1837 | return PTR_ERR(req); |
1908 | 1838 | ||
@@ -1931,8 +1861,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, | |||
1931 | u64 off, u64 len, | 1861 | u64 off, u64 len, |
1932 | u32 truncate_seq, u64 truncate_size, | 1862 | u32 truncate_seq, u64 truncate_size, |
1933 | struct timespec *mtime, | 1863 | struct timespec *mtime, |
1934 | struct page **pages, int num_pages, | 1864 | struct page **pages, int num_pages) |
1935 | int flags, int do_sync, bool nofail) | ||
1936 | { | 1865 | { |
1937 | struct ceph_osd_request *req; | 1866 | struct ceph_osd_request *req; |
1938 | int rc = 0; | 1867 | int rc = 0; |
@@ -1941,11 +1870,10 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, | |||
1941 | BUG_ON(vino.snap != CEPH_NOSNAP); | 1870 | BUG_ON(vino.snap != CEPH_NOSNAP); |
1942 | req = ceph_osdc_new_request(osdc, layout, vino, off, &len, | 1871 | req = ceph_osdc_new_request(osdc, layout, vino, off, &len, |
1943 | CEPH_OSD_OP_WRITE, | 1872 | CEPH_OSD_OP_WRITE, |
1944 | flags | CEPH_OSD_FLAG_ONDISK | | 1873 | CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, |
1945 | CEPH_OSD_FLAG_WRITE, | 1874 | snapc, 0, |
1946 | snapc, do_sync, | ||
1947 | truncate_seq, truncate_size, mtime, | 1875 | truncate_seq, truncate_size, mtime, |
1948 | nofail, 1, page_align); | 1876 | true, page_align); |
1949 | if (IS_ERR(req)) | 1877 | if (IS_ERR(req)) |
1950 | return PTR_ERR(req); | 1878 | return PTR_ERR(req); |
1951 | 1879 | ||
@@ -1954,7 +1882,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, | |||
1954 | dout("writepages %llu~%llu (%d pages)\n", off, len, | 1882 | dout("writepages %llu~%llu (%d pages)\n", off, len, |
1955 | req->r_num_pages); | 1883 | req->r_num_pages); |
1956 | 1884 | ||
1957 | rc = ceph_osdc_start_request(osdc, req, nofail); | 1885 | rc = ceph_osdc_start_request(osdc, req, true); |
1958 | if (!rc) | 1886 | if (!rc) |
1959 | rc = ceph_osdc_wait_request(osdc, req); | 1887 | rc = ceph_osdc_wait_request(osdc, req); |
1960 | 1888 | ||
@@ -2047,7 +1975,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, | |||
2047 | if (data_len > 0) { | 1975 | if (data_len > 0) { |
2048 | int want = calc_pages_for(req->r_page_alignment, data_len); | 1976 | int want = calc_pages_for(req->r_page_alignment, data_len); |
2049 | 1977 | ||
2050 | if (unlikely(req->r_num_pages < want)) { | 1978 | if (req->r_pages && unlikely(req->r_num_pages < want)) { |
2051 | pr_warning("tid %lld reply has %d bytes %d pages, we" | 1979 | pr_warning("tid %lld reply has %d bytes %d pages, we" |
2052 | " had only %d pages ready\n", tid, data_len, | 1980 | " had only %d pages ready\n", tid, data_len, |
2053 | want, req->r_num_pages); | 1981 | want, req->r_num_pages); |
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index de73214b5d26..3c61e21611d3 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c | |||
@@ -13,26 +13,18 @@ | |||
13 | 13 | ||
14 | char *ceph_osdmap_state_str(char *str, int len, int state) | 14 | char *ceph_osdmap_state_str(char *str, int len, int state) |
15 | { | 15 | { |
16 | int flag = 0; | ||
17 | |||
18 | if (!len) | 16 | if (!len) |
19 | goto done; | 17 | return str; |
20 | 18 | ||
21 | *str = '\0'; | 19 | if ((state & CEPH_OSD_EXISTS) && (state & CEPH_OSD_UP)) |
22 | if (state) { | 20 | snprintf(str, len, "exists, up"); |
23 | if (state & CEPH_OSD_EXISTS) { | 21 | else if (state & CEPH_OSD_EXISTS) |
24 | snprintf(str, len, "exists"); | 22 | snprintf(str, len, "exists"); |
25 | flag = 1; | 23 | else if (state & CEPH_OSD_UP) |
26 | } | 24 | snprintf(str, len, "up"); |
27 | if (state & CEPH_OSD_UP) { | 25 | else |
28 | snprintf(str, len, "%s%s%s", str, (flag ? ", " : ""), | ||
29 | "up"); | ||
30 | flag = 1; | ||
31 | } | ||
32 | } else { | ||
33 | snprintf(str, len, "doesn't exist"); | 26 | snprintf(str, len, "doesn't exist"); |
34 | } | 27 | |
35 | done: | ||
36 | return str; | 28 | return str; |
37 | } | 29 | } |
38 | 30 | ||
@@ -170,6 +162,7 @@ static struct crush_map *crush_decode(void *pbyval, void *end) | |||
170 | c->choose_local_tries = 2; | 162 | c->choose_local_tries = 2; |
171 | c->choose_local_fallback_tries = 5; | 163 | c->choose_local_fallback_tries = 5; |
172 | c->choose_total_tries = 19; | 164 | c->choose_total_tries = 19; |
165 | c->chooseleaf_descend_once = 0; | ||
173 | 166 | ||
174 | ceph_decode_need(p, end, 4*sizeof(u32), bad); | 167 | ceph_decode_need(p, end, 4*sizeof(u32), bad); |
175 | magic = ceph_decode_32(p); | 168 | magic = ceph_decode_32(p); |
@@ -336,6 +329,11 @@ static struct crush_map *crush_decode(void *pbyval, void *end) | |||
336 | dout("crush decode tunable choose_total_tries = %d", | 329 | dout("crush decode tunable choose_total_tries = %d", |
337 | c->choose_total_tries); | 330 | c->choose_total_tries); |
338 | 331 | ||
332 | ceph_decode_need(p, end, sizeof(u32), done); | ||
333 | c->chooseleaf_descend_once = ceph_decode_32(p); | ||
334 | dout("crush decode tunable chooseleaf_descend_once = %d", | ||
335 | c->chooseleaf_descend_once); | ||
336 | |||
339 | done: | 337 | done: |
340 | dout("crush_decode success\n"); | 338 | dout("crush_decode success\n"); |
341 | return c; | 339 | return c; |
@@ -1010,7 +1008,7 @@ bad: | |||
1010 | * pass a stride back to the caller. | 1008 | * pass a stride back to the caller. |
1011 | */ | 1009 | */ |
1012 | int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, | 1010 | int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, |
1013 | u64 off, u64 *plen, | 1011 | u64 off, u64 len, |
1014 | u64 *ono, | 1012 | u64 *ono, |
1015 | u64 *oxoff, u64 *oxlen) | 1013 | u64 *oxoff, u64 *oxlen) |
1016 | { | 1014 | { |
@@ -1021,7 +1019,7 @@ int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, | |||
1021 | u32 su_per_object; | 1019 | u32 su_per_object; |
1022 | u64 t, su_offset; | 1020 | u64 t, su_offset; |
1023 | 1021 | ||
1024 | dout("mapping %llu~%llu osize %u fl_su %u\n", off, *plen, | 1022 | dout("mapping %llu~%llu osize %u fl_su %u\n", off, len, |
1025 | osize, su); | 1023 | osize, su); |
1026 | if (su == 0 || sc == 0) | 1024 | if (su == 0 || sc == 0) |
1027 | goto invalid; | 1025 | goto invalid; |
@@ -1054,11 +1052,10 @@ int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, | |||
1054 | 1052 | ||
1055 | /* | 1053 | /* |
1056 | * Calculate the length of the extent being written to the selected | 1054 | * Calculate the length of the extent being written to the selected |
1057 | * object. This is the minimum of the full length requested (plen) or | 1055 | * object. This is the minimum of the full length requested (len) or |
1058 | * the remainder of the current stripe being written to. | 1056 | * the remainder of the current stripe being written to. |
1059 | */ | 1057 | */ |
1060 | *oxlen = min_t(u64, *plen, su - su_offset); | 1058 | *oxlen = min_t(u64, len, su - su_offset); |
1061 | *plen = *oxlen; | ||
1062 | 1059 | ||
1063 | dout(" obj extent %llu~%llu\n", *oxoff, *oxlen); | 1060 | dout(" obj extent %llu~%llu\n", *oxoff, *oxlen); |
1064 | return 0; | 1061 | return 0; |
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index cd9c21df87d1..815a2249cfa9 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c | |||
@@ -12,7 +12,7 @@ | |||
12 | /* | 12 | /* |
13 | * build a vector of user pages | 13 | * build a vector of user pages |
14 | */ | 14 | */ |
15 | struct page **ceph_get_direct_page_vector(const char __user *data, | 15 | struct page **ceph_get_direct_page_vector(const void __user *data, |
16 | int num_pages, bool write_page) | 16 | int num_pages, bool write_page) |
17 | { | 17 | { |
18 | struct page **pages; | 18 | struct page **pages; |
@@ -93,7 +93,7 @@ EXPORT_SYMBOL(ceph_alloc_page_vector); | |||
93 | * copy user data into a page vector | 93 | * copy user data into a page vector |
94 | */ | 94 | */ |
95 | int ceph_copy_user_to_page_vector(struct page **pages, | 95 | int ceph_copy_user_to_page_vector(struct page **pages, |
96 | const char __user *data, | 96 | const void __user *data, |
97 | loff_t off, size_t len) | 97 | loff_t off, size_t len) |
98 | { | 98 | { |
99 | int i = 0; | 99 | int i = 0; |
@@ -118,17 +118,17 @@ int ceph_copy_user_to_page_vector(struct page **pages, | |||
118 | } | 118 | } |
119 | EXPORT_SYMBOL(ceph_copy_user_to_page_vector); | 119 | EXPORT_SYMBOL(ceph_copy_user_to_page_vector); |
120 | 120 | ||
121 | int ceph_copy_to_page_vector(struct page **pages, | 121 | void ceph_copy_to_page_vector(struct page **pages, |
122 | const char *data, | 122 | const void *data, |
123 | loff_t off, size_t len) | 123 | loff_t off, size_t len) |
124 | { | 124 | { |
125 | int i = 0; | 125 | int i = 0; |
126 | size_t po = off & ~PAGE_CACHE_MASK; | 126 | size_t po = off & ~PAGE_CACHE_MASK; |
127 | size_t left = len; | 127 | size_t left = len; |
128 | size_t l; | ||
129 | 128 | ||
130 | while (left > 0) { | 129 | while (left > 0) { |
131 | l = min_t(size_t, PAGE_CACHE_SIZE-po, left); | 130 | size_t l = min_t(size_t, PAGE_CACHE_SIZE-po, left); |
131 | |||
132 | memcpy(page_address(pages[i]) + po, data, l); | 132 | memcpy(page_address(pages[i]) + po, data, l); |
133 | data += l; | 133 | data += l; |
134 | left -= l; | 134 | left -= l; |
@@ -138,21 +138,20 @@ int ceph_copy_to_page_vector(struct page **pages, | |||
138 | i++; | 138 | i++; |
139 | } | 139 | } |
140 | } | 140 | } |
141 | return len; | ||
142 | } | 141 | } |
143 | EXPORT_SYMBOL(ceph_copy_to_page_vector); | 142 | EXPORT_SYMBOL(ceph_copy_to_page_vector); |
144 | 143 | ||
145 | int ceph_copy_from_page_vector(struct page **pages, | 144 | void ceph_copy_from_page_vector(struct page **pages, |
146 | char *data, | 145 | void *data, |
147 | loff_t off, size_t len) | 146 | loff_t off, size_t len) |
148 | { | 147 | { |
149 | int i = 0; | 148 | int i = 0; |
150 | size_t po = off & ~PAGE_CACHE_MASK; | 149 | size_t po = off & ~PAGE_CACHE_MASK; |
151 | size_t left = len; | 150 | size_t left = len; |
152 | size_t l; | ||
153 | 151 | ||
154 | while (left > 0) { | 152 | while (left > 0) { |
155 | l = min_t(size_t, PAGE_CACHE_SIZE-po, left); | 153 | size_t l = min_t(size_t, PAGE_CACHE_SIZE-po, left); |
154 | |||
156 | memcpy(data, page_address(pages[i]) + po, l); | 155 | memcpy(data, page_address(pages[i]) + po, l); |
157 | data += l; | 156 | data += l; |
158 | left -= l; | 157 | left -= l; |
@@ -162,7 +161,6 @@ int ceph_copy_from_page_vector(struct page **pages, | |||
162 | i++; | 161 | i++; |
163 | } | 162 | } |
164 | } | 163 | } |
165 | return len; | ||
166 | } | 164 | } |
167 | EXPORT_SYMBOL(ceph_copy_from_page_vector); | 165 | EXPORT_SYMBOL(ceph_copy_from_page_vector); |
168 | 166 | ||
@@ -170,7 +168,7 @@ EXPORT_SYMBOL(ceph_copy_from_page_vector); | |||
170 | * copy user data from a page vector into a user pointer | 168 | * copy user data from a page vector into a user pointer |
171 | */ | 169 | */ |
172 | int ceph_copy_page_vector_to_user(struct page **pages, | 170 | int ceph_copy_page_vector_to_user(struct page **pages, |
173 | char __user *data, | 171 | void __user *data, |
174 | loff_t off, size_t len) | 172 | loff_t off, size_t len) |
175 | { | 173 | { |
176 | int i = 0; | 174 | int i = 0; |