diff options
Diffstat (limited to 'drivers/block/rbd.c')
-rw-r--r-- | drivers/block/rbd.c | 816 |
1 file changed, 462 insertions, 354 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 8f428a8ab003..9917943a3572 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -55,8 +55,6 @@ | |||
55 | 55 | ||
56 | #define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */ | 56 | #define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */ |
57 | 57 | ||
58 | #define RBD_MAX_MD_NAME_LEN (RBD_MAX_OBJ_NAME_LEN + sizeof(RBD_SUFFIX)) | ||
59 | #define RBD_MAX_POOL_NAME_LEN 64 | ||
60 | #define RBD_MAX_SNAP_NAME_LEN 32 | 58 | #define RBD_MAX_SNAP_NAME_LEN 32 |
61 | #define RBD_MAX_OPT_LEN 1024 | 59 | #define RBD_MAX_OPT_LEN 1024 |
62 | 60 | ||
@@ -78,13 +76,12 @@ | |||
78 | */ | 76 | */ |
79 | struct rbd_image_header { | 77 | struct rbd_image_header { |
80 | u64 image_size; | 78 | u64 image_size; |
81 | char block_name[32]; | 79 | char *object_prefix; |
82 | __u8 obj_order; | 80 | __u8 obj_order; |
83 | __u8 crypt_type; | 81 | __u8 crypt_type; |
84 | __u8 comp_type; | 82 | __u8 comp_type; |
85 | struct ceph_snap_context *snapc; | 83 | struct ceph_snap_context *snapc; |
86 | size_t snap_names_len; | 84 | size_t snap_names_len; |
87 | u64 snap_seq; | ||
88 | u32 total_snaps; | 85 | u32 total_snaps; |
89 | 86 | ||
90 | char *snap_names; | 87 | char *snap_names; |
@@ -150,7 +147,7 @@ struct rbd_snap { | |||
150 | * a single device | 147 | * a single device |
151 | */ | 148 | */ |
152 | struct rbd_device { | 149 | struct rbd_device { |
153 | int id; /* blkdev unique id */ | 150 | int dev_id; /* blkdev unique id */ |
154 | 151 | ||
155 | int major; /* blkdev assigned major */ | 152 | int major; /* blkdev assigned major */ |
156 | struct gendisk *disk; /* blkdev's gendisk and rq */ | 153 | struct gendisk *disk; /* blkdev's gendisk and rq */ |
@@ -163,20 +160,24 @@ struct rbd_device { | |||
163 | spinlock_t lock; /* queue lock */ | 160 | spinlock_t lock; /* queue lock */ |
164 | 161 | ||
165 | struct rbd_image_header header; | 162 | struct rbd_image_header header; |
166 | char obj[RBD_MAX_OBJ_NAME_LEN]; /* rbd image name */ | 163 | char *image_name; |
167 | int obj_len; | 164 | size_t image_name_len; |
168 | char obj_md_name[RBD_MAX_MD_NAME_LEN]; /* hdr nm. */ | 165 | char *header_name; |
169 | char pool_name[RBD_MAX_POOL_NAME_LEN]; | 166 | char *pool_name; |
170 | int poolid; | 167 | int pool_id; |
171 | 168 | ||
172 | struct ceph_osd_event *watch_event; | 169 | struct ceph_osd_event *watch_event; |
173 | struct ceph_osd_request *watch_request; | 170 | struct ceph_osd_request *watch_request; |
174 | 171 | ||
175 | /* protects updating the header */ | 172 | /* protects updating the header */ |
176 | struct rw_semaphore header_rwsem; | 173 | struct rw_semaphore header_rwsem; |
177 | char snap_name[RBD_MAX_SNAP_NAME_LEN]; | 174 | /* name of the snapshot this device reads from */ |
175 | char *snap_name; | ||
176 | /* id of the snapshot this device reads from */ | ||
178 | u64 snap_id; /* current snapshot id */ | 177 | u64 snap_id; /* current snapshot id */ |
179 | int read_only; | 178 | /* whether the snap_id this device reads from still exists */ |
179 | bool snap_exists; | ||
180 | int read_only; | ||
180 | 181 | ||
181 | struct list_head node; | 182 | struct list_head node; |
182 | 183 | ||
@@ -201,8 +202,7 @@ static ssize_t rbd_snap_add(struct device *dev, | |||
201 | struct device_attribute *attr, | 202 | struct device_attribute *attr, |
202 | const char *buf, | 203 | const char *buf, |
203 | size_t count); | 204 | size_t count); |
204 | static void __rbd_remove_snap_dev(struct rbd_device *rbd_dev, | 205 | static void __rbd_remove_snap_dev(struct rbd_snap *snap); |
205 | struct rbd_snap *snap); | ||
206 | 206 | ||
207 | static ssize_t rbd_add(struct bus_type *bus, const char *buf, | 207 | static ssize_t rbd_add(struct bus_type *bus, const char *buf, |
208 | size_t count); | 208 | size_t count); |
@@ -240,7 +240,7 @@ static void rbd_put_dev(struct rbd_device *rbd_dev) | |||
240 | put_device(&rbd_dev->dev); | 240 | put_device(&rbd_dev->dev); |
241 | } | 241 | } |
242 | 242 | ||
243 | static int __rbd_refresh_header(struct rbd_device *rbd_dev); | 243 | static int rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver); |
244 | 244 | ||
245 | static int rbd_open(struct block_device *bdev, fmode_t mode) | 245 | static int rbd_open(struct block_device *bdev, fmode_t mode) |
246 | { | 246 | { |
@@ -273,9 +273,9 @@ static const struct block_device_operations rbd_bd_ops = { | |||
273 | 273 | ||
274 | /* | 274 | /* |
275 | * Initialize an rbd client instance. | 275 | * Initialize an rbd client instance. |
276 | * We own *opt. | 276 | * We own *ceph_opts. |
277 | */ | 277 | */ |
278 | static struct rbd_client *rbd_client_create(struct ceph_options *opt, | 278 | static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts, |
279 | struct rbd_options *rbd_opts) | 279 | struct rbd_options *rbd_opts) |
280 | { | 280 | { |
281 | struct rbd_client *rbdc; | 281 | struct rbd_client *rbdc; |
@@ -291,10 +291,10 @@ static struct rbd_client *rbd_client_create(struct ceph_options *opt, | |||
291 | 291 | ||
292 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | 292 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); |
293 | 293 | ||
294 | rbdc->client = ceph_create_client(opt, rbdc, 0, 0); | 294 | rbdc->client = ceph_create_client(ceph_opts, rbdc, 0, 0); |
295 | if (IS_ERR(rbdc->client)) | 295 | if (IS_ERR(rbdc->client)) |
296 | goto out_mutex; | 296 | goto out_mutex; |
297 | opt = NULL; /* Now rbdc->client is responsible for opt */ | 297 | ceph_opts = NULL; /* Now rbdc->client is responsible for ceph_opts */ |
298 | 298 | ||
299 | ret = ceph_open_session(rbdc->client); | 299 | ret = ceph_open_session(rbdc->client); |
300 | if (ret < 0) | 300 | if (ret < 0) |
@@ -317,23 +317,23 @@ out_mutex: | |||
317 | mutex_unlock(&ctl_mutex); | 317 | mutex_unlock(&ctl_mutex); |
318 | kfree(rbdc); | 318 | kfree(rbdc); |
319 | out_opt: | 319 | out_opt: |
320 | if (opt) | 320 | if (ceph_opts) |
321 | ceph_destroy_options(opt); | 321 | ceph_destroy_options(ceph_opts); |
322 | return ERR_PTR(ret); | 322 | return ERR_PTR(ret); |
323 | } | 323 | } |
324 | 324 | ||
325 | /* | 325 | /* |
326 | * Find a ceph client with specific addr and configuration. | 326 | * Find a ceph client with specific addr and configuration. |
327 | */ | 327 | */ |
328 | static struct rbd_client *__rbd_client_find(struct ceph_options *opt) | 328 | static struct rbd_client *__rbd_client_find(struct ceph_options *ceph_opts) |
329 | { | 329 | { |
330 | struct rbd_client *client_node; | 330 | struct rbd_client *client_node; |
331 | 331 | ||
332 | if (opt->flags & CEPH_OPT_NOSHARE) | 332 | if (ceph_opts->flags & CEPH_OPT_NOSHARE) |
333 | return NULL; | 333 | return NULL; |
334 | 334 | ||
335 | list_for_each_entry(client_node, &rbd_client_list, node) | 335 | list_for_each_entry(client_node, &rbd_client_list, node) |
336 | if (ceph_compare_options(opt, client_node->client) == 0) | 336 | if (!ceph_compare_options(ceph_opts, client_node->client)) |
337 | return client_node; | 337 | return client_node; |
338 | return NULL; | 338 | return NULL; |
339 | } | 339 | } |
@@ -349,7 +349,7 @@ enum { | |||
349 | /* string args above */ | 349 | /* string args above */ |
350 | }; | 350 | }; |
351 | 351 | ||
352 | static match_table_t rbdopt_tokens = { | 352 | static match_table_t rbd_opts_tokens = { |
353 | {Opt_notify_timeout, "notify_timeout=%d"}, | 353 | {Opt_notify_timeout, "notify_timeout=%d"}, |
354 | /* int args above */ | 354 | /* int args above */ |
355 | /* string args above */ | 355 | /* string args above */ |
@@ -358,11 +358,11 @@ static match_table_t rbdopt_tokens = { | |||
358 | 358 | ||
359 | static int parse_rbd_opts_token(char *c, void *private) | 359 | static int parse_rbd_opts_token(char *c, void *private) |
360 | { | 360 | { |
361 | struct rbd_options *rbdopt = private; | 361 | struct rbd_options *rbd_opts = private; |
362 | substring_t argstr[MAX_OPT_ARGS]; | 362 | substring_t argstr[MAX_OPT_ARGS]; |
363 | int token, intval, ret; | 363 | int token, intval, ret; |
364 | 364 | ||
365 | token = match_token(c, rbdopt_tokens, argstr); | 365 | token = match_token(c, rbd_opts_tokens, argstr); |
366 | if (token < 0) | 366 | if (token < 0) |
367 | return -EINVAL; | 367 | return -EINVAL; |
368 | 368 | ||
@@ -383,7 +383,7 @@ static int parse_rbd_opts_token(char *c, void *private) | |||
383 | 383 | ||
384 | switch (token) { | 384 | switch (token) { |
385 | case Opt_notify_timeout: | 385 | case Opt_notify_timeout: |
386 | rbdopt->notify_timeout = intval; | 386 | rbd_opts->notify_timeout = intval; |
387 | break; | 387 | break; |
388 | default: | 388 | default: |
389 | BUG_ON(token); | 389 | BUG_ON(token); |
@@ -400,7 +400,7 @@ static struct rbd_client *rbd_get_client(const char *mon_addr, | |||
400 | char *options) | 400 | char *options) |
401 | { | 401 | { |
402 | struct rbd_client *rbdc; | 402 | struct rbd_client *rbdc; |
403 | struct ceph_options *opt; | 403 | struct ceph_options *ceph_opts; |
404 | struct rbd_options *rbd_opts; | 404 | struct rbd_options *rbd_opts; |
405 | 405 | ||
406 | rbd_opts = kzalloc(sizeof(*rbd_opts), GFP_KERNEL); | 406 | rbd_opts = kzalloc(sizeof(*rbd_opts), GFP_KERNEL); |
@@ -409,29 +409,29 @@ static struct rbd_client *rbd_get_client(const char *mon_addr, | |||
409 | 409 | ||
410 | rbd_opts->notify_timeout = RBD_NOTIFY_TIMEOUT_DEFAULT; | 410 | rbd_opts->notify_timeout = RBD_NOTIFY_TIMEOUT_DEFAULT; |
411 | 411 | ||
412 | opt = ceph_parse_options(options, mon_addr, | 412 | ceph_opts = ceph_parse_options(options, mon_addr, |
413 | mon_addr + mon_addr_len, | 413 | mon_addr + mon_addr_len, |
414 | parse_rbd_opts_token, rbd_opts); | 414 | parse_rbd_opts_token, rbd_opts); |
415 | if (IS_ERR(opt)) { | 415 | if (IS_ERR(ceph_opts)) { |
416 | kfree(rbd_opts); | 416 | kfree(rbd_opts); |
417 | return ERR_CAST(opt); | 417 | return ERR_CAST(ceph_opts); |
418 | } | 418 | } |
419 | 419 | ||
420 | spin_lock(&rbd_client_list_lock); | 420 | spin_lock(&rbd_client_list_lock); |
421 | rbdc = __rbd_client_find(opt); | 421 | rbdc = __rbd_client_find(ceph_opts); |
422 | if (rbdc) { | 422 | if (rbdc) { |
423 | /* using an existing client */ | 423 | /* using an existing client */ |
424 | kref_get(&rbdc->kref); | 424 | kref_get(&rbdc->kref); |
425 | spin_unlock(&rbd_client_list_lock); | 425 | spin_unlock(&rbd_client_list_lock); |
426 | 426 | ||
427 | ceph_destroy_options(opt); | 427 | ceph_destroy_options(ceph_opts); |
428 | kfree(rbd_opts); | 428 | kfree(rbd_opts); |
429 | 429 | ||
430 | return rbdc; | 430 | return rbdc; |
431 | } | 431 | } |
432 | spin_unlock(&rbd_client_list_lock); | 432 | spin_unlock(&rbd_client_list_lock); |
433 | 433 | ||
434 | rbdc = rbd_client_create(opt, rbd_opts); | 434 | rbdc = rbd_client_create(ceph_opts, rbd_opts); |
435 | 435 | ||
436 | if (IS_ERR(rbdc)) | 436 | if (IS_ERR(rbdc)) |
437 | kfree(rbd_opts); | 437 | kfree(rbd_opts); |
@@ -480,46 +480,60 @@ static void rbd_coll_release(struct kref *kref) | |||
480 | kfree(coll); | 480 | kfree(coll); |
481 | } | 481 | } |
482 | 482 | ||
483 | static bool rbd_dev_ondisk_valid(struct rbd_image_header_ondisk *ondisk) | ||
484 | { | ||
485 | return !memcmp(&ondisk->text, | ||
486 | RBD_HEADER_TEXT, sizeof (RBD_HEADER_TEXT)); | ||
487 | } | ||
488 | |||
483 | /* | 489 | /* |
484 | * Create a new header structure, translate header format from the on-disk | 490 | * Create a new header structure, translate header format from the on-disk |
485 | * header. | 491 | * header. |
486 | */ | 492 | */ |
487 | static int rbd_header_from_disk(struct rbd_image_header *header, | 493 | static int rbd_header_from_disk(struct rbd_image_header *header, |
488 | struct rbd_image_header_ondisk *ondisk, | 494 | struct rbd_image_header_ondisk *ondisk, |
489 | u32 allocated_snaps, | 495 | u32 allocated_snaps) |
490 | gfp_t gfp_flags) | ||
491 | { | 496 | { |
492 | u32 i, snap_count; | 497 | u32 snap_count; |
493 | 498 | ||
494 | if (memcmp(ondisk, RBD_HEADER_TEXT, sizeof(RBD_HEADER_TEXT))) | 499 | if (!rbd_dev_ondisk_valid(ondisk)) |
495 | return -ENXIO; | 500 | return -ENXIO; |
496 | 501 | ||
497 | snap_count = le32_to_cpu(ondisk->snap_count); | 502 | snap_count = le32_to_cpu(ondisk->snap_count); |
498 | if (snap_count > (UINT_MAX - sizeof(struct ceph_snap_context)) | 503 | if (snap_count > (SIZE_MAX - sizeof(struct ceph_snap_context)) |
499 | / sizeof (*ondisk)) | 504 | / sizeof (u64)) |
500 | return -EINVAL; | 505 | return -EINVAL; |
501 | header->snapc = kmalloc(sizeof(struct ceph_snap_context) + | 506 | header->snapc = kmalloc(sizeof(struct ceph_snap_context) + |
502 | snap_count * sizeof(u64), | 507 | snap_count * sizeof(u64), |
503 | gfp_flags); | 508 | GFP_KERNEL); |
504 | if (!header->snapc) | 509 | if (!header->snapc) |
505 | return -ENOMEM; | 510 | return -ENOMEM; |
506 | 511 | ||
507 | header->snap_names_len = le64_to_cpu(ondisk->snap_names_len); | ||
508 | if (snap_count) { | 512 | if (snap_count) { |
513 | header->snap_names_len = le64_to_cpu(ondisk->snap_names_len); | ||
509 | header->snap_names = kmalloc(header->snap_names_len, | 514 | header->snap_names = kmalloc(header->snap_names_len, |
510 | gfp_flags); | 515 | GFP_KERNEL); |
511 | if (!header->snap_names) | 516 | if (!header->snap_names) |
512 | goto err_snapc; | 517 | goto err_snapc; |
513 | header->snap_sizes = kmalloc(snap_count * sizeof(u64), | 518 | header->snap_sizes = kmalloc(snap_count * sizeof(u64), |
514 | gfp_flags); | 519 | GFP_KERNEL); |
515 | if (!header->snap_sizes) | 520 | if (!header->snap_sizes) |
516 | goto err_names; | 521 | goto err_names; |
517 | } else { | 522 | } else { |
523 | WARN_ON(ondisk->snap_names_len); | ||
524 | header->snap_names_len = 0; | ||
518 | header->snap_names = NULL; | 525 | header->snap_names = NULL; |
519 | header->snap_sizes = NULL; | 526 | header->snap_sizes = NULL; |
520 | } | 527 | } |
521 | memcpy(header->block_name, ondisk->block_name, | 528 | |
529 | header->object_prefix = kmalloc(sizeof (ondisk->block_name) + 1, | ||
530 | GFP_KERNEL); | ||
531 | if (!header->object_prefix) | ||
532 | goto err_sizes; | ||
533 | |||
534 | memcpy(header->object_prefix, ondisk->block_name, | ||
522 | sizeof(ondisk->block_name)); | 535 | sizeof(ondisk->block_name)); |
536 | header->object_prefix[sizeof (ondisk->block_name)] = '\0'; | ||
523 | 537 | ||
524 | header->image_size = le64_to_cpu(ondisk->image_size); | 538 | header->image_size = le64_to_cpu(ondisk->image_size); |
525 | header->obj_order = ondisk->options.order; | 539 | header->obj_order = ondisk->options.order; |
@@ -527,11 +541,13 @@ static int rbd_header_from_disk(struct rbd_image_header *header, | |||
527 | header->comp_type = ondisk->options.comp_type; | 541 | header->comp_type = ondisk->options.comp_type; |
528 | 542 | ||
529 | atomic_set(&header->snapc->nref, 1); | 543 | atomic_set(&header->snapc->nref, 1); |
530 | header->snap_seq = le64_to_cpu(ondisk->snap_seq); | 544 | header->snapc->seq = le64_to_cpu(ondisk->snap_seq); |
531 | header->snapc->num_snaps = snap_count; | 545 | header->snapc->num_snaps = snap_count; |
532 | header->total_snaps = snap_count; | 546 | header->total_snaps = snap_count; |
533 | 547 | ||
534 | if (snap_count && allocated_snaps == snap_count) { | 548 | if (snap_count && allocated_snaps == snap_count) { |
549 | int i; | ||
550 | |||
535 | for (i = 0; i < snap_count; i++) { | 551 | for (i = 0; i < snap_count; i++) { |
536 | header->snapc->snaps[i] = | 552 | header->snapc->snaps[i] = |
537 | le64_to_cpu(ondisk->snaps[i].id); | 553 | le64_to_cpu(ondisk->snaps[i].id); |
@@ -540,16 +556,22 @@ static int rbd_header_from_disk(struct rbd_image_header *header, | |||
540 | } | 556 | } |
541 | 557 | ||
542 | /* copy snapshot names */ | 558 | /* copy snapshot names */ |
543 | memcpy(header->snap_names, &ondisk->snaps[i], | 559 | memcpy(header->snap_names, &ondisk->snaps[snap_count], |
544 | header->snap_names_len); | 560 | header->snap_names_len); |
545 | } | 561 | } |
546 | 562 | ||
547 | return 0; | 563 | return 0; |
548 | 564 | ||
565 | err_sizes: | ||
566 | kfree(header->snap_sizes); | ||
567 | header->snap_sizes = NULL; | ||
549 | err_names: | 568 | err_names: |
550 | kfree(header->snap_names); | 569 | kfree(header->snap_names); |
570 | header->snap_names = NULL; | ||
551 | err_snapc: | 571 | err_snapc: |
552 | kfree(header->snapc); | 572 | kfree(header->snapc); |
573 | header->snapc = NULL; | ||
574 | |||
553 | return -ENOMEM; | 575 | return -ENOMEM; |
554 | } | 576 | } |
555 | 577 | ||
@@ -575,52 +597,50 @@ static int snap_by_name(struct rbd_image_header *header, const char *snap_name, | |||
575 | return -ENOENT; | 597 | return -ENOENT; |
576 | } | 598 | } |
577 | 599 | ||
578 | static int rbd_header_set_snap(struct rbd_device *dev, u64 *size) | 600 | static int rbd_header_set_snap(struct rbd_device *rbd_dev, u64 *size) |
579 | { | 601 | { |
580 | struct rbd_image_header *header = &dev->header; | 602 | int ret; |
581 | struct ceph_snap_context *snapc = header->snapc; | ||
582 | int ret = -ENOENT; | ||
583 | |||
584 | BUILD_BUG_ON(sizeof (dev->snap_name) < sizeof (RBD_SNAP_HEAD_NAME)); | ||
585 | 603 | ||
586 | down_write(&dev->header_rwsem); | 604 | down_write(&rbd_dev->header_rwsem); |
587 | 605 | ||
588 | if (!memcmp(dev->snap_name, RBD_SNAP_HEAD_NAME, | 606 | if (!memcmp(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME, |
589 | sizeof (RBD_SNAP_HEAD_NAME))) { | 607 | sizeof (RBD_SNAP_HEAD_NAME))) { |
590 | if (header->total_snaps) | 608 | rbd_dev->snap_id = CEPH_NOSNAP; |
591 | snapc->seq = header->snap_seq; | 609 | rbd_dev->snap_exists = false; |
592 | else | 610 | rbd_dev->read_only = 0; |
593 | snapc->seq = 0; | ||
594 | dev->snap_id = CEPH_NOSNAP; | ||
595 | dev->read_only = 0; | ||
596 | if (size) | 611 | if (size) |
597 | *size = header->image_size; | 612 | *size = rbd_dev->header.image_size; |
598 | } else { | 613 | } else { |
599 | ret = snap_by_name(header, dev->snap_name, &snapc->seq, size); | 614 | u64 snap_id = 0; |
615 | |||
616 | ret = snap_by_name(&rbd_dev->header, rbd_dev->snap_name, | ||
617 | &snap_id, size); | ||
600 | if (ret < 0) | 618 | if (ret < 0) |
601 | goto done; | 619 | goto done; |
602 | dev->snap_id = snapc->seq; | 620 | rbd_dev->snap_id = snap_id; |
603 | dev->read_only = 1; | 621 | rbd_dev->snap_exists = true; |
622 | rbd_dev->read_only = 1; | ||
604 | } | 623 | } |
605 | 624 | ||
606 | ret = 0; | 625 | ret = 0; |
607 | done: | 626 | done: |
608 | up_write(&dev->header_rwsem); | 627 | up_write(&rbd_dev->header_rwsem); |
609 | return ret; | 628 | return ret; |
610 | } | 629 | } |
611 | 630 | ||
612 | static void rbd_header_free(struct rbd_image_header *header) | 631 | static void rbd_header_free(struct rbd_image_header *header) |
613 | { | 632 | { |
614 | kfree(header->snapc); | 633 | kfree(header->object_prefix); |
615 | kfree(header->snap_names); | ||
616 | kfree(header->snap_sizes); | 634 | kfree(header->snap_sizes); |
635 | kfree(header->snap_names); | ||
636 | ceph_put_snap_context(header->snapc); | ||
617 | } | 637 | } |
618 | 638 | ||
619 | /* | 639 | /* |
620 | * get the actual striped segment name, offset and length | 640 | * get the actual striped segment name, offset and length |
621 | */ | 641 | */ |
622 | static u64 rbd_get_segment(struct rbd_image_header *header, | 642 | static u64 rbd_get_segment(struct rbd_image_header *header, |
623 | const char *block_name, | 643 | const char *object_prefix, |
624 | u64 ofs, u64 len, | 644 | u64 ofs, u64 len, |
625 | char *seg_name, u64 *segofs) | 645 | char *seg_name, u64 *segofs) |
626 | { | 646 | { |
@@ -628,7 +648,7 @@ static u64 rbd_get_segment(struct rbd_image_header *header, | |||
628 | 648 | ||
629 | if (seg_name) | 649 | if (seg_name) |
630 | snprintf(seg_name, RBD_MAX_SEG_NAME_LEN, | 650 | snprintf(seg_name, RBD_MAX_SEG_NAME_LEN, |
631 | "%s.%012llx", block_name, seg); | 651 | "%s.%012llx", object_prefix, seg); |
632 | 652 | ||
633 | ofs = ofs & ((1 << header->obj_order) - 1); | 653 | ofs = ofs & ((1 << header->obj_order) - 1); |
634 | len = min_t(u64, len, (1 << header->obj_order) - ofs); | 654 | len = min_t(u64, len, (1 << header->obj_order) - ofs); |
@@ -726,9 +746,8 @@ static struct bio *bio_chain_clone(struct bio **old, struct bio **next, | |||
726 | * split_bio will BUG_ON if this is not the case | 746 | * split_bio will BUG_ON if this is not the case |
727 | */ | 747 | */ |
728 | dout("bio_chain_clone split! total=%d remaining=%d" | 748 | dout("bio_chain_clone split! total=%d remaining=%d" |
729 | "bi_size=%d\n", | 749 | "bi_size=%u\n", |
730 | (int)total, (int)len-total, | 750 | total, len - total, old_chain->bi_size); |
731 | (int)old_chain->bi_size); | ||
732 | 751 | ||
733 | /* split the bio. We'll release it either in the next | 752 | /* split the bio. We'll release it either in the next |
734 | call, or it will have to be released outside */ | 753 | call, or it will have to be released outside */ |
@@ -777,22 +796,24 @@ err_out: | |||
777 | /* | 796 | /* |
778 | * helpers for osd request op vectors. | 797 | * helpers for osd request op vectors. |
779 | */ | 798 | */ |
780 | static int rbd_create_rw_ops(struct ceph_osd_req_op **ops, | 799 | static struct ceph_osd_req_op *rbd_create_rw_ops(int num_ops, |
781 | int num_ops, | 800 | int opcode, u32 payload_len) |
782 | int opcode, | 801 | { |
783 | u32 payload_len) | 802 | struct ceph_osd_req_op *ops; |
784 | { | 803 | |
785 | *ops = kzalloc(sizeof(struct ceph_osd_req_op) * (num_ops + 1), | 804 | ops = kzalloc(sizeof (*ops) * (num_ops + 1), GFP_NOIO); |
786 | GFP_NOIO); | 805 | if (!ops) |
787 | if (!*ops) | 806 | return NULL; |
788 | return -ENOMEM; | 807 | |
789 | (*ops)[0].op = opcode; | 808 | ops[0].op = opcode; |
809 | |||
790 | /* | 810 | /* |
791 | * op extent offset and length will be set later on | 811 | * op extent offset and length will be set later on |
792 | * in calc_raw_layout() | 812 | * in calc_raw_layout() |
793 | */ | 813 | */ |
794 | (*ops)[0].payload_len = payload_len; | 814 | ops[0].payload_len = payload_len; |
795 | return 0; | 815 | |
816 | return ops; | ||
796 | } | 817 | } |
797 | 818 | ||
798 | static void rbd_destroy_ops(struct ceph_osd_req_op *ops) | 819 | static void rbd_destroy_ops(struct ceph_osd_req_op *ops) |
@@ -808,8 +829,8 @@ static void rbd_coll_end_req_index(struct request *rq, | |||
808 | struct request_queue *q; | 829 | struct request_queue *q; |
809 | int min, max, i; | 830 | int min, max, i; |
810 | 831 | ||
811 | dout("rbd_coll_end_req_index %p index %d ret %d len %lld\n", | 832 | dout("rbd_coll_end_req_index %p index %d ret %d len %llu\n", |
812 | coll, index, ret, len); | 833 | coll, index, ret, (unsigned long long) len); |
813 | 834 | ||
814 | if (!rq) | 835 | if (!rq) |
815 | return; | 836 | return; |
@@ -848,16 +869,15 @@ static void rbd_coll_end_req(struct rbd_request *req, | |||
848 | * Send ceph osd request | 869 | * Send ceph osd request |
849 | */ | 870 | */ |
850 | static int rbd_do_request(struct request *rq, | 871 | static int rbd_do_request(struct request *rq, |
851 | struct rbd_device *dev, | 872 | struct rbd_device *rbd_dev, |
852 | struct ceph_snap_context *snapc, | 873 | struct ceph_snap_context *snapc, |
853 | u64 snapid, | 874 | u64 snapid, |
854 | const char *obj, u64 ofs, u64 len, | 875 | const char *object_name, u64 ofs, u64 len, |
855 | struct bio *bio, | 876 | struct bio *bio, |
856 | struct page **pages, | 877 | struct page **pages, |
857 | int num_pages, | 878 | int num_pages, |
858 | int flags, | 879 | int flags, |
859 | struct ceph_osd_req_op *ops, | 880 | struct ceph_osd_req_op *ops, |
860 | int num_reply, | ||
861 | struct rbd_req_coll *coll, | 881 | struct rbd_req_coll *coll, |
862 | int coll_index, | 882 | int coll_index, |
863 | void (*rbd_cb)(struct ceph_osd_request *req, | 883 | void (*rbd_cb)(struct ceph_osd_request *req, |
@@ -887,15 +907,13 @@ static int rbd_do_request(struct request *rq, | |||
887 | req_data->coll_index = coll_index; | 907 | req_data->coll_index = coll_index; |
888 | } | 908 | } |
889 | 909 | ||
890 | dout("rbd_do_request obj=%s ofs=%lld len=%lld\n", obj, len, ofs); | 910 | dout("rbd_do_request object_name=%s ofs=%llu len=%llu\n", object_name, |
891 | 911 | (unsigned long long) ofs, (unsigned long long) len); | |
892 | down_read(&dev->header_rwsem); | ||
893 | 912 | ||
894 | osdc = &dev->rbd_client->client->osdc; | 913 | osdc = &rbd_dev->rbd_client->client->osdc; |
895 | req = ceph_osdc_alloc_request(osdc, flags, snapc, ops, | 914 | req = ceph_osdc_alloc_request(osdc, flags, snapc, ops, |
896 | false, GFP_NOIO, pages, bio); | 915 | false, GFP_NOIO, pages, bio); |
897 | if (!req) { | 916 | if (!req) { |
898 | up_read(&dev->header_rwsem); | ||
899 | ret = -ENOMEM; | 917 | ret = -ENOMEM; |
900 | goto done_pages; | 918 | goto done_pages; |
901 | } | 919 | } |
@@ -912,7 +930,7 @@ static int rbd_do_request(struct request *rq, | |||
912 | reqhead = req->r_request->front.iov_base; | 930 | reqhead = req->r_request->front.iov_base; |
913 | reqhead->snapid = cpu_to_le64(CEPH_NOSNAP); | 931 | reqhead->snapid = cpu_to_le64(CEPH_NOSNAP); |
914 | 932 | ||
915 | strncpy(req->r_oid, obj, sizeof(req->r_oid)); | 933 | strncpy(req->r_oid, object_name, sizeof(req->r_oid)); |
916 | req->r_oid_len = strlen(req->r_oid); | 934 | req->r_oid_len = strlen(req->r_oid); |
917 | 935 | ||
918 | layout = &req->r_file_layout; | 936 | layout = &req->r_file_layout; |
@@ -920,7 +938,7 @@ static int rbd_do_request(struct request *rq, | |||
920 | layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); | 938 | layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); |
921 | layout->fl_stripe_count = cpu_to_le32(1); | 939 | layout->fl_stripe_count = cpu_to_le32(1); |
922 | layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); | 940 | layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); |
923 | layout->fl_pg_pool = cpu_to_le32(dev->poolid); | 941 | layout->fl_pg_pool = cpu_to_le32(rbd_dev->pool_id); |
924 | ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno, | 942 | ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno, |
925 | req, ops); | 943 | req, ops); |
926 | 944 | ||
@@ -929,7 +947,6 @@ static int rbd_do_request(struct request *rq, | |||
929 | snapc, | 947 | snapc, |
930 | &mtime, | 948 | &mtime, |
931 | req->r_oid, req->r_oid_len); | 949 | req->r_oid, req->r_oid_len); |
932 | up_read(&dev->header_rwsem); | ||
933 | 950 | ||
934 | if (linger_req) { | 951 | if (linger_req) { |
935 | ceph_osdc_set_request_linger(osdc, req); | 952 | ceph_osdc_set_request_linger(osdc, req); |
@@ -944,8 +961,9 @@ static int rbd_do_request(struct request *rq, | |||
944 | ret = ceph_osdc_wait_request(osdc, req); | 961 | ret = ceph_osdc_wait_request(osdc, req); |
945 | if (ver) | 962 | if (ver) |
946 | *ver = le64_to_cpu(req->r_reassert_version.version); | 963 | *ver = le64_to_cpu(req->r_reassert_version.version); |
947 | dout("reassert_ver=%lld\n", | 964 | dout("reassert_ver=%llu\n", |
948 | le64_to_cpu(req->r_reassert_version.version)); | 965 | (unsigned long long) |
966 | le64_to_cpu(req->r_reassert_version.version)); | ||
949 | ceph_osdc_put_request(req); | 967 | ceph_osdc_put_request(req); |
950 | } | 968 | } |
951 | return ret; | 969 | return ret; |
@@ -979,7 +997,8 @@ static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) | |||
979 | bytes = le64_to_cpu(op->extent.length); | 997 | bytes = le64_to_cpu(op->extent.length); |
980 | read_op = (le16_to_cpu(op->op) == CEPH_OSD_OP_READ); | 998 | read_op = (le16_to_cpu(op->op) == CEPH_OSD_OP_READ); |
981 | 999 | ||
982 | dout("rbd_req_cb bytes=%lld readop=%d rc=%d\n", bytes, read_op, rc); | 1000 | dout("rbd_req_cb bytes=%llu readop=%d rc=%d\n", |
1001 | (unsigned long long) bytes, read_op, (int) rc); | ||
983 | 1002 | ||
984 | if (rc == -ENOENT && read_op) { | 1003 | if (rc == -ENOENT && read_op) { |
985 | zero_bio_chain(req_data->bio, 0); | 1004 | zero_bio_chain(req_data->bio, 0); |
@@ -1006,14 +1025,12 @@ static void rbd_simple_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg | |||
1006 | /* | 1025 | /* |
1007 | * Do a synchronous ceph osd operation | 1026 | * Do a synchronous ceph osd operation |
1008 | */ | 1027 | */ |
1009 | static int rbd_req_sync_op(struct rbd_device *dev, | 1028 | static int rbd_req_sync_op(struct rbd_device *rbd_dev, |
1010 | struct ceph_snap_context *snapc, | 1029 | struct ceph_snap_context *snapc, |
1011 | u64 snapid, | 1030 | u64 snapid, |
1012 | int opcode, | ||
1013 | int flags, | 1031 | int flags, |
1014 | struct ceph_osd_req_op *orig_ops, | 1032 | struct ceph_osd_req_op *ops, |
1015 | int num_reply, | 1033 | const char *object_name, |
1016 | const char *obj, | ||
1017 | u64 ofs, u64 len, | 1034 | u64 ofs, u64 len, |
1018 | char *buf, | 1035 | char *buf, |
1019 | struct ceph_osd_request **linger_req, | 1036 | struct ceph_osd_request **linger_req, |
@@ -1022,45 +1039,28 @@ static int rbd_req_sync_op(struct rbd_device *dev, | |||
1022 | int ret; | 1039 | int ret; |
1023 | struct page **pages; | 1040 | struct page **pages; |
1024 | int num_pages; | 1041 | int num_pages; |
1025 | struct ceph_osd_req_op *ops = orig_ops; | 1042 | |
1026 | u32 payload_len; | 1043 | BUG_ON(ops == NULL); |
1027 | 1044 | ||
1028 | num_pages = calc_pages_for(ofs , len); | 1045 | num_pages = calc_pages_for(ofs , len); |
1029 | pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); | 1046 | pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); |
1030 | if (IS_ERR(pages)) | 1047 | if (IS_ERR(pages)) |
1031 | return PTR_ERR(pages); | 1048 | return PTR_ERR(pages); |
1032 | 1049 | ||
1033 | if (!orig_ops) { | 1050 | ret = rbd_do_request(NULL, rbd_dev, snapc, snapid, |
1034 | payload_len = (flags & CEPH_OSD_FLAG_WRITE ? len : 0); | 1051 | object_name, ofs, len, NULL, |
1035 | ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len); | ||
1036 | if (ret < 0) | ||
1037 | goto done; | ||
1038 | |||
1039 | if ((flags & CEPH_OSD_FLAG_WRITE) && buf) { | ||
1040 | ret = ceph_copy_to_page_vector(pages, buf, ofs, len); | ||
1041 | if (ret < 0) | ||
1042 | goto done_ops; | ||
1043 | } | ||
1044 | } | ||
1045 | |||
1046 | ret = rbd_do_request(NULL, dev, snapc, snapid, | ||
1047 | obj, ofs, len, NULL, | ||
1048 | pages, num_pages, | 1052 | pages, num_pages, |
1049 | flags, | 1053 | flags, |
1050 | ops, | 1054 | ops, |
1051 | 2, | ||
1052 | NULL, 0, | 1055 | NULL, 0, |
1053 | NULL, | 1056 | NULL, |
1054 | linger_req, ver); | 1057 | linger_req, ver); |
1055 | if (ret < 0) | 1058 | if (ret < 0) |
1056 | goto done_ops; | 1059 | goto done; |
1057 | 1060 | ||
1058 | if ((flags & CEPH_OSD_FLAG_READ) && buf) | 1061 | if ((flags & CEPH_OSD_FLAG_READ) && buf) |
1059 | ret = ceph_copy_from_page_vector(pages, buf, ofs, ret); | 1062 | ret = ceph_copy_from_page_vector(pages, buf, ofs, ret); |
1060 | 1063 | ||
1061 | done_ops: | ||
1062 | if (!orig_ops) | ||
1063 | rbd_destroy_ops(ops); | ||
1064 | done: | 1064 | done: |
1065 | ceph_release_page_vector(pages, num_pages); | 1065 | ceph_release_page_vector(pages, num_pages); |
1066 | return ret; | 1066 | return ret; |
@@ -1070,10 +1070,10 @@ done: | |||
1070 | * Do an asynchronous ceph osd operation | 1070 | * Do an asynchronous ceph osd operation |
1071 | */ | 1071 | */ |
1072 | static int rbd_do_op(struct request *rq, | 1072 | static int rbd_do_op(struct request *rq, |
1073 | struct rbd_device *rbd_dev , | 1073 | struct rbd_device *rbd_dev, |
1074 | struct ceph_snap_context *snapc, | 1074 | struct ceph_snap_context *snapc, |
1075 | u64 snapid, | 1075 | u64 snapid, |
1076 | int opcode, int flags, int num_reply, | 1076 | int opcode, int flags, |
1077 | u64 ofs, u64 len, | 1077 | u64 ofs, u64 len, |
1078 | struct bio *bio, | 1078 | struct bio *bio, |
1079 | struct rbd_req_coll *coll, | 1079 | struct rbd_req_coll *coll, |
@@ -1091,14 +1091,15 @@ static int rbd_do_op(struct request *rq, | |||
1091 | return -ENOMEM; | 1091 | return -ENOMEM; |
1092 | 1092 | ||
1093 | seg_len = rbd_get_segment(&rbd_dev->header, | 1093 | seg_len = rbd_get_segment(&rbd_dev->header, |
1094 | rbd_dev->header.block_name, | 1094 | rbd_dev->header.object_prefix, |
1095 | ofs, len, | 1095 | ofs, len, |
1096 | seg_name, &seg_ofs); | 1096 | seg_name, &seg_ofs); |
1097 | 1097 | ||
1098 | payload_len = (flags & CEPH_OSD_FLAG_WRITE ? seg_len : 0); | 1098 | payload_len = (flags & CEPH_OSD_FLAG_WRITE ? seg_len : 0); |
1099 | 1099 | ||
1100 | ret = rbd_create_rw_ops(&ops, 1, opcode, payload_len); | 1100 | ret = -ENOMEM; |
1101 | if (ret < 0) | 1101 | ops = rbd_create_rw_ops(1, opcode, payload_len); |
1102 | if (!ops) | ||
1102 | goto done; | 1103 | goto done; |
1103 | 1104 | ||
1104 | /* we've taken care of segment sizes earlier when we | 1105 | /* we've taken care of segment sizes earlier when we |
@@ -1112,7 +1113,6 @@ static int rbd_do_op(struct request *rq, | |||
1112 | NULL, 0, | 1113 | NULL, 0, |
1113 | flags, | 1114 | flags, |
1114 | ops, | 1115 | ops, |
1115 | num_reply, | ||
1116 | coll, coll_index, | 1116 | coll, coll_index, |
1117 | rbd_req_cb, 0, NULL); | 1117 | rbd_req_cb, 0, NULL); |
1118 | 1118 | ||
@@ -1136,7 +1136,6 @@ static int rbd_req_write(struct request *rq, | |||
1136 | return rbd_do_op(rq, rbd_dev, snapc, CEPH_NOSNAP, | 1136 | return rbd_do_op(rq, rbd_dev, snapc, CEPH_NOSNAP, |
1137 | CEPH_OSD_OP_WRITE, | 1137 | CEPH_OSD_OP_WRITE, |
1138 | CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, | 1138 | CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, |
1139 | 2, | ||
1140 | ofs, len, bio, coll, coll_index); | 1139 | ofs, len, bio, coll, coll_index); |
1141 | } | 1140 | } |
1142 | 1141 | ||
@@ -1155,55 +1154,58 @@ static int rbd_req_read(struct request *rq, | |||
1155 | snapid, | 1154 | snapid, |
1156 | CEPH_OSD_OP_READ, | 1155 | CEPH_OSD_OP_READ, |
1157 | CEPH_OSD_FLAG_READ, | 1156 | CEPH_OSD_FLAG_READ, |
1158 | 2, | ||
1159 | ofs, len, bio, coll, coll_index); | 1157 | ofs, len, bio, coll, coll_index); |
1160 | } | 1158 | } |
1161 | 1159 | ||
1162 | /* | 1160 | /* |
1163 | * Request sync osd read | 1161 | * Request sync osd read |
1164 | */ | 1162 | */ |
1165 | static int rbd_req_sync_read(struct rbd_device *dev, | 1163 | static int rbd_req_sync_read(struct rbd_device *rbd_dev, |
1166 | struct ceph_snap_context *snapc, | ||
1167 | u64 snapid, | 1164 | u64 snapid, |
1168 | const char *obj, | 1165 | const char *object_name, |
1169 | u64 ofs, u64 len, | 1166 | u64 ofs, u64 len, |
1170 | char *buf, | 1167 | char *buf, |
1171 | u64 *ver) | 1168 | u64 *ver) |
1172 | { | 1169 | { |
1173 | return rbd_req_sync_op(dev, NULL, | 1170 | struct ceph_osd_req_op *ops; |
1171 | int ret; | ||
1172 | |||
1173 | ops = rbd_create_rw_ops(1, CEPH_OSD_OP_READ, 0); | ||
1174 | if (!ops) | ||
1175 | return -ENOMEM; | ||
1176 | |||
1177 | ret = rbd_req_sync_op(rbd_dev, NULL, | ||
1174 | snapid, | 1178 | snapid, |
1175 | CEPH_OSD_OP_READ, | ||
1176 | CEPH_OSD_FLAG_READ, | 1179 | CEPH_OSD_FLAG_READ, |
1177 | NULL, | 1180 | ops, object_name, ofs, len, buf, NULL, ver); |
1178 | 1, obj, ofs, len, buf, NULL, ver); | 1181 | rbd_destroy_ops(ops); |
1182 | |||
1183 | return ret; | ||
1179 | } | 1184 | } |
1180 | 1185 | ||
1181 | /* | 1186 | /* |
1182 | * Request sync osd watch | 1187 | * Request sync osd watch |
1183 | */ | 1188 | */ |
1184 | static int rbd_req_sync_notify_ack(struct rbd_device *dev, | 1189 | static int rbd_req_sync_notify_ack(struct rbd_device *rbd_dev, |
1185 | u64 ver, | 1190 | u64 ver, |
1186 | u64 notify_id, | 1191 | u64 notify_id) |
1187 | const char *obj) | ||
1188 | { | 1192 | { |
1189 | struct ceph_osd_req_op *ops; | 1193 | struct ceph_osd_req_op *ops; |
1190 | struct page **pages = NULL; | ||
1191 | int ret; | 1194 | int ret; |
1192 | 1195 | ||
1193 | ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_NOTIFY_ACK, 0); | 1196 | ops = rbd_create_rw_ops(1, CEPH_OSD_OP_NOTIFY_ACK, 0); |
1194 | if (ret < 0) | 1197 | if (!ops) |
1195 | return ret; | 1198 | return -ENOMEM; |
1196 | 1199 | ||
1197 | ops[0].watch.ver = cpu_to_le64(dev->header.obj_version); | 1200 | ops[0].watch.ver = cpu_to_le64(ver); |
1198 | ops[0].watch.cookie = notify_id; | 1201 | ops[0].watch.cookie = notify_id; |
1199 | ops[0].watch.flag = 0; | 1202 | ops[0].watch.flag = 0; |
1200 | 1203 | ||
1201 | ret = rbd_do_request(NULL, dev, NULL, CEPH_NOSNAP, | 1204 | ret = rbd_do_request(NULL, rbd_dev, NULL, CEPH_NOSNAP, |
1202 | obj, 0, 0, NULL, | 1205 | rbd_dev->header_name, 0, 0, NULL, |
1203 | pages, 0, | 1206 | NULL, 0, |
1204 | CEPH_OSD_FLAG_READ, | 1207 | CEPH_OSD_FLAG_READ, |
1205 | ops, | 1208 | ops, |
1206 | 1, | ||
1207 | NULL, 0, | 1209 | NULL, 0, |
1208 | rbd_simple_req_cb, 0, NULL); | 1210 | rbd_simple_req_cb, 0, NULL); |
1209 | 1211 | ||
@@ -1213,54 +1215,53 @@ static int rbd_req_sync_notify_ack(struct rbd_device *dev, | |||
1213 | 1215 | ||
1214 | static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) | 1216 | static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) |
1215 | { | 1217 | { |
1216 | struct rbd_device *dev = (struct rbd_device *)data; | 1218 | struct rbd_device *rbd_dev = (struct rbd_device *)data; |
1219 | u64 hver; | ||
1217 | int rc; | 1220 | int rc; |
1218 | 1221 | ||
1219 | if (!dev) | 1222 | if (!rbd_dev) |
1220 | return; | 1223 | return; |
1221 | 1224 | ||
1222 | dout("rbd_watch_cb %s notify_id=%lld opcode=%d\n", dev->obj_md_name, | 1225 | dout("rbd_watch_cb %s notify_id=%llu opcode=%u\n", |
1223 | notify_id, (int)opcode); | 1226 | rbd_dev->header_name, (unsigned long long) notify_id, |
1224 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | 1227 | (unsigned int) opcode); |
1225 | rc = __rbd_refresh_header(dev); | 1228 | rc = rbd_refresh_header(rbd_dev, &hver); |
1226 | mutex_unlock(&ctl_mutex); | ||
1227 | if (rc) | 1229 | if (rc) |
1228 | pr_warning(RBD_DRV_NAME "%d got notification but failed to " | 1230 | pr_warning(RBD_DRV_NAME "%d got notification but failed to " |
1229 | " update snaps: %d\n", dev->major, rc); | 1231 | " update snaps: %d\n", rbd_dev->major, rc); |
1230 | 1232 | ||
1231 | rbd_req_sync_notify_ack(dev, ver, notify_id, dev->obj_md_name); | 1233 | rbd_req_sync_notify_ack(rbd_dev, hver, notify_id); |
1232 | } | 1234 | } |
1233 | 1235 | ||
1234 | /* | 1236 | /* |
1235 | * Request sync osd watch | 1237 | * Request sync osd watch |
1236 | */ | 1238 | */ |
1237 | static int rbd_req_sync_watch(struct rbd_device *dev, | 1239 | static int rbd_req_sync_watch(struct rbd_device *rbd_dev) |
1238 | const char *obj, | ||
1239 | u64 ver) | ||
1240 | { | 1240 | { |
1241 | struct ceph_osd_req_op *ops; | 1241 | struct ceph_osd_req_op *ops; |
1242 | struct ceph_osd_client *osdc = &dev->rbd_client->client->osdc; | 1242 | struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; |
1243 | int ret; | ||
1243 | 1244 | ||
1244 | int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_WATCH, 0); | 1245 | ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0); |
1245 | if (ret < 0) | 1246 | if (!ops) |
1246 | return ret; | 1247 | return -ENOMEM; |
1247 | 1248 | ||
1248 | ret = ceph_osdc_create_event(osdc, rbd_watch_cb, 0, | 1249 | ret = ceph_osdc_create_event(osdc, rbd_watch_cb, 0, |
1249 | (void *)dev, &dev->watch_event); | 1250 | (void *)rbd_dev, &rbd_dev->watch_event); |
1250 | if (ret < 0) | 1251 | if (ret < 0) |
1251 | goto fail; | 1252 | goto fail; |
1252 | 1253 | ||
1253 | ops[0].watch.ver = cpu_to_le64(ver); | 1254 | ops[0].watch.ver = cpu_to_le64(rbd_dev->header.obj_version); |
1254 | ops[0].watch.cookie = cpu_to_le64(dev->watch_event->cookie); | 1255 | ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie); |
1255 | ops[0].watch.flag = 1; | 1256 | ops[0].watch.flag = 1; |
1256 | 1257 | ||
1257 | ret = rbd_req_sync_op(dev, NULL, | 1258 | ret = rbd_req_sync_op(rbd_dev, NULL, |
1258 | CEPH_NOSNAP, | 1259 | CEPH_NOSNAP, |
1259 | 0, | ||
1260 | CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, | 1260 | CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, |
1261 | ops, | 1261 | ops, |
1262 | 1, obj, 0, 0, NULL, | 1262 | rbd_dev->header_name, |
1263 | &dev->watch_request, NULL); | 1263 | 0, 0, NULL, |
1264 | &rbd_dev->watch_request, NULL); | ||
1264 | 1265 | ||
1265 | if (ret < 0) | 1266 | if (ret < 0) |
1266 | goto fail_event; | 1267 | goto fail_event; |
@@ -1269,8 +1270,8 @@ static int rbd_req_sync_watch(struct rbd_device *dev, | |||
1269 | return 0; | 1270 | return 0; |
1270 | 1271 | ||
1271 | fail_event: | 1272 | fail_event: |
1272 | ceph_osdc_cancel_event(dev->watch_event); | 1273 | ceph_osdc_cancel_event(rbd_dev->watch_event); |
1273 | dev->watch_event = NULL; | 1274 | rbd_dev->watch_event = NULL; |
1274 | fail: | 1275 | fail: |
1275 | rbd_destroy_ops(ops); | 1276 | rbd_destroy_ops(ops); |
1276 | return ret; | 1277 | return ret; |
@@ -1279,64 +1280,65 @@ fail: | |||
1279 | /* | 1280 | /* |
1280 | * Request sync osd unwatch | 1281 | * Request sync osd unwatch |
1281 | */ | 1282 | */ |
1282 | static int rbd_req_sync_unwatch(struct rbd_device *dev, | 1283 | static int rbd_req_sync_unwatch(struct rbd_device *rbd_dev) |
1283 | const char *obj) | ||
1284 | { | 1284 | { |
1285 | struct ceph_osd_req_op *ops; | 1285 | struct ceph_osd_req_op *ops; |
1286 | int ret; | ||
1286 | 1287 | ||
1287 | int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_WATCH, 0); | 1288 | ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0); |
1288 | if (ret < 0) | 1289 | if (!ops) |
1289 | return ret; | 1290 | return -ENOMEM; |
1290 | 1291 | ||
1291 | ops[0].watch.ver = 0; | 1292 | ops[0].watch.ver = 0; |
1292 | ops[0].watch.cookie = cpu_to_le64(dev->watch_event->cookie); | 1293 | ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie); |
1293 | ops[0].watch.flag = 0; | 1294 | ops[0].watch.flag = 0; |
1294 | 1295 | ||
1295 | ret = rbd_req_sync_op(dev, NULL, | 1296 | ret = rbd_req_sync_op(rbd_dev, NULL, |
1296 | CEPH_NOSNAP, | 1297 | CEPH_NOSNAP, |
1297 | 0, | ||
1298 | CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, | 1298 | CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, |
1299 | ops, | 1299 | ops, |
1300 | 1, obj, 0, 0, NULL, NULL, NULL); | 1300 | rbd_dev->header_name, |
1301 | 0, 0, NULL, NULL, NULL); | ||
1302 | |||
1301 | 1303 | ||
1302 | rbd_destroy_ops(ops); | 1304 | rbd_destroy_ops(ops); |
1303 | ceph_osdc_cancel_event(dev->watch_event); | 1305 | ceph_osdc_cancel_event(rbd_dev->watch_event); |
1304 | dev->watch_event = NULL; | 1306 | rbd_dev->watch_event = NULL; |
1305 | return ret; | 1307 | return ret; |
1306 | } | 1308 | } |
1307 | 1309 | ||
1308 | struct rbd_notify_info { | 1310 | struct rbd_notify_info { |
1309 | struct rbd_device *dev; | 1311 | struct rbd_device *rbd_dev; |
1310 | }; | 1312 | }; |
1311 | 1313 | ||
1312 | static void rbd_notify_cb(u64 ver, u64 notify_id, u8 opcode, void *data) | 1314 | static void rbd_notify_cb(u64 ver, u64 notify_id, u8 opcode, void *data) |
1313 | { | 1315 | { |
1314 | struct rbd_device *dev = (struct rbd_device *)data; | 1316 | struct rbd_device *rbd_dev = (struct rbd_device *)data; |
1315 | if (!dev) | 1317 | if (!rbd_dev) |
1316 | return; | 1318 | return; |
1317 | 1319 | ||
1318 | dout("rbd_notify_cb %s notify_id=%lld opcode=%d\n", dev->obj_md_name, | 1320 | dout("rbd_notify_cb %s notify_id=%llu opcode=%u\n", |
1319 | notify_id, (int)opcode); | 1321 | rbd_dev->header_name, (unsigned long long) notify_id, |
1322 | (unsigned int) opcode); | ||
1320 | } | 1323 | } |
1321 | 1324 | ||
1322 | /* | 1325 | /* |
1323 | * Request sync osd notify | 1326 | * Request sync osd notify |
1324 | */ | 1327 | */ |
1325 | static int rbd_req_sync_notify(struct rbd_device *dev, | 1328 | static int rbd_req_sync_notify(struct rbd_device *rbd_dev) |
1326 | const char *obj) | ||
1327 | { | 1329 | { |
1328 | struct ceph_osd_req_op *ops; | 1330 | struct ceph_osd_req_op *ops; |
1329 | struct ceph_osd_client *osdc = &dev->rbd_client->client->osdc; | 1331 | struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; |
1330 | struct ceph_osd_event *event; | 1332 | struct ceph_osd_event *event; |
1331 | struct rbd_notify_info info; | 1333 | struct rbd_notify_info info; |
1332 | int payload_len = sizeof(u32) + sizeof(u32); | 1334 | int payload_len = sizeof(u32) + sizeof(u32); |
1333 | int ret; | 1335 | int ret; |
1334 | 1336 | ||
1335 | ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_NOTIFY, payload_len); | 1337 | ops = rbd_create_rw_ops(1, CEPH_OSD_OP_NOTIFY, payload_len); |
1336 | if (ret < 0) | 1338 | if (!ops) |
1337 | return ret; | 1339 | return -ENOMEM; |
1338 | 1340 | ||
1339 | info.dev = dev; | 1341 | info.rbd_dev = rbd_dev; |
1340 | 1342 | ||
1341 | ret = ceph_osdc_create_event(osdc, rbd_notify_cb, 1, | 1343 | ret = ceph_osdc_create_event(osdc, rbd_notify_cb, 1, |
1342 | (void *)&info, &event); | 1344 | (void *)&info, &event); |
@@ -1349,12 +1351,12 @@ static int rbd_req_sync_notify(struct rbd_device *dev, | |||
1349 | ops[0].watch.prot_ver = RADOS_NOTIFY_VER; | 1351 | ops[0].watch.prot_ver = RADOS_NOTIFY_VER; |
1350 | ops[0].watch.timeout = 12; | 1352 | ops[0].watch.timeout = 12; |
1351 | 1353 | ||
1352 | ret = rbd_req_sync_op(dev, NULL, | 1354 | ret = rbd_req_sync_op(rbd_dev, NULL, |
1353 | CEPH_NOSNAP, | 1355 | CEPH_NOSNAP, |
1354 | 0, | ||
1355 | CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, | 1356 | CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, |
1356 | ops, | 1357 | ops, |
1357 | 1, obj, 0, 0, NULL, NULL, NULL); | 1358 | rbd_dev->header_name, |
1359 | 0, 0, NULL, NULL, NULL); | ||
1358 | if (ret < 0) | 1360 | if (ret < 0) |
1359 | goto fail_event; | 1361 | goto fail_event; |
1360 | 1362 | ||
@@ -1373,36 +1375,37 @@ fail: | |||
1373 | /* | 1375 | /* |
1374 | * Request sync osd read | 1376 | * Request sync osd read |
1375 | */ | 1377 | */ |
1376 | static int rbd_req_sync_exec(struct rbd_device *dev, | 1378 | static int rbd_req_sync_exec(struct rbd_device *rbd_dev, |
1377 | const char *obj, | 1379 | const char *object_name, |
1378 | const char *cls, | 1380 | const char *class_name, |
1379 | const char *method, | 1381 | const char *method_name, |
1380 | const char *data, | 1382 | const char *data, |
1381 | int len, | 1383 | int len, |
1382 | u64 *ver) | 1384 | u64 *ver) |
1383 | { | 1385 | { |
1384 | struct ceph_osd_req_op *ops; | 1386 | struct ceph_osd_req_op *ops; |
1385 | int cls_len = strlen(cls); | 1387 | int class_name_len = strlen(class_name); |
1386 | int method_len = strlen(method); | 1388 | int method_name_len = strlen(method_name); |
1387 | int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_CALL, | 1389 | int ret; |
1388 | cls_len + method_len + len); | ||
1389 | if (ret < 0) | ||
1390 | return ret; | ||
1391 | 1390 | ||
1392 | ops[0].cls.class_name = cls; | 1391 | ops = rbd_create_rw_ops(1, CEPH_OSD_OP_CALL, |
1393 | ops[0].cls.class_len = (__u8)cls_len; | 1392 | class_name_len + method_name_len + len); |
1394 | ops[0].cls.method_name = method; | 1393 | if (!ops) |
1395 | ops[0].cls.method_len = (__u8)method_len; | 1394 | return -ENOMEM; |
1395 | |||
1396 | ops[0].cls.class_name = class_name; | ||
1397 | ops[0].cls.class_len = (__u8) class_name_len; | ||
1398 | ops[0].cls.method_name = method_name; | ||
1399 | ops[0].cls.method_len = (__u8) method_name_len; | ||
1396 | ops[0].cls.argc = 0; | 1400 | ops[0].cls.argc = 0; |
1397 | ops[0].cls.indata = data; | 1401 | ops[0].cls.indata = data; |
1398 | ops[0].cls.indata_len = len; | 1402 | ops[0].cls.indata_len = len; |
1399 | 1403 | ||
1400 | ret = rbd_req_sync_op(dev, NULL, | 1404 | ret = rbd_req_sync_op(rbd_dev, NULL, |
1401 | CEPH_NOSNAP, | 1405 | CEPH_NOSNAP, |
1402 | 0, | ||
1403 | CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, | 1406 | CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, |
1404 | ops, | 1407 | ops, |
1405 | 1, obj, 0, 0, NULL, NULL, ver); | 1408 | object_name, 0, 0, NULL, NULL, ver); |
1406 | 1409 | ||
1407 | rbd_destroy_ops(ops); | 1410 | rbd_destroy_ops(ops); |
1408 | 1411 | ||
@@ -1437,10 +1440,12 @@ static void rbd_rq_fn(struct request_queue *q) | |||
1437 | struct bio *bio; | 1440 | struct bio *bio; |
1438 | struct bio *rq_bio, *next_bio = NULL; | 1441 | struct bio *rq_bio, *next_bio = NULL; |
1439 | bool do_write; | 1442 | bool do_write; |
1440 | int size, op_size = 0; | 1443 | unsigned int size; |
1444 | u64 op_size = 0; | ||
1441 | u64 ofs; | 1445 | u64 ofs; |
1442 | int num_segs, cur_seg = 0; | 1446 | int num_segs, cur_seg = 0; |
1443 | struct rbd_req_coll *coll; | 1447 | struct rbd_req_coll *coll; |
1448 | struct ceph_snap_context *snapc; | ||
1444 | 1449 | ||
1445 | /* peek at request from block layer */ | 1450 | /* peek at request from block layer */ |
1446 | if (!rq) | 1451 | if (!rq) |
@@ -1467,23 +1472,38 @@ static void rbd_rq_fn(struct request_queue *q) | |||
1467 | 1472 | ||
1468 | spin_unlock_irq(q->queue_lock); | 1473 | spin_unlock_irq(q->queue_lock); |
1469 | 1474 | ||
1475 | down_read(&rbd_dev->header_rwsem); | ||
1476 | |||
1477 | if (rbd_dev->snap_id != CEPH_NOSNAP && !rbd_dev->snap_exists) { | ||
1478 | up_read(&rbd_dev->header_rwsem); | ||
1479 | dout("request for non-existent snapshot"); | ||
1480 | spin_lock_irq(q->queue_lock); | ||
1481 | __blk_end_request_all(rq, -ENXIO); | ||
1482 | continue; | ||
1483 | } | ||
1484 | |||
1485 | snapc = ceph_get_snap_context(rbd_dev->header.snapc); | ||
1486 | |||
1487 | up_read(&rbd_dev->header_rwsem); | ||
1488 | |||
1470 | dout("%s 0x%x bytes at 0x%llx\n", | 1489 | dout("%s 0x%x bytes at 0x%llx\n", |
1471 | do_write ? "write" : "read", | 1490 | do_write ? "write" : "read", |
1472 | size, blk_rq_pos(rq) * SECTOR_SIZE); | 1491 | size, (unsigned long long) blk_rq_pos(rq) * SECTOR_SIZE); |
1473 | 1492 | ||
1474 | num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size); | 1493 | num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size); |
1475 | coll = rbd_alloc_coll(num_segs); | 1494 | coll = rbd_alloc_coll(num_segs); |
1476 | if (!coll) { | 1495 | if (!coll) { |
1477 | spin_lock_irq(q->queue_lock); | 1496 | spin_lock_irq(q->queue_lock); |
1478 | __blk_end_request_all(rq, -ENOMEM); | 1497 | __blk_end_request_all(rq, -ENOMEM); |
1498 | ceph_put_snap_context(snapc); | ||
1479 | continue; | 1499 | continue; |
1480 | } | 1500 | } |
1481 | 1501 | ||
1482 | do { | 1502 | do { |
1483 | /* a bio clone to be passed down to OSD req */ | 1503 | /* a bio clone to be passed down to OSD req */ |
1484 | dout("rq->bio->bi_vcnt=%d\n", rq->bio->bi_vcnt); | 1504 | dout("rq->bio->bi_vcnt=%hu\n", rq->bio->bi_vcnt); |
1485 | op_size = rbd_get_segment(&rbd_dev->header, | 1505 | op_size = rbd_get_segment(&rbd_dev->header, |
1486 | rbd_dev->header.block_name, | 1506 | rbd_dev->header.object_prefix, |
1487 | ofs, size, | 1507 | ofs, size, |
1488 | NULL, NULL); | 1508 | NULL, NULL); |
1489 | kref_get(&coll->kref); | 1509 | kref_get(&coll->kref); |
@@ -1499,7 +1519,7 @@ static void rbd_rq_fn(struct request_queue *q) | |||
1499 | /* init OSD command: write or read */ | 1519 | /* init OSD command: write or read */ |
1500 | if (do_write) | 1520 | if (do_write) |
1501 | rbd_req_write(rq, rbd_dev, | 1521 | rbd_req_write(rq, rbd_dev, |
1502 | rbd_dev->header.snapc, | 1522 | snapc, |
1503 | ofs, | 1523 | ofs, |
1504 | op_size, bio, | 1524 | op_size, bio, |
1505 | coll, cur_seg); | 1525 | coll, cur_seg); |
@@ -1522,6 +1542,8 @@ next_seg: | |||
1522 | if (bp) | 1542 | if (bp) |
1523 | bio_pair_release(bp); | 1543 | bio_pair_release(bp); |
1524 | spin_lock_irq(q->queue_lock); | 1544 | spin_lock_irq(q->queue_lock); |
1545 | |||
1546 | ceph_put_snap_context(snapc); | ||
1525 | } | 1547 | } |
1526 | } | 1548 | } |
1527 | 1549 | ||
@@ -1592,18 +1614,19 @@ static int rbd_read_header(struct rbd_device *rbd_dev, | |||
1592 | return -ENOMEM; | 1614 | return -ENOMEM; |
1593 | 1615 | ||
1594 | rc = rbd_req_sync_read(rbd_dev, | 1616 | rc = rbd_req_sync_read(rbd_dev, |
1595 | NULL, CEPH_NOSNAP, | 1617 | CEPH_NOSNAP, |
1596 | rbd_dev->obj_md_name, | 1618 | rbd_dev->header_name, |
1597 | 0, len, | 1619 | 0, len, |
1598 | (char *)dh, &ver); | 1620 | (char *)dh, &ver); |
1599 | if (rc < 0) | 1621 | if (rc < 0) |
1600 | goto out_dh; | 1622 | goto out_dh; |
1601 | 1623 | ||
1602 | rc = rbd_header_from_disk(header, dh, snap_count, GFP_KERNEL); | 1624 | rc = rbd_header_from_disk(header, dh, snap_count); |
1603 | if (rc < 0) { | 1625 | if (rc < 0) { |
1604 | if (rc == -ENXIO) | 1626 | if (rc == -ENXIO) |
1605 | pr_warning("unrecognized header format" | 1627 | pr_warning("unrecognized header format" |
1606 | " for image %s", rbd_dev->obj); | 1628 | " for image %s\n", |
1629 | rbd_dev->image_name); | ||
1607 | goto out_dh; | 1630 | goto out_dh; |
1608 | } | 1631 | } |
1609 | 1632 | ||
@@ -1628,7 +1651,7 @@ out_dh: | |||
1628 | /* | 1651 | /* |
1629 | * create a snapshot | 1652 | * create a snapshot |
1630 | */ | 1653 | */ |
1631 | static int rbd_header_add_snap(struct rbd_device *dev, | 1654 | static int rbd_header_add_snap(struct rbd_device *rbd_dev, |
1632 | const char *snap_name, | 1655 | const char *snap_name, |
1633 | gfp_t gfp_flags) | 1656 | gfp_t gfp_flags) |
1634 | { | 1657 | { |
@@ -1636,16 +1659,15 @@ static int rbd_header_add_snap(struct rbd_device *dev, | |||
1636 | u64 new_snapid; | 1659 | u64 new_snapid; |
1637 | int ret; | 1660 | int ret; |
1638 | void *data, *p, *e; | 1661 | void *data, *p, *e; |
1639 | u64 ver; | ||
1640 | struct ceph_mon_client *monc; | 1662 | struct ceph_mon_client *monc; |
1641 | 1663 | ||
1642 | /* we should create a snapshot only if we're pointing at the head */ | 1664 | /* we should create a snapshot only if we're pointing at the head */ |
1643 | if (dev->snap_id != CEPH_NOSNAP) | 1665 | if (rbd_dev->snap_id != CEPH_NOSNAP) |
1644 | return -EINVAL; | 1666 | return -EINVAL; |
1645 | 1667 | ||
1646 | monc = &dev->rbd_client->client->monc; | 1668 | monc = &rbd_dev->rbd_client->client->monc; |
1647 | ret = ceph_monc_create_snapid(monc, dev->poolid, &new_snapid); | 1669 | ret = ceph_monc_create_snapid(monc, rbd_dev->pool_id, &new_snapid); |
1648 | dout("created snapid=%lld\n", new_snapid); | 1670 | dout("created snapid=%llu\n", (unsigned long long) new_snapid); |
1649 | if (ret < 0) | 1671 | if (ret < 0) |
1650 | return ret; | 1672 | return ret; |
1651 | 1673 | ||
@@ -1659,19 +1681,13 @@ static int rbd_header_add_snap(struct rbd_device *dev, | |||
1659 | ceph_encode_string_safe(&p, e, snap_name, name_len, bad); | 1681 | ceph_encode_string_safe(&p, e, snap_name, name_len, bad); |
1660 | ceph_encode_64_safe(&p, e, new_snapid, bad); | 1682 | ceph_encode_64_safe(&p, e, new_snapid, bad); |
1661 | 1683 | ||
1662 | ret = rbd_req_sync_exec(dev, dev->obj_md_name, "rbd", "snap_add", | 1684 | ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, |
1663 | data, p - data, &ver); | 1685 | "rbd", "snap_add", |
1686 | data, p - data, NULL); | ||
1664 | 1687 | ||
1665 | kfree(data); | 1688 | kfree(data); |
1666 | 1689 | ||
1667 | if (ret < 0) | 1690 | return ret < 0 ? ret : 0; |
1668 | return ret; | ||
1669 | |||
1670 | down_write(&dev->header_rwsem); | ||
1671 | dev->header.snapc->seq = new_snapid; | ||
1672 | up_write(&dev->header_rwsem); | ||
1673 | |||
1674 | return 0; | ||
1675 | bad: | 1691 | bad: |
1676 | return -ERANGE; | 1692 | return -ERANGE; |
1677 | } | 1693 | } |
@@ -1679,52 +1695,52 @@ bad: | |||
1679 | static void __rbd_remove_all_snaps(struct rbd_device *rbd_dev) | 1695 | static void __rbd_remove_all_snaps(struct rbd_device *rbd_dev) |
1680 | { | 1696 | { |
1681 | struct rbd_snap *snap; | 1697 | struct rbd_snap *snap; |
1698 | struct rbd_snap *next; | ||
1682 | 1699 | ||
1683 | while (!list_empty(&rbd_dev->snaps)) { | 1700 | list_for_each_entry_safe(snap, next, &rbd_dev->snaps, node) |
1684 | snap = list_first_entry(&rbd_dev->snaps, struct rbd_snap, node); | 1701 | __rbd_remove_snap_dev(snap); |
1685 | __rbd_remove_snap_dev(rbd_dev, snap); | ||
1686 | } | ||
1687 | } | 1702 | } |
1688 | 1703 | ||
1689 | /* | 1704 | /* |
1690 | * only read the first part of the ondisk header, without the snaps info | 1705 | * only read the first part of the ondisk header, without the snaps info |
1691 | */ | 1706 | */ |
1692 | static int __rbd_refresh_header(struct rbd_device *rbd_dev) | 1707 | static int __rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver) |
1693 | { | 1708 | { |
1694 | int ret; | 1709 | int ret; |
1695 | struct rbd_image_header h; | 1710 | struct rbd_image_header h; |
1696 | u64 snap_seq; | ||
1697 | int follow_seq = 0; | ||
1698 | 1711 | ||
1699 | ret = rbd_read_header(rbd_dev, &h); | 1712 | ret = rbd_read_header(rbd_dev, &h); |
1700 | if (ret < 0) | 1713 | if (ret < 0) |
1701 | return ret; | 1714 | return ret; |
1702 | 1715 | ||
1703 | /* resized? */ | ||
1704 | set_capacity(rbd_dev->disk, h.image_size / SECTOR_SIZE); | ||
1705 | |||
1706 | down_write(&rbd_dev->header_rwsem); | 1716 | down_write(&rbd_dev->header_rwsem); |
1707 | 1717 | ||
1708 | snap_seq = rbd_dev->header.snapc->seq; | 1718 | /* resized? */ |
1709 | if (rbd_dev->header.total_snaps && | 1719 | if (rbd_dev->snap_id == CEPH_NOSNAP) { |
1710 | rbd_dev->header.snapc->snaps[0] == snap_seq) | 1720 | sector_t size = (sector_t) h.image_size / SECTOR_SIZE; |
1711 | /* pointing at the head, will need to follow that | ||
1712 | if head moves */ | ||
1713 | follow_seq = 1; | ||
1714 | 1721 | ||
1715 | kfree(rbd_dev->header.snapc); | 1722 | dout("setting size to %llu sectors", (unsigned long long) size); |
1716 | kfree(rbd_dev->header.snap_names); | 1723 | set_capacity(rbd_dev->disk, size); |
1724 | } | ||
1725 | |||
1726 | /* rbd_dev->header.object_prefix shouldn't change */ | ||
1717 | kfree(rbd_dev->header.snap_sizes); | 1727 | kfree(rbd_dev->header.snap_sizes); |
1728 | kfree(rbd_dev->header.snap_names); | ||
1729 | /* osd requests may still refer to snapc */ | ||
1730 | ceph_put_snap_context(rbd_dev->header.snapc); | ||
1718 | 1731 | ||
1732 | if (hver) | ||
1733 | *hver = h.obj_version; | ||
1734 | rbd_dev->header.obj_version = h.obj_version; | ||
1735 | rbd_dev->header.image_size = h.image_size; | ||
1719 | rbd_dev->header.total_snaps = h.total_snaps; | 1736 | rbd_dev->header.total_snaps = h.total_snaps; |
1720 | rbd_dev->header.snapc = h.snapc; | 1737 | rbd_dev->header.snapc = h.snapc; |
1721 | rbd_dev->header.snap_names = h.snap_names; | 1738 | rbd_dev->header.snap_names = h.snap_names; |
1722 | rbd_dev->header.snap_names_len = h.snap_names_len; | 1739 | rbd_dev->header.snap_names_len = h.snap_names_len; |
1723 | rbd_dev->header.snap_sizes = h.snap_sizes; | 1740 | rbd_dev->header.snap_sizes = h.snap_sizes; |
1724 | if (follow_seq) | 1741 | /* Free the extra copy of the object prefix */ |
1725 | rbd_dev->header.snapc->seq = rbd_dev->header.snapc->snaps[0]; | 1742 | WARN_ON(strcmp(rbd_dev->header.object_prefix, h.object_prefix)); |
1726 | else | 1743 | kfree(h.object_prefix); |
1727 | rbd_dev->header.snapc->seq = snap_seq; | ||
1728 | 1744 | ||
1729 | ret = __rbd_init_snaps_header(rbd_dev); | 1745 | ret = __rbd_init_snaps_header(rbd_dev); |
1730 | 1746 | ||
@@ -1733,6 +1749,17 @@ static int __rbd_refresh_header(struct rbd_device *rbd_dev) | |||
1733 | return ret; | 1749 | return ret; |
1734 | } | 1750 | } |
1735 | 1751 | ||
1752 | static int rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver) | ||
1753 | { | ||
1754 | int ret; | ||
1755 | |||
1756 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
1757 | ret = __rbd_refresh_header(rbd_dev, hver); | ||
1758 | mutex_unlock(&ctl_mutex); | ||
1759 | |||
1760 | return ret; | ||
1761 | } | ||
1762 | |||
1736 | static int rbd_init_disk(struct rbd_device *rbd_dev) | 1763 | static int rbd_init_disk(struct rbd_device *rbd_dev) |
1737 | { | 1764 | { |
1738 | struct gendisk *disk; | 1765 | struct gendisk *disk; |
@@ -1762,7 +1789,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) | |||
1762 | goto out; | 1789 | goto out; |
1763 | 1790 | ||
1764 | snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d", | 1791 | snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d", |
1765 | rbd_dev->id); | 1792 | rbd_dev->dev_id); |
1766 | disk->major = rbd_dev->major; | 1793 | disk->major = rbd_dev->major; |
1767 | disk->first_minor = 0; | 1794 | disk->first_minor = 0; |
1768 | disk->fops = &rbd_bd_ops; | 1795 | disk->fops = &rbd_bd_ops; |
@@ -1819,8 +1846,13 @@ static ssize_t rbd_size_show(struct device *dev, | |||
1819 | struct device_attribute *attr, char *buf) | 1846 | struct device_attribute *attr, char *buf) |
1820 | { | 1847 | { |
1821 | struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); | 1848 | struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); |
1849 | sector_t size; | ||
1850 | |||
1851 | down_read(&rbd_dev->header_rwsem); | ||
1852 | size = get_capacity(rbd_dev->disk); | ||
1853 | up_read(&rbd_dev->header_rwsem); | ||
1822 | 1854 | ||
1823 | return sprintf(buf, "%llu\n", (unsigned long long)rbd_dev->header.image_size); | 1855 | return sprintf(buf, "%llu\n", (unsigned long long) size * SECTOR_SIZE); |
1824 | } | 1856 | } |
1825 | 1857 | ||
1826 | static ssize_t rbd_major_show(struct device *dev, | 1858 | static ssize_t rbd_major_show(struct device *dev, |
@@ -1848,12 +1880,20 @@ static ssize_t rbd_pool_show(struct device *dev, | |||
1848 | return sprintf(buf, "%s\n", rbd_dev->pool_name); | 1880 | return sprintf(buf, "%s\n", rbd_dev->pool_name); |
1849 | } | 1881 | } |
1850 | 1882 | ||
1883 | static ssize_t rbd_pool_id_show(struct device *dev, | ||
1884 | struct device_attribute *attr, char *buf) | ||
1885 | { | ||
1886 | struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); | ||
1887 | |||
1888 | return sprintf(buf, "%d\n", rbd_dev->pool_id); | ||
1889 | } | ||
1890 | |||
1851 | static ssize_t rbd_name_show(struct device *dev, | 1891 | static ssize_t rbd_name_show(struct device *dev, |
1852 | struct device_attribute *attr, char *buf) | 1892 | struct device_attribute *attr, char *buf) |
1853 | { | 1893 | { |
1854 | struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); | 1894 | struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); |
1855 | 1895 | ||
1856 | return sprintf(buf, "%s\n", rbd_dev->obj); | 1896 | return sprintf(buf, "%s\n", rbd_dev->image_name); |
1857 | } | 1897 | } |
1858 | 1898 | ||
1859 | static ssize_t rbd_snap_show(struct device *dev, | 1899 | static ssize_t rbd_snap_show(struct device *dev, |
@@ -1871,23 +1911,18 @@ static ssize_t rbd_image_refresh(struct device *dev, | |||
1871 | size_t size) | 1911 | size_t size) |
1872 | { | 1912 | { |
1873 | struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); | 1913 | struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); |
1874 | int rc; | 1914 | int ret; |
1875 | int ret = size; | ||
1876 | |||
1877 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | ||
1878 | 1915 | ||
1879 | rc = __rbd_refresh_header(rbd_dev); | 1916 | ret = rbd_refresh_header(rbd_dev, NULL); |
1880 | if (rc < 0) | ||
1881 | ret = rc; | ||
1882 | 1917 | ||
1883 | mutex_unlock(&ctl_mutex); | 1918 | return ret < 0 ? ret : size; |
1884 | return ret; | ||
1885 | } | 1919 | } |
1886 | 1920 | ||
1887 | static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL); | 1921 | static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL); |
1888 | static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL); | 1922 | static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL); |
1889 | static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL); | 1923 | static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL); |
1890 | static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL); | 1924 | static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL); |
1925 | static DEVICE_ATTR(pool_id, S_IRUGO, rbd_pool_id_show, NULL); | ||
1891 | static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL); | 1926 | static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL); |
1892 | static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh); | 1927 | static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh); |
1893 | static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL); | 1928 | static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL); |
@@ -1898,6 +1933,7 @@ static struct attribute *rbd_attrs[] = { | |||
1898 | &dev_attr_major.attr, | 1933 | &dev_attr_major.attr, |
1899 | &dev_attr_client_id.attr, | 1934 | &dev_attr_client_id.attr, |
1900 | &dev_attr_pool.attr, | 1935 | &dev_attr_pool.attr, |
1936 | &dev_attr_pool_id.attr, | ||
1901 | &dev_attr_name.attr, | 1937 | &dev_attr_name.attr, |
1902 | &dev_attr_current_snap.attr, | 1938 | &dev_attr_current_snap.attr, |
1903 | &dev_attr_refresh.attr, | 1939 | &dev_attr_refresh.attr, |
@@ -1977,15 +2013,13 @@ static struct device_type rbd_snap_device_type = { | |||
1977 | .release = rbd_snap_dev_release, | 2013 | .release = rbd_snap_dev_release, |
1978 | }; | 2014 | }; |
1979 | 2015 | ||
1980 | static void __rbd_remove_snap_dev(struct rbd_device *rbd_dev, | 2016 | static void __rbd_remove_snap_dev(struct rbd_snap *snap) |
1981 | struct rbd_snap *snap) | ||
1982 | { | 2017 | { |
1983 | list_del(&snap->node); | 2018 | list_del(&snap->node); |
1984 | device_unregister(&snap->dev); | 2019 | device_unregister(&snap->dev); |
1985 | } | 2020 | } |
1986 | 2021 | ||
1987 | static int rbd_register_snap_dev(struct rbd_device *rbd_dev, | 2022 | static int rbd_register_snap_dev(struct rbd_snap *snap, |
1988 | struct rbd_snap *snap, | ||
1989 | struct device *parent) | 2023 | struct device *parent) |
1990 | { | 2024 | { |
1991 | struct device *dev = &snap->dev; | 2025 | struct device *dev = &snap->dev; |
@@ -2000,29 +2034,36 @@ static int rbd_register_snap_dev(struct rbd_device *rbd_dev, | |||
2000 | return ret; | 2034 | return ret; |
2001 | } | 2035 | } |
2002 | 2036 | ||
2003 | static int __rbd_add_snap_dev(struct rbd_device *rbd_dev, | 2037 | static struct rbd_snap *__rbd_add_snap_dev(struct rbd_device *rbd_dev, |
2004 | int i, const char *name, | 2038 | int i, const char *name) |
2005 | struct rbd_snap **snapp) | ||
2006 | { | 2039 | { |
2040 | struct rbd_snap *snap; | ||
2007 | int ret; | 2041 | int ret; |
2008 | struct rbd_snap *snap = kzalloc(sizeof(*snap), GFP_KERNEL); | 2042 | |
2043 | snap = kzalloc(sizeof (*snap), GFP_KERNEL); | ||
2009 | if (!snap) | 2044 | if (!snap) |
2010 | return -ENOMEM; | 2045 | return ERR_PTR(-ENOMEM); |
2046 | |||
2047 | ret = -ENOMEM; | ||
2011 | snap->name = kstrdup(name, GFP_KERNEL); | 2048 | snap->name = kstrdup(name, GFP_KERNEL); |
2049 | if (!snap->name) | ||
2050 | goto err; | ||
2051 | |||
2012 | snap->size = rbd_dev->header.snap_sizes[i]; | 2052 | snap->size = rbd_dev->header.snap_sizes[i]; |
2013 | snap->id = rbd_dev->header.snapc->snaps[i]; | 2053 | snap->id = rbd_dev->header.snapc->snaps[i]; |
2014 | if (device_is_registered(&rbd_dev->dev)) { | 2054 | if (device_is_registered(&rbd_dev->dev)) { |
2015 | ret = rbd_register_snap_dev(rbd_dev, snap, | 2055 | ret = rbd_register_snap_dev(snap, &rbd_dev->dev); |
2016 | &rbd_dev->dev); | ||
2017 | if (ret < 0) | 2056 | if (ret < 0) |
2018 | goto err; | 2057 | goto err; |
2019 | } | 2058 | } |
2020 | *snapp = snap; | 2059 | |
2021 | return 0; | 2060 | return snap; |
2061 | |||
2022 | err: | 2062 | err: |
2023 | kfree(snap->name); | 2063 | kfree(snap->name); |
2024 | kfree(snap); | 2064 | kfree(snap); |
2025 | return ret; | 2065 | |
2066 | return ERR_PTR(ret); | ||
2026 | } | 2067 | } |
2027 | 2068 | ||
2028 | /* | 2069 | /* |
@@ -2055,7 +2096,6 @@ static int __rbd_init_snaps_header(struct rbd_device *rbd_dev) | |||
2055 | const char *name, *first_name; | 2096 | const char *name, *first_name; |
2056 | int i = rbd_dev->header.total_snaps; | 2097 | int i = rbd_dev->header.total_snaps; |
2057 | struct rbd_snap *snap, *old_snap = NULL; | 2098 | struct rbd_snap *snap, *old_snap = NULL; |
2058 | int ret; | ||
2059 | struct list_head *p, *n; | 2099 | struct list_head *p, *n; |
2060 | 2100 | ||
2061 | first_name = rbd_dev->header.snap_names; | 2101 | first_name = rbd_dev->header.snap_names; |
@@ -2070,8 +2110,15 @@ static int __rbd_init_snaps_header(struct rbd_device *rbd_dev) | |||
2070 | cur_id = rbd_dev->header.snapc->snaps[i - 1]; | 2110 | cur_id = rbd_dev->header.snapc->snaps[i - 1]; |
2071 | 2111 | ||
2072 | if (!i || old_snap->id < cur_id) { | 2112 | if (!i || old_snap->id < cur_id) { |
2073 | /* old_snap->id was skipped, thus was removed */ | 2113 | /* |
2074 | __rbd_remove_snap_dev(rbd_dev, old_snap); | 2114 | * old_snap->id was skipped, thus was |
2115 | * removed. If this rbd_dev is mapped to | ||
2116 | * the removed snapshot, record that it no | ||
2117 | * longer exists, to prevent further I/O. | ||
2118 | */ | ||
2119 | if (rbd_dev->snap_id == old_snap->id) | ||
2120 | rbd_dev->snap_exists = false; | ||
2121 | __rbd_remove_snap_dev(old_snap); | ||
2075 | continue; | 2122 | continue; |
2076 | } | 2123 | } |
2077 | if (old_snap->id == cur_id) { | 2124 | if (old_snap->id == cur_id) { |
@@ -2091,9 +2138,9 @@ static int __rbd_init_snaps_header(struct rbd_device *rbd_dev) | |||
2091 | if (cur_id >= old_snap->id) | 2138 | if (cur_id >= old_snap->id) |
2092 | break; | 2139 | break; |
2093 | /* a new snapshot */ | 2140 | /* a new snapshot */ |
2094 | ret = __rbd_add_snap_dev(rbd_dev, i - 1, name, &snap); | 2141 | snap = __rbd_add_snap_dev(rbd_dev, i - 1, name); |
2095 | if (ret < 0) | 2142 | if (IS_ERR(snap)) |
2096 | return ret; | 2143 | return PTR_ERR(snap); |
2097 | 2144 | ||
2098 | /* note that we add it backward so using n and not p */ | 2145 | /* note that we add it backward so using n and not p */ |
2099 | list_add(&snap->node, n); | 2146 | list_add(&snap->node, n); |
@@ -2107,9 +2154,9 @@ static int __rbd_init_snaps_header(struct rbd_device *rbd_dev) | |||
2107 | WARN_ON(1); | 2154 | WARN_ON(1); |
2108 | return -EINVAL; | 2155 | return -EINVAL; |
2109 | } | 2156 | } |
2110 | ret = __rbd_add_snap_dev(rbd_dev, i - 1, name, &snap); | 2157 | snap = __rbd_add_snap_dev(rbd_dev, i - 1, name); |
2111 | if (ret < 0) | 2158 | if (IS_ERR(snap)) |
2112 | return ret; | 2159 | return PTR_ERR(snap); |
2113 | list_add(&snap->node, &rbd_dev->snaps); | 2160 | list_add(&snap->node, &rbd_dev->snaps); |
2114 | } | 2161 | } |
2115 | 2162 | ||
@@ -2129,14 +2176,13 @@ static int rbd_bus_add_dev(struct rbd_device *rbd_dev) | |||
2129 | dev->type = &rbd_device_type; | 2176 | dev->type = &rbd_device_type; |
2130 | dev->parent = &rbd_root_dev; | 2177 | dev->parent = &rbd_root_dev; |
2131 | dev->release = rbd_dev_release; | 2178 | dev->release = rbd_dev_release; |
2132 | dev_set_name(dev, "%d", rbd_dev->id); | 2179 | dev_set_name(dev, "%d", rbd_dev->dev_id); |
2133 | ret = device_register(dev); | 2180 | ret = device_register(dev); |
2134 | if (ret < 0) | 2181 | if (ret < 0) |
2135 | goto out; | 2182 | goto out; |
2136 | 2183 | ||
2137 | list_for_each_entry(snap, &rbd_dev->snaps, node) { | 2184 | list_for_each_entry(snap, &rbd_dev->snaps, node) { |
2138 | ret = rbd_register_snap_dev(rbd_dev, snap, | 2185 | ret = rbd_register_snap_dev(snap, &rbd_dev->dev); |
2139 | &rbd_dev->dev); | ||
2140 | if (ret < 0) | 2186 | if (ret < 0) |
2141 | break; | 2187 | break; |
2142 | } | 2188 | } |
@@ -2155,12 +2201,9 @@ static int rbd_init_watch_dev(struct rbd_device *rbd_dev) | |||
2155 | int ret, rc; | 2201 | int ret, rc; |
2156 | 2202 | ||
2157 | do { | 2203 | do { |
2158 | ret = rbd_req_sync_watch(rbd_dev, rbd_dev->obj_md_name, | 2204 | ret = rbd_req_sync_watch(rbd_dev); |
2159 | rbd_dev->header.obj_version); | ||
2160 | if (ret == -ERANGE) { | 2205 | if (ret == -ERANGE) { |
2161 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | 2206 | rc = rbd_refresh_header(rbd_dev, NULL); |
2162 | rc = __rbd_refresh_header(rbd_dev); | ||
2163 | mutex_unlock(&ctl_mutex); | ||
2164 | if (rc < 0) | 2207 | if (rc < 0) |
2165 | return rc; | 2208 | return rc; |
2166 | } | 2209 | } |
@@ -2177,7 +2220,7 @@ static atomic64_t rbd_id_max = ATOMIC64_INIT(0); | |||
2177 | */ | 2220 | */ |
2178 | static void rbd_id_get(struct rbd_device *rbd_dev) | 2221 | static void rbd_id_get(struct rbd_device *rbd_dev) |
2179 | { | 2222 | { |
2180 | rbd_dev->id = atomic64_inc_return(&rbd_id_max); | 2223 | rbd_dev->dev_id = atomic64_inc_return(&rbd_id_max); |
2181 | 2224 | ||
2182 | spin_lock(&rbd_dev_list_lock); | 2225 | spin_lock(&rbd_dev_list_lock); |
2183 | list_add_tail(&rbd_dev->node, &rbd_dev_list); | 2226 | list_add_tail(&rbd_dev->node, &rbd_dev_list); |
@@ -2191,7 +2234,7 @@ static void rbd_id_get(struct rbd_device *rbd_dev) | |||
2191 | static void rbd_id_put(struct rbd_device *rbd_dev) | 2234 | static void rbd_id_put(struct rbd_device *rbd_dev) |
2192 | { | 2235 | { |
2193 | struct list_head *tmp; | 2236 | struct list_head *tmp; |
2194 | int rbd_id = rbd_dev->id; | 2237 | int rbd_id = rbd_dev->dev_id; |
2195 | int max_id; | 2238 | int max_id; |
2196 | 2239 | ||
2197 | BUG_ON(rbd_id < 1); | 2240 | BUG_ON(rbd_id < 1); |
@@ -2282,19 +2325,58 @@ static inline size_t copy_token(const char **buf, | |||
2282 | } | 2325 | } |
2283 | 2326 | ||
2284 | /* | 2327 | /* |
2285 | * This fills in the pool_name, obj, obj_len, snap_name, obj_len, | 2328 | * Finds the next token in *buf, dynamically allocates a buffer big |
2329 | * enough to hold a copy of it, and copies the token into the new | ||
2330 | * buffer. The copy is guaranteed to be terminated with '\0'. Note | ||
2331 | * that a duplicate buffer is created even for a zero-length token. | ||
2332 | * | ||
2333 | * Returns a pointer to the newly-allocated duplicate, or a null | ||
2334 | * pointer if memory for the duplicate was not available. If | ||
2335 | * the lenp argument is a non-null pointer, the length of the token | ||
2336 | * (not including the '\0') is returned in *lenp. | ||
2337 | * | ||
2338 | * If successful, the *buf pointer will be updated to point beyond | ||
2339 | * the end of the found token. | ||
2340 | * | ||
2341 | * Note: uses GFP_KERNEL for allocation. | ||
2342 | */ | ||
2343 | static inline char *dup_token(const char **buf, size_t *lenp) | ||
2344 | { | ||
2345 | char *dup; | ||
2346 | size_t len; | ||
2347 | |||
2348 | len = next_token(buf); | ||
2349 | dup = kmalloc(len + 1, GFP_KERNEL); | ||
2350 | if (!dup) | ||
2351 | return NULL; | ||
2352 | |||
2353 | memcpy(dup, *buf, len); | ||
2354 | *(dup + len) = '\0'; | ||
2355 | *buf += len; | ||
2356 | |||
2357 | if (lenp) | ||
2358 | *lenp = len; | ||
2359 | |||
2360 | return dup; | ||
2361 | } | ||
2362 | |||
2363 | /* | ||
2364 | * This fills in the pool_name, image_name, image_name_len, snap_name, | ||
2286 | * rbd_dev, rbd_md_name, and name fields of the given rbd_dev, based | 2365 | * rbd_dev, rbd_md_name, and name fields of the given rbd_dev, based |
2287 | * on the list of monitor addresses and other options provided via | 2366 | * on the list of monitor addresses and other options provided via |
2288 | * /sys/bus/rbd/add. | 2367 | * /sys/bus/rbd/add. |
2368 | * | ||
2369 | * Note: rbd_dev is assumed to have been initially zero-filled. | ||
2289 | */ | 2370 | */ |
2290 | static int rbd_add_parse_args(struct rbd_device *rbd_dev, | 2371 | static int rbd_add_parse_args(struct rbd_device *rbd_dev, |
2291 | const char *buf, | 2372 | const char *buf, |
2292 | const char **mon_addrs, | 2373 | const char **mon_addrs, |
2293 | size_t *mon_addrs_size, | 2374 | size_t *mon_addrs_size, |
2294 | char *options, | 2375 | char *options, |
2295 | size_t options_size) | 2376 | size_t options_size) |
2296 | { | 2377 | { |
2297 | size_t len; | 2378 | size_t len; |
2379 | int ret; | ||
2298 | 2380 | ||
2299 | /* The first four tokens are required */ | 2381 | /* The first four tokens are required */ |
2300 | 2382 | ||
@@ -2310,56 +2392,74 @@ static int rbd_add_parse_args(struct rbd_device *rbd_dev, | |||
2310 | if (!len || len >= options_size) | 2392 | if (!len || len >= options_size) |
2311 | return -EINVAL; | 2393 | return -EINVAL; |
2312 | 2394 | ||
2313 | len = copy_token(&buf, rbd_dev->pool_name, sizeof (rbd_dev->pool_name)); | 2395 | ret = -ENOMEM; |
2314 | if (!len || len >= sizeof (rbd_dev->pool_name)) | 2396 | rbd_dev->pool_name = dup_token(&buf, NULL); |
2315 | return -EINVAL; | 2397 | if (!rbd_dev->pool_name) |
2316 | 2398 | goto out_err; | |
2317 | len = copy_token(&buf, rbd_dev->obj, sizeof (rbd_dev->obj)); | ||
2318 | if (!len || len >= sizeof (rbd_dev->obj)) | ||
2319 | return -EINVAL; | ||
2320 | 2399 | ||
2321 | /* We have the object length in hand, save it. */ | 2400 | rbd_dev->image_name = dup_token(&buf, &rbd_dev->image_name_len); |
2401 | if (!rbd_dev->image_name) | ||
2402 | goto out_err; | ||
2322 | 2403 | ||
2323 | rbd_dev->obj_len = len; | 2404 | /* Create the name of the header object */ |
2324 | 2405 | ||
2325 | BUILD_BUG_ON(RBD_MAX_MD_NAME_LEN | 2406 | rbd_dev->header_name = kmalloc(rbd_dev->image_name_len |
2326 | < RBD_MAX_OBJ_NAME_LEN + sizeof (RBD_SUFFIX)); | 2407 | + sizeof (RBD_SUFFIX), |
2327 | sprintf(rbd_dev->obj_md_name, "%s%s", rbd_dev->obj, RBD_SUFFIX); | 2408 | GFP_KERNEL); |
2409 | if (!rbd_dev->header_name) | ||
2410 | goto out_err; | ||
2411 | sprintf(rbd_dev->header_name, "%s%s", rbd_dev->image_name, RBD_SUFFIX); | ||
2328 | 2412 | ||
2329 | /* | 2413 | /* |
2330 | * The snapshot name is optional, but it's an error if it's | 2414 | * The snapshot name is optional. If none is supplied, |
2331 | * too long. If no snapshot is supplied, fill in the default. | 2415 | * we use the default value. |
2332 | */ | 2416 | */ |
2333 | len = copy_token(&buf, rbd_dev->snap_name, sizeof (rbd_dev->snap_name)); | 2417 | rbd_dev->snap_name = dup_token(&buf, &len); |
2334 | if (!len) | 2418 | if (!rbd_dev->snap_name) |
2419 | goto out_err; | ||
2420 | if (!len) { | ||
2421 | /* Replace the empty name with the default */ | ||
2422 | kfree(rbd_dev->snap_name); | ||
2423 | rbd_dev->snap_name | ||
2424 | = kmalloc(sizeof (RBD_SNAP_HEAD_NAME), GFP_KERNEL); | ||
2425 | if (!rbd_dev->snap_name) | ||
2426 | goto out_err; | ||
2427 | |||
2335 | memcpy(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME, | 2428 | memcpy(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME, |
2336 | sizeof (RBD_SNAP_HEAD_NAME)); | 2429 | sizeof (RBD_SNAP_HEAD_NAME)); |
2337 | else if (len >= sizeof (rbd_dev->snap_name)) | 2430 | } |
2338 | return -EINVAL; | ||
2339 | 2431 | ||
2340 | return 0; | 2432 | return 0; |
2433 | |||
2434 | out_err: | ||
2435 | kfree(rbd_dev->header_name); | ||
2436 | kfree(rbd_dev->image_name); | ||
2437 | kfree(rbd_dev->pool_name); | ||
2438 | rbd_dev->pool_name = NULL; | ||
2439 | |||
2440 | return ret; | ||
2341 | } | 2441 | } |
2342 | 2442 | ||
2343 | static ssize_t rbd_add(struct bus_type *bus, | 2443 | static ssize_t rbd_add(struct bus_type *bus, |
2344 | const char *buf, | 2444 | const char *buf, |
2345 | size_t count) | 2445 | size_t count) |
2346 | { | 2446 | { |
2347 | struct rbd_device *rbd_dev; | 2447 | char *options; |
2448 | struct rbd_device *rbd_dev = NULL; | ||
2348 | const char *mon_addrs = NULL; | 2449 | const char *mon_addrs = NULL; |
2349 | size_t mon_addrs_size = 0; | 2450 | size_t mon_addrs_size = 0; |
2350 | char *options = NULL; | ||
2351 | struct ceph_osd_client *osdc; | 2451 | struct ceph_osd_client *osdc; |
2352 | int rc = -ENOMEM; | 2452 | int rc = -ENOMEM; |
2353 | 2453 | ||
2354 | if (!try_module_get(THIS_MODULE)) | 2454 | if (!try_module_get(THIS_MODULE)) |
2355 | return -ENODEV; | 2455 | return -ENODEV; |
2356 | 2456 | ||
2357 | rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL); | ||
2358 | if (!rbd_dev) | ||
2359 | goto err_nomem; | ||
2360 | options = kmalloc(count, GFP_KERNEL); | 2457 | options = kmalloc(count, GFP_KERNEL); |
2361 | if (!options) | 2458 | if (!options) |
2362 | goto err_nomem; | 2459 | goto err_nomem; |
2460 | rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL); | ||
2461 | if (!rbd_dev) | ||
2462 | goto err_nomem; | ||
2363 | 2463 | ||
2364 | /* static rbd_device initialization */ | 2464 | /* static rbd_device initialization */ |
2365 | spin_lock_init(&rbd_dev->lock); | 2465 | spin_lock_init(&rbd_dev->lock); |
@@ -2367,15 +2467,13 @@ static ssize_t rbd_add(struct bus_type *bus, | |||
2367 | INIT_LIST_HEAD(&rbd_dev->snaps); | 2467 | INIT_LIST_HEAD(&rbd_dev->snaps); |
2368 | init_rwsem(&rbd_dev->header_rwsem); | 2468 | init_rwsem(&rbd_dev->header_rwsem); |
2369 | 2469 | ||
2370 | init_rwsem(&rbd_dev->header_rwsem); | ||
2371 | |||
2372 | /* generate unique id: find highest unique id, add one */ | 2470 | /* generate unique id: find highest unique id, add one */ |
2373 | rbd_id_get(rbd_dev); | 2471 | rbd_id_get(rbd_dev); |
2374 | 2472 | ||
2375 | /* Fill in the device name, now that we have its id. */ | 2473 | /* Fill in the device name, now that we have its id. */ |
2376 | BUILD_BUG_ON(DEV_NAME_LEN | 2474 | BUILD_BUG_ON(DEV_NAME_LEN |
2377 | < sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH); | 2475 | < sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH); |
2378 | sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->id); | 2476 | sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->dev_id); |
2379 | 2477 | ||
2380 | /* parse add command */ | 2478 | /* parse add command */ |
2381 | rc = rbd_add_parse_args(rbd_dev, buf, &mon_addrs, &mon_addrs_size, | 2479 | rc = rbd_add_parse_args(rbd_dev, buf, &mon_addrs, &mon_addrs_size, |
@@ -2395,7 +2493,7 @@ static ssize_t rbd_add(struct bus_type *bus, | |||
2395 | rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name); | 2493 | rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name); |
2396 | if (rc < 0) | 2494 | if (rc < 0) |
2397 | goto err_out_client; | 2495 | goto err_out_client; |
2398 | rbd_dev->poolid = rc; | 2496 | rbd_dev->pool_id = rc; |
2399 | 2497 | ||
2400 | /* register our block device */ | 2498 | /* register our block device */ |
2401 | rc = register_blkdev(0, rbd_dev->name); | 2499 | rc = register_blkdev(0, rbd_dev->name); |
@@ -2435,10 +2533,16 @@ err_out_blkdev: | |||
2435 | err_out_client: | 2533 | err_out_client: |
2436 | rbd_put_client(rbd_dev); | 2534 | rbd_put_client(rbd_dev); |
2437 | err_put_id: | 2535 | err_put_id: |
2536 | if (rbd_dev->pool_name) { | ||
2537 | kfree(rbd_dev->snap_name); | ||
2538 | kfree(rbd_dev->header_name); | ||
2539 | kfree(rbd_dev->image_name); | ||
2540 | kfree(rbd_dev->pool_name); | ||
2541 | } | ||
2438 | rbd_id_put(rbd_dev); | 2542 | rbd_id_put(rbd_dev); |
2439 | err_nomem: | 2543 | err_nomem: |
2440 | kfree(options); | ||
2441 | kfree(rbd_dev); | 2544 | kfree(rbd_dev); |
2545 | kfree(options); | ||
2442 | 2546 | ||
2443 | dout("Error adding device %s\n", buf); | 2547 | dout("Error adding device %s\n", buf); |
2444 | module_put(THIS_MODULE); | 2548 | module_put(THIS_MODULE); |
@@ -2446,7 +2550,7 @@ err_nomem: | |||
2446 | return (ssize_t) rc; | 2550 | return (ssize_t) rc; |
2447 | } | 2551 | } |
2448 | 2552 | ||
2449 | static struct rbd_device *__rbd_get_dev(unsigned long id) | 2553 | static struct rbd_device *__rbd_get_dev(unsigned long dev_id) |
2450 | { | 2554 | { |
2451 | struct list_head *tmp; | 2555 | struct list_head *tmp; |
2452 | struct rbd_device *rbd_dev; | 2556 | struct rbd_device *rbd_dev; |
@@ -2454,7 +2558,7 @@ static struct rbd_device *__rbd_get_dev(unsigned long id) | |||
2454 | spin_lock(&rbd_dev_list_lock); | 2558 | spin_lock(&rbd_dev_list_lock); |
2455 | list_for_each(tmp, &rbd_dev_list) { | 2559 | list_for_each(tmp, &rbd_dev_list) { |
2456 | rbd_dev = list_entry(tmp, struct rbd_device, node); | 2560 | rbd_dev = list_entry(tmp, struct rbd_device, node); |
2457 | if (rbd_dev->id == id) { | 2561 | if (rbd_dev->dev_id == dev_id) { |
2458 | spin_unlock(&rbd_dev_list_lock); | 2562 | spin_unlock(&rbd_dev_list_lock); |
2459 | return rbd_dev; | 2563 | return rbd_dev; |
2460 | } | 2564 | } |
@@ -2474,7 +2578,7 @@ static void rbd_dev_release(struct device *dev) | |||
2474 | rbd_dev->watch_request); | 2578 | rbd_dev->watch_request); |
2475 | } | 2579 | } |
2476 | if (rbd_dev->watch_event) | 2580 | if (rbd_dev->watch_event) |
2477 | rbd_req_sync_unwatch(rbd_dev, rbd_dev->obj_md_name); | 2581 | rbd_req_sync_unwatch(rbd_dev); |
2478 | 2582 | ||
2479 | rbd_put_client(rbd_dev); | 2583 | rbd_put_client(rbd_dev); |
2480 | 2584 | ||
@@ -2483,6 +2587,10 @@ static void rbd_dev_release(struct device *dev) | |||
2483 | unregister_blkdev(rbd_dev->major, rbd_dev->name); | 2587 | unregister_blkdev(rbd_dev->major, rbd_dev->name); |
2484 | 2588 | ||
2485 | /* done with the id, and with the rbd_dev */ | 2589 | /* done with the id, and with the rbd_dev */ |
2590 | kfree(rbd_dev->snap_name); | ||
2591 | kfree(rbd_dev->header_name); | ||
2592 | kfree(rbd_dev->pool_name); | ||
2593 | kfree(rbd_dev->image_name); | ||
2486 | rbd_id_put(rbd_dev); | 2594 | rbd_id_put(rbd_dev); |
2487 | kfree(rbd_dev); | 2595 | kfree(rbd_dev); |
2488 | 2596 | ||
@@ -2544,7 +2652,7 @@ static ssize_t rbd_snap_add(struct device *dev, | |||
2544 | if (ret < 0) | 2652 | if (ret < 0) |
2545 | goto err_unlock; | 2653 | goto err_unlock; |
2546 | 2654 | ||
2547 | ret = __rbd_refresh_header(rbd_dev); | 2655 | ret = __rbd_refresh_header(rbd_dev, NULL); |
2548 | if (ret < 0) | 2656 | if (ret < 0) |
2549 | goto err_unlock; | 2657 | goto err_unlock; |
2550 | 2658 | ||
@@ -2553,7 +2661,7 @@ static ssize_t rbd_snap_add(struct device *dev, | |||
2553 | mutex_unlock(&ctl_mutex); | 2661 | mutex_unlock(&ctl_mutex); |
2554 | 2662 | ||
2555 | /* make a best effort, don't error if failed */ | 2663 | /* make a best effort, don't error if failed */ |
2556 | rbd_req_sync_notify(rbd_dev, rbd_dev->obj_md_name); | 2664 | rbd_req_sync_notify(rbd_dev); |
2557 | 2665 | ||
2558 | ret = count; | 2666 | ret = count; |
2559 | kfree(name); | 2667 | kfree(name); |