about summary refs log tree commit diff stats
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/block/rbd.c 1784
-rw-r--r-- drivers/block/rbd_types.h 27
2 files changed, 1187 insertions, 624 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 54a55f03115d..bb3d9be3b1b4 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -41,6 +41,8 @@
41 41
42#include "rbd_types.h" 42#include "rbd_types.h"
43 43
44#define RBD_DEBUG /* Activate rbd_assert() calls */
45
44/* 46/*
45 * The basic unit of block I/O is a sector. It is interpreted in a 47 * The basic unit of block I/O is a sector. It is interpreted in a
46 * number of contexts in Linux (blk, bio, genhd), but the default is 48 * number of contexts in Linux (blk, bio, genhd), but the default is
@@ -50,16 +52,24 @@
50#define SECTOR_SHIFT 9 52#define SECTOR_SHIFT 9
51#define SECTOR_SIZE (1ULL << SECTOR_SHIFT) 53#define SECTOR_SIZE (1ULL << SECTOR_SHIFT)
52 54
55/* It might be useful to have this defined elsewhere too */
56
57#define U64_MAX ((u64) (~0ULL))
58
53#define RBD_DRV_NAME "rbd" 59#define RBD_DRV_NAME "rbd"
54#define RBD_DRV_NAME_LONG "rbd (rados block device)" 60#define RBD_DRV_NAME_LONG "rbd (rados block device)"
55 61
56#define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */ 62#define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */
57 63
58#define RBD_MAX_SNAP_NAME_LEN 32 64#define RBD_MAX_SNAP_NAME_LEN 32
65#define RBD_MAX_SNAP_COUNT 510 /* allows max snapc to fit in 4KB */
59#define RBD_MAX_OPT_LEN 1024 66#define RBD_MAX_OPT_LEN 1024
60 67
61#define RBD_SNAP_HEAD_NAME "-" 68#define RBD_SNAP_HEAD_NAME "-"
62 69
70#define RBD_IMAGE_ID_LEN_MAX 64
71#define RBD_OBJ_PREFIX_LEN_MAX 64
72
63/* 73/*
64 * An RBD device name will be "rbd#", where the "rbd" comes from 74 * An RBD device name will be "rbd#", where the "rbd" comes from
65 * RBD_DRV_NAME above, and # is a unique integer identifier. 75 * RBD_DRV_NAME above, and # is a unique integer identifier.
@@ -69,21 +79,22 @@
69#define DEV_NAME_LEN 32 79#define DEV_NAME_LEN 32
70#define MAX_INT_FORMAT_WIDTH ((5 * sizeof (int)) / 2 + 1) 80#define MAX_INT_FORMAT_WIDTH ((5 * sizeof (int)) / 2 + 1)
71 81
72#define RBD_NOTIFY_TIMEOUT_DEFAULT 10 82#define RBD_READ_ONLY_DEFAULT false
73 83
74/* 84/*
75 * block device image metadata (in-memory version) 85 * block device image metadata (in-memory version)
76 */ 86 */
77struct rbd_image_header { 87struct rbd_image_header {
78 u64 image_size; 88 /* These four fields never change for a given rbd image */
79 char *object_prefix; 89 char *object_prefix;
90 u64 features;
80 __u8 obj_order; 91 __u8 obj_order;
81 __u8 crypt_type; 92 __u8 crypt_type;
82 __u8 comp_type; 93 __u8 comp_type;
83 struct ceph_snap_context *snapc;
84 size_t snap_names_len;
85 u32 total_snaps;
86 94
95 /* The remaining fields need to be updated occasionally */
96 u64 image_size;
97 struct ceph_snap_context *snapc;
87 char *snap_names; 98 char *snap_names;
88 u64 *snap_sizes; 99 u64 *snap_sizes;
89 100
@@ -91,7 +102,7 @@ struct rbd_image_header {
91}; 102};
92 103
93struct rbd_options { 104struct rbd_options {
94 int notify_timeout; 105 bool read_only;
95}; 106};
96 107
97/* 108/*
@@ -99,7 +110,6 @@ struct rbd_options {
99 */ 110 */
100struct rbd_client { 111struct rbd_client {
101 struct ceph_client *client; 112 struct ceph_client *client;
102 struct rbd_options *rbd_opts;
103 struct kref kref; 113 struct kref kref;
104 struct list_head node; 114 struct list_head node;
105}; 115};
@@ -141,6 +151,16 @@ struct rbd_snap {
141 u64 size; 151 u64 size;
142 struct list_head node; 152 struct list_head node;
143 u64 id; 153 u64 id;
154 u64 features;
155};
156
157struct rbd_mapping {
158 char *snap_name;
159 u64 snap_id;
160 u64 size;
161 u64 features;
162 bool snap_exists;
163 bool read_only;
144}; 164};
145 165
146/* 166/*
@@ -151,8 +171,9 @@ struct rbd_device {
151 171
152 int major; /* blkdev assigned major */ 172 int major; /* blkdev assigned major */
153 struct gendisk *disk; /* blkdev's gendisk and rq */ 173 struct gendisk *disk; /* blkdev's gendisk and rq */
154 struct request_queue *q;
155 174
175 u32 image_format; /* Either 1 or 2 */
176 struct rbd_options rbd_opts;
156 struct rbd_client *rbd_client; 177 struct rbd_client *rbd_client;
157 178
158 char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ 179 char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */
@@ -160,6 +181,8 @@ struct rbd_device {
160 spinlock_t lock; /* queue lock */ 181 spinlock_t lock; /* queue lock */
161 182
162 struct rbd_image_header header; 183 struct rbd_image_header header;
184 char *image_id;
185 size_t image_id_len;
163 char *image_name; 186 char *image_name;
164 size_t image_name_len; 187 size_t image_name_len;
165 char *header_name; 188 char *header_name;
@@ -171,13 +194,8 @@ struct rbd_device {
171 194
172 /* protects updating the header */ 195 /* protects updating the header */
173 struct rw_semaphore header_rwsem; 196 struct rw_semaphore header_rwsem;
174 /* name of the snapshot this device reads from */ 197
175 char *snap_name; 198 struct rbd_mapping mapping;
176 /* id of the snapshot this device reads from */
177 u64 snap_id; /* current snapshot id */
178 /* whether the snap_id this device reads from still exists */
179 bool snap_exists;
180 int read_only;
181 199
182 struct list_head node; 200 struct list_head node;
183 201
@@ -196,12 +214,10 @@ static DEFINE_SPINLOCK(rbd_dev_list_lock);
196static LIST_HEAD(rbd_client_list); /* clients */ 214static LIST_HEAD(rbd_client_list); /* clients */
197static DEFINE_SPINLOCK(rbd_client_list_lock); 215static DEFINE_SPINLOCK(rbd_client_list_lock);
198 216
199static int __rbd_init_snaps_header(struct rbd_device *rbd_dev); 217static int rbd_dev_snaps_update(struct rbd_device *rbd_dev);
218static int rbd_dev_snaps_register(struct rbd_device *rbd_dev);
219
200static void rbd_dev_release(struct device *dev); 220static void rbd_dev_release(struct device *dev);
201static ssize_t rbd_snap_add(struct device *dev,
202 struct device_attribute *attr,
203 const char *buf,
204 size_t count);
205static void __rbd_remove_snap_dev(struct rbd_snap *snap); 221static void __rbd_remove_snap_dev(struct rbd_snap *snap);
206 222
207static ssize_t rbd_add(struct bus_type *bus, const char *buf, 223static ssize_t rbd_add(struct bus_type *bus, const char *buf,
@@ -229,6 +245,18 @@ static struct device rbd_root_dev = {
229 .release = rbd_root_dev_release, 245 .release = rbd_root_dev_release,
230}; 246};
231 247
248#ifdef RBD_DEBUG
249#define rbd_assert(expr) \
250 if (unlikely(!(expr))) { \
251 printk(KERN_ERR "\nAssertion failure in %s() " \
252 "at line %d:\n\n" \
253 "\trbd_assert(%s);\n\n", \
254 __func__, __LINE__, #expr); \
255 BUG(); \
256 }
257#else /* !RBD_DEBUG */
258# define rbd_assert(expr) ((void) 0)
259#endif /* !RBD_DEBUG */
232 260
233static struct device *rbd_get_dev(struct rbd_device *rbd_dev) 261static struct device *rbd_get_dev(struct rbd_device *rbd_dev)
234{ 262{
@@ -246,11 +274,11 @@ static int rbd_open(struct block_device *bdev, fmode_t mode)
246{ 274{
247 struct rbd_device *rbd_dev = bdev->bd_disk->private_data; 275 struct rbd_device *rbd_dev = bdev->bd_disk->private_data;
248 276
249 if ((mode & FMODE_WRITE) && rbd_dev->read_only) 277 if ((mode & FMODE_WRITE) && rbd_dev->mapping.read_only)
250 return -EROFS; 278 return -EROFS;
251 279
252 rbd_get_dev(rbd_dev); 280 rbd_get_dev(rbd_dev);
253 set_device_ro(bdev, rbd_dev->read_only); 281 set_device_ro(bdev, rbd_dev->mapping.read_only);
254 282
255 return 0; 283 return 0;
256} 284}
@@ -274,8 +302,7 @@ static const struct block_device_operations rbd_bd_ops = {
274 * Initialize an rbd client instance. 302 * Initialize an rbd client instance.
275 * We own *ceph_opts. 303 * We own *ceph_opts.
276 */ 304 */
277static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts, 305static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts)
278 struct rbd_options *rbd_opts)
279{ 306{
280 struct rbd_client *rbdc; 307 struct rbd_client *rbdc;
281 int ret = -ENOMEM; 308 int ret = -ENOMEM;
@@ -299,8 +326,6 @@ static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts,
299 if (ret < 0) 326 if (ret < 0)
300 goto out_err; 327 goto out_err;
301 328
302 rbdc->rbd_opts = rbd_opts;
303
304 spin_lock(&rbd_client_list_lock); 329 spin_lock(&rbd_client_list_lock);
305 list_add_tail(&rbdc->node, &rbd_client_list); 330 list_add_tail(&rbdc->node, &rbd_client_list);
306 spin_unlock(&rbd_client_list_lock); 331 spin_unlock(&rbd_client_list_lock);
@@ -322,36 +347,52 @@ out_opt:
322} 347}
323 348
324/* 349/*
325 * Find a ceph client with specific addr and configuration. 350 * Find a ceph client with specific addr and configuration. If
351 * found, bump its reference count.
326 */ 352 */
327static struct rbd_client *__rbd_client_find(struct ceph_options *ceph_opts) 353static struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts)
328{ 354{
329 struct rbd_client *client_node; 355 struct rbd_client *client_node;
356 bool found = false;
330 357
331 if (ceph_opts->flags & CEPH_OPT_NOSHARE) 358 if (ceph_opts->flags & CEPH_OPT_NOSHARE)
332 return NULL; 359 return NULL;
333 360
334 list_for_each_entry(client_node, &rbd_client_list, node) 361 spin_lock(&rbd_client_list_lock);
335 if (!ceph_compare_options(ceph_opts, client_node->client)) 362 list_for_each_entry(client_node, &rbd_client_list, node) {
336 return client_node; 363 if (!ceph_compare_options(ceph_opts, client_node->client)) {
337 return NULL; 364 kref_get(&client_node->kref);
365 found = true;
366 break;
367 }
368 }
369 spin_unlock(&rbd_client_list_lock);
370
371 return found ? client_node : NULL;
338} 372}
339 373
340/* 374/*
341 * mount options 375 * mount options
342 */ 376 */
343enum { 377enum {
344 Opt_notify_timeout,
345 Opt_last_int, 378 Opt_last_int,
346 /* int args above */ 379 /* int args above */
347 Opt_last_string, 380 Opt_last_string,
348 /* string args above */ 381 /* string args above */
382 Opt_read_only,
383 Opt_read_write,
384 /* Boolean args above */
385 Opt_last_bool,
349}; 386};
350 387
351static match_table_t rbd_opts_tokens = { 388static match_table_t rbd_opts_tokens = {
352 {Opt_notify_timeout, "notify_timeout=%d"},
353 /* int args above */ 389 /* int args above */
354 /* string args above */ 390 /* string args above */
391 {Opt_read_only, "mapping.read_only"},
392 {Opt_read_only, "ro"}, /* Alternate spelling */
393 {Opt_read_write, "read_write"},
394 {Opt_read_write, "rw"}, /* Alternate spelling */
395 /* Boolean args above */
355 {-1, NULL} 396 {-1, NULL}
356}; 397};
357 398
@@ -376,16 +417,22 @@ static int parse_rbd_opts_token(char *c, void *private)
376 } else if (token > Opt_last_int && token < Opt_last_string) { 417 } else if (token > Opt_last_int && token < Opt_last_string) {
377 dout("got string token %d val %s\n", token, 418 dout("got string token %d val %s\n", token,
378 argstr[0].from); 419 argstr[0].from);
420 } else if (token > Opt_last_string && token < Opt_last_bool) {
421 dout("got Boolean token %d\n", token);
379 } else { 422 } else {
380 dout("got token %d\n", token); 423 dout("got token %d\n", token);
381 } 424 }
382 425
383 switch (token) { 426 switch (token) {
384 case Opt_notify_timeout: 427 case Opt_read_only:
385 rbd_opts->notify_timeout = intval; 428 rbd_opts->read_only = true;
429 break;
430 case Opt_read_write:
431 rbd_opts->read_only = false;
386 break; 432 break;
387 default: 433 default:
388 BUG_ON(token); 434 rbd_assert(false);
435 break;
389 } 436 }
390 return 0; 437 return 0;
391} 438}
@@ -394,48 +441,33 @@ static int parse_rbd_opts_token(char *c, void *private)
394 * Get a ceph client with specific addr and configuration, if one does 441 * Get a ceph client with specific addr and configuration, if one does
395 * not exist create it. 442 * not exist create it.
396 */ 443 */
397static struct rbd_client *rbd_get_client(const char *mon_addr, 444static int rbd_get_client(struct rbd_device *rbd_dev, const char *mon_addr,
398 size_t mon_addr_len, 445 size_t mon_addr_len, char *options)
399 char *options)
400{ 446{
401 struct rbd_client *rbdc; 447 struct rbd_options *rbd_opts = &rbd_dev->rbd_opts;
402 struct ceph_options *ceph_opts; 448 struct ceph_options *ceph_opts;
403 struct rbd_options *rbd_opts; 449 struct rbd_client *rbdc;
404
405 rbd_opts = kzalloc(sizeof(*rbd_opts), GFP_KERNEL);
406 if (!rbd_opts)
407 return ERR_PTR(-ENOMEM);
408 450
409 rbd_opts->notify_timeout = RBD_NOTIFY_TIMEOUT_DEFAULT; 451 rbd_opts->read_only = RBD_READ_ONLY_DEFAULT;
410 452
411 ceph_opts = ceph_parse_options(options, mon_addr, 453 ceph_opts = ceph_parse_options(options, mon_addr,
412 mon_addr + mon_addr_len, 454 mon_addr + mon_addr_len,
413 parse_rbd_opts_token, rbd_opts); 455 parse_rbd_opts_token, rbd_opts);
414 if (IS_ERR(ceph_opts)) { 456 if (IS_ERR(ceph_opts))
415 kfree(rbd_opts); 457 return PTR_ERR(ceph_opts);
416 return ERR_CAST(ceph_opts);
417 }
418 458
419 spin_lock(&rbd_client_list_lock); 459 rbdc = rbd_client_find(ceph_opts);
420 rbdc = __rbd_client_find(ceph_opts);
421 if (rbdc) { 460 if (rbdc) {
422 /* using an existing client */ 461 /* using an existing client */
423 kref_get(&rbdc->kref);
424 spin_unlock(&rbd_client_list_lock);
425
426 ceph_destroy_options(ceph_opts); 462 ceph_destroy_options(ceph_opts);
427 kfree(rbd_opts); 463 } else {
428 464 rbdc = rbd_client_create(ceph_opts);
429 return rbdc; 465 if (IS_ERR(rbdc))
466 return PTR_ERR(rbdc);
430 } 467 }
431 spin_unlock(&rbd_client_list_lock); 468 rbd_dev->rbd_client = rbdc;
432
433 rbdc = rbd_client_create(ceph_opts, rbd_opts);
434 469
435 if (IS_ERR(rbdc)) 470 return 0;
436 kfree(rbd_opts);
437
438 return rbdc;
439} 471}
440 472
441/* 473/*
@@ -453,7 +485,6 @@ static void rbd_client_release(struct kref *kref)
453 spin_unlock(&rbd_client_list_lock); 485 spin_unlock(&rbd_client_list_lock);
454 486
455 ceph_destroy_client(rbdc->client); 487 ceph_destroy_client(rbdc->client);
456 kfree(rbdc->rbd_opts);
457 kfree(rbdc); 488 kfree(rbdc);
458} 489}
459 490
@@ -479,10 +510,38 @@ static void rbd_coll_release(struct kref *kref)
479 kfree(coll); 510 kfree(coll);
480} 511}
481 512
513static bool rbd_image_format_valid(u32 image_format)
514{
515 return image_format == 1 || image_format == 2;
516}
517
482static bool rbd_dev_ondisk_valid(struct rbd_image_header_ondisk *ondisk) 518static bool rbd_dev_ondisk_valid(struct rbd_image_header_ondisk *ondisk)
483{ 519{
484 return !memcmp(&ondisk->text, 520 size_t size;
485 RBD_HEADER_TEXT, sizeof (RBD_HEADER_TEXT)); 521 u32 snap_count;
522
523 /* The header has to start with the magic rbd header text */
524 if (memcmp(&ondisk->text, RBD_HEADER_TEXT, sizeof (RBD_HEADER_TEXT)))
525 return false;
526
527 /*
528 * The size of a snapshot header has to fit in a size_t, and
529 * that limits the number of snapshots.
530 */
531 snap_count = le32_to_cpu(ondisk->snap_count);
532 size = SIZE_MAX - sizeof (struct ceph_snap_context);
533 if (snap_count > size / sizeof (__le64))
534 return false;
535
536 /*
537 * Not only that, but the size of the entire the snapshot
538 * header must also be representable in a size_t.
539 */
540 size -= snap_count * sizeof (__le64);
541 if ((u64) size < le64_to_cpu(ondisk->snap_names_len))
542 return false;
543
544 return true;
486} 545}
487 546
488/* 547/*
@@ -490,179 +549,203 @@ static bool rbd_dev_ondisk_valid(struct rbd_image_header_ondisk *ondisk)
490 * header. 549 * header.
491 */ 550 */
492static int rbd_header_from_disk(struct rbd_image_header *header, 551static int rbd_header_from_disk(struct rbd_image_header *header,
493 struct rbd_image_header_ondisk *ondisk, 552 struct rbd_image_header_ondisk *ondisk)
494 u32 allocated_snaps)
495{ 553{
496 u32 snap_count; 554 u32 snap_count;
555 size_t len;
556 size_t size;
557 u32 i;
497 558
498 if (!rbd_dev_ondisk_valid(ondisk)) 559 memset(header, 0, sizeof (*header));
499 return -ENXIO;
500 560
501 snap_count = le32_to_cpu(ondisk->snap_count); 561 snap_count = le32_to_cpu(ondisk->snap_count);
502 if (snap_count > (SIZE_MAX - sizeof(struct ceph_snap_context)) 562
503 / sizeof (u64)) 563 len = strnlen(ondisk->object_prefix, sizeof (ondisk->object_prefix));
504 return -EINVAL; 564 header->object_prefix = kmalloc(len + 1, GFP_KERNEL);
505 header->snapc = kmalloc(sizeof(struct ceph_snap_context) + 565 if (!header->object_prefix)
506 snap_count * sizeof(u64),
507 GFP_KERNEL);
508 if (!header->snapc)
509 return -ENOMEM; 566 return -ENOMEM;
567 memcpy(header->object_prefix, ondisk->object_prefix, len);
568 header->object_prefix[len] = '\0';
510 569
511 if (snap_count) { 570 if (snap_count) {
512 header->snap_names_len = le64_to_cpu(ondisk->snap_names_len); 571 u64 snap_names_len = le64_to_cpu(ondisk->snap_names_len);
513 header->snap_names = kmalloc(header->snap_names_len, 572
514 GFP_KERNEL); 573 /* Save a copy of the snapshot names */
574
575 if (snap_names_len > (u64) SIZE_MAX)
576 return -EIO;
577 header->snap_names = kmalloc(snap_names_len, GFP_KERNEL);
515 if (!header->snap_names) 578 if (!header->snap_names)
516 goto err_snapc; 579 goto out_err;
517 header->snap_sizes = kmalloc(snap_count * sizeof(u64), 580 /*
518 GFP_KERNEL); 581 * Note that rbd_dev_v1_header_read() guarantees
582 * the ondisk buffer we're working with has
583 * snap_names_len bytes beyond the end of the
584 * snapshot id array, this memcpy() is safe.
585 */
586 memcpy(header->snap_names, &ondisk->snaps[snap_count],
587 snap_names_len);
588
589 /* Record each snapshot's size */
590
591 size = snap_count * sizeof (*header->snap_sizes);
592 header->snap_sizes = kmalloc(size, GFP_KERNEL);
519 if (!header->snap_sizes) 593 if (!header->snap_sizes)
520 goto err_names; 594 goto out_err;
595 for (i = 0; i < snap_count; i++)
596 header->snap_sizes[i] =
597 le64_to_cpu(ondisk->snaps[i].image_size);
521 } else { 598 } else {
522 WARN_ON(ondisk->snap_names_len); 599 WARN_ON(ondisk->snap_names_len);
523 header->snap_names_len = 0;
524 header->snap_names = NULL; 600 header->snap_names = NULL;
525 header->snap_sizes = NULL; 601 header->snap_sizes = NULL;
526 } 602 }
527 603
528 header->object_prefix = kmalloc(sizeof (ondisk->block_name) + 1, 604 header->features = 0; /* No features support in v1 images */
529 GFP_KERNEL);
530 if (!header->object_prefix)
531 goto err_sizes;
532
533 memcpy(header->object_prefix, ondisk->block_name,
534 sizeof(ondisk->block_name));
535 header->object_prefix[sizeof (ondisk->block_name)] = '\0';
536
537 header->image_size = le64_to_cpu(ondisk->image_size);
538 header->obj_order = ondisk->options.order; 605 header->obj_order = ondisk->options.order;
539 header->crypt_type = ondisk->options.crypt_type; 606 header->crypt_type = ondisk->options.crypt_type;
540 header->comp_type = ondisk->options.comp_type; 607 header->comp_type = ondisk->options.comp_type;
541 608
609 /* Allocate and fill in the snapshot context */
610
611 header->image_size = le64_to_cpu(ondisk->image_size);
612 size = sizeof (struct ceph_snap_context);
613 size += snap_count * sizeof (header->snapc->snaps[0]);
614 header->snapc = kzalloc(size, GFP_KERNEL);
615 if (!header->snapc)
616 goto out_err;
617
542 atomic_set(&header->snapc->nref, 1); 618 atomic_set(&header->snapc->nref, 1);
543 header->snapc->seq = le64_to_cpu(ondisk->snap_seq); 619 header->snapc->seq = le64_to_cpu(ondisk->snap_seq);
544 header->snapc->num_snaps = snap_count; 620 header->snapc->num_snaps = snap_count;
545 header->total_snaps = snap_count; 621 for (i = 0; i < snap_count; i++)
546 622 header->snapc->snaps[i] =
547 if (snap_count && allocated_snaps == snap_count) { 623 le64_to_cpu(ondisk->snaps[i].id);
548 int i;
549
550 for (i = 0; i < snap_count; i++) {
551 header->snapc->snaps[i] =
552 le64_to_cpu(ondisk->snaps[i].id);
553 header->snap_sizes[i] =
554 le64_to_cpu(ondisk->snaps[i].image_size);
555 }
556
557 /* copy snapshot names */
558 memcpy(header->snap_names, &ondisk->snaps[snap_count],
559 header->snap_names_len);
560 }
561 624
562 return 0; 625 return 0;
563 626
564err_sizes: 627out_err:
565 kfree(header->snap_sizes); 628 kfree(header->snap_sizes);
566 header->snap_sizes = NULL; 629 header->snap_sizes = NULL;
567err_names:
568 kfree(header->snap_names); 630 kfree(header->snap_names);
569 header->snap_names = NULL; 631 header->snap_names = NULL;
570err_snapc: 632 kfree(header->object_prefix);
571 kfree(header->snapc); 633 header->object_prefix = NULL;
572 header->snapc = NULL;
573 634
574 return -ENOMEM; 635 return -ENOMEM;
575} 636}
576 637
577static int snap_by_name(struct rbd_image_header *header, const char *snap_name, 638static int snap_by_name(struct rbd_device *rbd_dev, const char *snap_name)
578 u64 *seq, u64 *size)
579{ 639{
580 int i;
581 char *p = header->snap_names;
582 640
583 for (i = 0; i < header->total_snaps; i++) { 641 struct rbd_snap *snap;
584 if (!strcmp(snap_name, p)) {
585 642
586 /* Found it. Pass back its id and/or size */ 643 list_for_each_entry(snap, &rbd_dev->snaps, node) {
644 if (!strcmp(snap_name, snap->name)) {
645 rbd_dev->mapping.snap_id = snap->id;
646 rbd_dev->mapping.size = snap->size;
647 rbd_dev->mapping.features = snap->features;
587 648
588 if (seq) 649 return 0;
589 *seq = header->snapc->snaps[i];
590 if (size)
591 *size = header->snap_sizes[i];
592 return i;
593 } 650 }
594 p += strlen(p) + 1; /* Skip ahead to the next name */
595 } 651 }
652
596 return -ENOENT; 653 return -ENOENT;
597} 654}
598 655
599static int rbd_header_set_snap(struct rbd_device *rbd_dev, u64 *size) 656static int rbd_dev_set_mapping(struct rbd_device *rbd_dev, char *snap_name)
600{ 657{
601 int ret; 658 int ret;
602 659
603 down_write(&rbd_dev->header_rwsem); 660 if (!memcmp(snap_name, RBD_SNAP_HEAD_NAME,
604
605 if (!memcmp(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME,
606 sizeof (RBD_SNAP_HEAD_NAME))) { 661 sizeof (RBD_SNAP_HEAD_NAME))) {
607 rbd_dev->snap_id = CEPH_NOSNAP; 662 rbd_dev->mapping.snap_id = CEPH_NOSNAP;
608 rbd_dev->snap_exists = false; 663 rbd_dev->mapping.size = rbd_dev->header.image_size;
609 rbd_dev->read_only = 0; 664 rbd_dev->mapping.features = rbd_dev->header.features;
610 if (size) 665 rbd_dev->mapping.snap_exists = false;
611 *size = rbd_dev->header.image_size; 666 rbd_dev->mapping.read_only = rbd_dev->rbd_opts.read_only;
667 ret = 0;
612 } else { 668 } else {
613 u64 snap_id = 0; 669 ret = snap_by_name(rbd_dev, snap_name);
614
615 ret = snap_by_name(&rbd_dev->header, rbd_dev->snap_name,
616 &snap_id, size);
617 if (ret < 0) 670 if (ret < 0)
618 goto done; 671 goto done;
619 rbd_dev->snap_id = snap_id; 672 rbd_dev->mapping.snap_exists = true;
620 rbd_dev->snap_exists = true; 673 rbd_dev->mapping.read_only = true;
621 rbd_dev->read_only = 1;
622 } 674 }
623 675 rbd_dev->mapping.snap_name = snap_name;
624 ret = 0;
625done: 676done:
626 up_write(&rbd_dev->header_rwsem);
627 return ret; 677 return ret;
628} 678}
629 679
630static void rbd_header_free(struct rbd_image_header *header) 680static void rbd_header_free(struct rbd_image_header *header)
631{ 681{
632 kfree(header->object_prefix); 682 kfree(header->object_prefix);
683 header->object_prefix = NULL;
633 kfree(header->snap_sizes); 684 kfree(header->snap_sizes);
685 header->snap_sizes = NULL;
634 kfree(header->snap_names); 686 kfree(header->snap_names);
687 header->snap_names = NULL;
635 ceph_put_snap_context(header->snapc); 688 ceph_put_snap_context(header->snapc);
689 header->snapc = NULL;
636} 690}
637 691
638/* 692static char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset)
639 * get the actual striped segment name, offset and length 693{
640 */ 694 char *name;
641static u64 rbd_get_segment(struct rbd_image_header *header, 695 u64 segment;
642 const char *object_prefix, 696 int ret;
643 u64 ofs, u64 len, 697
644 char *seg_name, u64 *segofs) 698 name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO);
699 if (!name)
700 return NULL;
701 segment = offset >> rbd_dev->header.obj_order;
702 ret = snprintf(name, RBD_MAX_SEG_NAME_LEN, "%s.%012llx",
703 rbd_dev->header.object_prefix, segment);
704 if (ret < 0 || ret >= RBD_MAX_SEG_NAME_LEN) {
705 pr_err("error formatting segment name for #%llu (%d)\n",
706 segment, ret);
707 kfree(name);
708 name = NULL;
709 }
710
711 return name;
712}
713
714static u64 rbd_segment_offset(struct rbd_device *rbd_dev, u64 offset)
645{ 715{
646 u64 seg = ofs >> header->obj_order; 716 u64 segment_size = (u64) 1 << rbd_dev->header.obj_order;
647 717
648 if (seg_name) 718 return offset & (segment_size - 1);
649 snprintf(seg_name, RBD_MAX_SEG_NAME_LEN, 719}
650 "%s.%012llx", object_prefix, seg); 720
721static u64 rbd_segment_length(struct rbd_device *rbd_dev,
722 u64 offset, u64 length)
723{
724 u64 segment_size = (u64) 1 << rbd_dev->header.obj_order;
651 725
652 ofs = ofs & ((1 << header->obj_order) - 1); 726 offset &= segment_size - 1;
653 len = min_t(u64, len, (1 << header->obj_order) - ofs);
654 727
655 if (segofs) 728 rbd_assert(length <= U64_MAX - offset);
656 *segofs = ofs; 729 if (offset + length > segment_size)
730 length = segment_size - offset;
657 731
658 return len; 732 return length;
659} 733}
660 734
661static int rbd_get_num_segments(struct rbd_image_header *header, 735static int rbd_get_num_segments(struct rbd_image_header *header,
662 u64 ofs, u64 len) 736 u64 ofs, u64 len)
663{ 737{
664 u64 start_seg = ofs >> header->obj_order; 738 u64 start_seg;
665 u64 end_seg = (ofs + len - 1) >> header->obj_order; 739 u64 end_seg;
740
741 if (!len)
742 return 0;
743 if (len - 1 > U64_MAX - ofs)
744 return -ERANGE;
745
746 start_seg = ofs >> header->obj_order;
747 end_seg = (ofs + len - 1) >> header->obj_order;
748
666 return end_seg - start_seg + 1; 749 return end_seg - start_seg + 1;
667} 750}
668 751
@@ -724,7 +807,9 @@ static struct bio *bio_chain_clone(struct bio **old, struct bio **next,
724 struct bio_pair **bp, 807 struct bio_pair **bp,
725 int len, gfp_t gfpmask) 808 int len, gfp_t gfpmask)
726{ 809{
727 struct bio *tmp, *old_chain = *old, *new_chain = NULL, *tail = NULL; 810 struct bio *old_chain = *old;
811 struct bio *new_chain = NULL;
812 struct bio *tail;
728 int total = 0; 813 int total = 0;
729 814
730 if (*bp) { 815 if (*bp) {
@@ -733,9 +818,12 @@ static struct bio *bio_chain_clone(struct bio **old, struct bio **next,
733 } 818 }
734 819
735 while (old_chain && (total < len)) { 820 while (old_chain && (total < len)) {
821 struct bio *tmp;
822
736 tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs); 823 tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs);
737 if (!tmp) 824 if (!tmp)
738 goto err_out; 825 goto err_out;
826 gfpmask &= ~__GFP_WAIT; /* can't wait after the first */
739 827
740 if (total + old_chain->bi_size > len) { 828 if (total + old_chain->bi_size > len) {
741 struct bio_pair *bp; 829 struct bio_pair *bp;
@@ -763,24 +851,18 @@ static struct bio *bio_chain_clone(struct bio **old, struct bio **next,
763 } 851 }
764 852
765 tmp->bi_bdev = NULL; 853 tmp->bi_bdev = NULL;
766 gfpmask &= ~__GFP_WAIT;
767 tmp->bi_next = NULL; 854 tmp->bi_next = NULL;
768 855 if (new_chain)
769 if (!new_chain) {
770 new_chain = tail = tmp;
771 } else {
772 tail->bi_next = tmp; 856 tail->bi_next = tmp;
773 tail = tmp; 857 else
774 } 858 new_chain = tmp;
859 tail = tmp;
775 old_chain = old_chain->bi_next; 860 old_chain = old_chain->bi_next;
776 861
777 total += tmp->bi_size; 862 total += tmp->bi_size;
778 } 863 }
779 864
780 BUG_ON(total < len); 865 rbd_assert(total == len);
781
782 if (tail)
783 tail->bi_next = NULL;
784 866
785 *old = old_chain; 867 *old = old_chain;
786 868
@@ -938,8 +1020,9 @@ static int rbd_do_request(struct request *rq,
938 layout->fl_stripe_count = cpu_to_le32(1); 1020 layout->fl_stripe_count = cpu_to_le32(1);
939 layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); 1021 layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
940 layout->fl_pg_pool = cpu_to_le32(rbd_dev->pool_id); 1022 layout->fl_pg_pool = cpu_to_le32(rbd_dev->pool_id);
941 ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno, 1023 ret = ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno,
942 req, ops); 1024 req, ops);
1025 rbd_assert(ret == 0);
943 1026
944 ceph_osdc_build_request(req, ofs, &len, 1027 ceph_osdc_build_request(req, ofs, &len,
945 ops, 1028 ops,
@@ -1030,8 +1113,8 @@ static int rbd_req_sync_op(struct rbd_device *rbd_dev,
1030 int flags, 1113 int flags,
1031 struct ceph_osd_req_op *ops, 1114 struct ceph_osd_req_op *ops,
1032 const char *object_name, 1115 const char *object_name,
1033 u64 ofs, u64 len, 1116 u64 ofs, u64 inbound_size,
1034 char *buf, 1117 char *inbound,
1035 struct ceph_osd_request **linger_req, 1118 struct ceph_osd_request **linger_req,
1036 u64 *ver) 1119 u64 *ver)
1037{ 1120{
@@ -1039,15 +1122,15 @@ static int rbd_req_sync_op(struct rbd_device *rbd_dev,
1039 struct page **pages; 1122 struct page **pages;
1040 int num_pages; 1123 int num_pages;
1041 1124
1042 BUG_ON(ops == NULL); 1125 rbd_assert(ops != NULL);
1043 1126
1044 num_pages = calc_pages_for(ofs , len); 1127 num_pages = calc_pages_for(ofs, inbound_size);
1045 pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); 1128 pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
1046 if (IS_ERR(pages)) 1129 if (IS_ERR(pages))
1047 return PTR_ERR(pages); 1130 return PTR_ERR(pages);
1048 1131
1049 ret = rbd_do_request(NULL, rbd_dev, snapc, snapid, 1132 ret = rbd_do_request(NULL, rbd_dev, snapc, snapid,
1050 object_name, ofs, len, NULL, 1133 object_name, ofs, inbound_size, NULL,
1051 pages, num_pages, 1134 pages, num_pages,
1052 flags, 1135 flags,
1053 ops, 1136 ops,
@@ -1057,8 +1140,8 @@ static int rbd_req_sync_op(struct rbd_device *rbd_dev,
1057 if (ret < 0) 1140 if (ret < 0)
1058 goto done; 1141 goto done;
1059 1142
1060 if ((flags & CEPH_OSD_FLAG_READ) && buf) 1143 if ((flags & CEPH_OSD_FLAG_READ) && inbound)
1061 ret = ceph_copy_from_page_vector(pages, buf, ofs, ret); 1144 ret = ceph_copy_from_page_vector(pages, inbound, ofs, ret);
1062 1145
1063done: 1146done:
1064 ceph_release_page_vector(pages, num_pages); 1147 ceph_release_page_vector(pages, num_pages);
@@ -1085,14 +1168,11 @@ static int rbd_do_op(struct request *rq,
1085 struct ceph_osd_req_op *ops; 1168 struct ceph_osd_req_op *ops;
1086 u32 payload_len; 1169 u32 payload_len;
1087 1170
1088 seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO); 1171 seg_name = rbd_segment_name(rbd_dev, ofs);
1089 if (!seg_name) 1172 if (!seg_name)
1090 return -ENOMEM; 1173 return -ENOMEM;
1091 1174 seg_len = rbd_segment_length(rbd_dev, ofs, len);
1092 seg_len = rbd_get_segment(&rbd_dev->header, 1175 seg_ofs = rbd_segment_offset(rbd_dev, ofs);
1093 rbd_dev->header.object_prefix,
1094 ofs, len,
1095 seg_name, &seg_ofs);
1096 1176
1097 payload_len = (flags & CEPH_OSD_FLAG_WRITE ? seg_len : 0); 1177 payload_len = (flags & CEPH_OSD_FLAG_WRITE ? seg_len : 0);
1098 1178
@@ -1104,7 +1184,7 @@ static int rbd_do_op(struct request *rq,
1104 /* we've taken care of segment sizes earlier when we 1184 /* we've taken care of segment sizes earlier when we
1105 cloned the bios. We should never have a segment 1185 cloned the bios. We should never have a segment
1106 truncated at this point */ 1186 truncated at this point */
1107 BUG_ON(seg_len < len); 1187 rbd_assert(seg_len == len);
1108 1188
1109 ret = rbd_do_request(rq, rbd_dev, snapc, snapid, 1189 ret = rbd_do_request(rq, rbd_dev, snapc, snapid,
1110 seg_name, seg_ofs, seg_len, 1190 seg_name, seg_ofs, seg_len,
@@ -1306,89 +1386,36 @@ static int rbd_req_sync_unwatch(struct rbd_device *rbd_dev)
1306 return ret; 1386 return ret;
1307} 1387}
1308 1388
1309struct rbd_notify_info {
1310 struct rbd_device *rbd_dev;
1311};
1312
1313static void rbd_notify_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
1314{
1315 struct rbd_device *rbd_dev = (struct rbd_device *)data;
1316 if (!rbd_dev)
1317 return;
1318
1319 dout("rbd_notify_cb %s notify_id=%llu opcode=%u\n",
1320 rbd_dev->header_name, (unsigned long long) notify_id,
1321 (unsigned int) opcode);
1322}
1323
1324/* 1389/*
1325 * Request sync osd notify 1390 * Synchronous osd object method call
1326 */
1327static int rbd_req_sync_notify(struct rbd_device *rbd_dev)
1328{
1329 struct ceph_osd_req_op *ops;
1330 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
1331 struct ceph_osd_event *event;
1332 struct rbd_notify_info info;
1333 int payload_len = sizeof(u32) + sizeof(u32);
1334 int ret;
1335
1336 ops = rbd_create_rw_ops(1, CEPH_OSD_OP_NOTIFY, payload_len);
1337 if (!ops)
1338 return -ENOMEM;
1339
1340 info.rbd_dev = rbd_dev;
1341
1342 ret = ceph_osdc_create_event(osdc, rbd_notify_cb, 1,
1343 (void *)&info, &event);
1344 if (ret < 0)
1345 goto fail;
1346
1347 ops[0].watch.ver = 1;
1348 ops[0].watch.flag = 1;
1349 ops[0].watch.cookie = event->cookie;
1350 ops[0].watch.prot_ver = RADOS_NOTIFY_VER;
1351 ops[0].watch.timeout = 12;
1352
1353 ret = rbd_req_sync_op(rbd_dev, NULL,
1354 CEPH_NOSNAP,
1355 CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
1356 ops,
1357 rbd_dev->header_name,
1358 0, 0, NULL, NULL, NULL);
1359 if (ret < 0)
1360 goto fail_event;
1361
1362 ret = ceph_osdc_wait_event(event, CEPH_OSD_TIMEOUT_DEFAULT);
1363 dout("ceph_osdc_wait_event returned %d\n", ret);
1364 rbd_destroy_ops(ops);
1365 return 0;
1366
1367fail_event:
1368 ceph_osdc_cancel_event(event);
1369fail:
1370 rbd_destroy_ops(ops);
1371 return ret;
1372}
1373
1374/*
1375 * Request sync osd read
1376 */ 1391 */
1377static int rbd_req_sync_exec(struct rbd_device *rbd_dev, 1392static int rbd_req_sync_exec(struct rbd_device *rbd_dev,
1378 const char *object_name, 1393 const char *object_name,
1379 const char *class_name, 1394 const char *class_name,
1380 const char *method_name, 1395 const char *method_name,
1381 const char *data, 1396 const char *outbound,
1382 int len, 1397 size_t outbound_size,
1398 char *inbound,
1399 size_t inbound_size,
1400 int flags,
1383 u64 *ver) 1401 u64 *ver)
1384{ 1402{
1385 struct ceph_osd_req_op *ops; 1403 struct ceph_osd_req_op *ops;
1386 int class_name_len = strlen(class_name); 1404 int class_name_len = strlen(class_name);
1387 int method_name_len = strlen(method_name); 1405 int method_name_len = strlen(method_name);
1406 int payload_size;
1388 int ret; 1407 int ret;
1389 1408
1390 ops = rbd_create_rw_ops(1, CEPH_OSD_OP_CALL, 1409 /*
1391 class_name_len + method_name_len + len); 1410 * Any input parameters required by the method we're calling
1411 * will be sent along with the class and method names as
1412 * part of the message payload. That data and its size are
1413 * supplied via the indata and indata_len fields (named from
1414 * the perspective of the server side) in the OSD request
1415 * operation.
1416 */
1417 payload_size = class_name_len + method_name_len + outbound_size;
1418 ops = rbd_create_rw_ops(1, CEPH_OSD_OP_CALL, payload_size);
1392 if (!ops) 1419 if (!ops)
1393 return -ENOMEM; 1420 return -ENOMEM;
1394 1421
@@ -1397,14 +1424,14 @@ static int rbd_req_sync_exec(struct rbd_device *rbd_dev,
1397 ops[0].cls.method_name = method_name; 1424 ops[0].cls.method_name = method_name;
1398 ops[0].cls.method_len = (__u8) method_name_len; 1425 ops[0].cls.method_len = (__u8) method_name_len;
1399 ops[0].cls.argc = 0; 1426 ops[0].cls.argc = 0;
1400 ops[0].cls.indata = data; 1427 ops[0].cls.indata = outbound;
1401 ops[0].cls.indata_len = len; 1428 ops[0].cls.indata_len = outbound_size;
1402 1429
1403 ret = rbd_req_sync_op(rbd_dev, NULL, 1430 ret = rbd_req_sync_op(rbd_dev, NULL,
1404 CEPH_NOSNAP, 1431 CEPH_NOSNAP,
1405 CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, 1432 flags, ops,
1406 ops, 1433 object_name, 0, inbound_size, inbound,
1407 object_name, 0, 0, NULL, NULL, ver); 1434 NULL, ver);
1408 1435
1409 rbd_destroy_ops(ops); 1436 rbd_destroy_ops(ops);
1410 1437
@@ -1446,10 +1473,6 @@ static void rbd_rq_fn(struct request_queue *q)
1446 struct rbd_req_coll *coll; 1473 struct rbd_req_coll *coll;
1447 struct ceph_snap_context *snapc; 1474 struct ceph_snap_context *snapc;
1448 1475
1449 /* peek at request from block layer */
1450 if (!rq)
1451 break;
1452
1453 dout("fetched request\n"); 1476 dout("fetched request\n");
1454 1477
1455 /* filter out block requests we don't understand */ 1478 /* filter out block requests we don't understand */
@@ -1464,7 +1487,7 @@ static void rbd_rq_fn(struct request_queue *q)
1464 size = blk_rq_bytes(rq); 1487 size = blk_rq_bytes(rq);
1465 ofs = blk_rq_pos(rq) * SECTOR_SIZE; 1488 ofs = blk_rq_pos(rq) * SECTOR_SIZE;
1466 rq_bio = rq->bio; 1489 rq_bio = rq->bio;
1467 if (do_write && rbd_dev->read_only) { 1490 if (do_write && rbd_dev->mapping.read_only) {
1468 __blk_end_request_all(rq, -EROFS); 1491 __blk_end_request_all(rq, -EROFS);
1469 continue; 1492 continue;
1470 } 1493 }
@@ -1473,7 +1496,8 @@ static void rbd_rq_fn(struct request_queue *q)
1473 1496
1474 down_read(&rbd_dev->header_rwsem); 1497 down_read(&rbd_dev->header_rwsem);
1475 1498
1476 if (rbd_dev->snap_id != CEPH_NOSNAP && !rbd_dev->snap_exists) { 1499 if (rbd_dev->mapping.snap_id != CEPH_NOSNAP &&
1500 !rbd_dev->mapping.snap_exists) {
1477 up_read(&rbd_dev->header_rwsem); 1501 up_read(&rbd_dev->header_rwsem);
1478 dout("request for non-existent snapshot"); 1502 dout("request for non-existent snapshot");
1479 spin_lock_irq(q->queue_lock); 1503 spin_lock_irq(q->queue_lock);
@@ -1490,6 +1514,12 @@ static void rbd_rq_fn(struct request_queue *q)
1490 size, (unsigned long long) blk_rq_pos(rq) * SECTOR_SIZE); 1514 size, (unsigned long long) blk_rq_pos(rq) * SECTOR_SIZE);
1491 1515
1492 num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size); 1516 num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size);
1517 if (num_segs <= 0) {
1518 spin_lock_irq(q->queue_lock);
1519 __blk_end_request_all(rq, num_segs);
1520 ceph_put_snap_context(snapc);
1521 continue;
1522 }
1493 coll = rbd_alloc_coll(num_segs); 1523 coll = rbd_alloc_coll(num_segs);
1494 if (!coll) { 1524 if (!coll) {
1495 spin_lock_irq(q->queue_lock); 1525 spin_lock_irq(q->queue_lock);
@@ -1501,10 +1531,7 @@ static void rbd_rq_fn(struct request_queue *q)
1501 do { 1531 do {
1502 /* a bio clone to be passed down to OSD req */ 1532 /* a bio clone to be passed down to OSD req */
1503 dout("rq->bio->bi_vcnt=%hu\n", rq->bio->bi_vcnt); 1533 dout("rq->bio->bi_vcnt=%hu\n", rq->bio->bi_vcnt);
1504 op_size = rbd_get_segment(&rbd_dev->header, 1534 op_size = rbd_segment_length(rbd_dev, ofs, size);
1505 rbd_dev->header.object_prefix,
1506 ofs, size,
1507 NULL, NULL);
1508 kref_get(&coll->kref); 1535 kref_get(&coll->kref);
1509 bio = bio_chain_clone(&rq_bio, &next_bio, &bp, 1536 bio = bio_chain_clone(&rq_bio, &next_bio, &bp,
1510 op_size, GFP_ATOMIC); 1537 op_size, GFP_ATOMIC);
@@ -1524,7 +1551,7 @@ static void rbd_rq_fn(struct request_queue *q)
1524 coll, cur_seg); 1551 coll, cur_seg);
1525 else 1552 else
1526 rbd_req_read(rq, rbd_dev, 1553 rbd_req_read(rq, rbd_dev,
1527 rbd_dev->snap_id, 1554 rbd_dev->mapping.snap_id,
1528 ofs, 1555 ofs,
1529 op_size, bio, 1556 op_size, bio,
1530 coll, cur_seg); 1557 coll, cur_seg);
@@ -1580,8 +1607,6 @@ static void rbd_free_disk(struct rbd_device *rbd_dev)
1580 if (!disk) 1607 if (!disk)
1581 return; 1608 return;
1582 1609
1583 rbd_header_free(&rbd_dev->header);
1584
1585 if (disk->flags & GENHD_FL_UP) 1610 if (disk->flags & GENHD_FL_UP)
1586 del_gendisk(disk); 1611 del_gendisk(disk);
1587 if (disk->queue) 1612 if (disk->queue)
@@ -1590,105 +1615,96 @@ static void rbd_free_disk(struct rbd_device *rbd_dev)
1590} 1615}
1591 1616
1592/* 1617/*
1593 * reload the ondisk the header 1618 * Read the complete header for the given rbd device.
1619 *
1620 * Returns a pointer to a dynamically-allocated buffer containing
1621 * the complete and validated header. Caller can pass the address
1622 * of a variable that will be filled in with the version of the
1623 * header object at the time it was read.
1624 *
1625 * Returns a pointer-coded errno if a failure occurs.
1594 */ 1626 */
1595static int rbd_read_header(struct rbd_device *rbd_dev, 1627static struct rbd_image_header_ondisk *
1596 struct rbd_image_header *header) 1628rbd_dev_v1_header_read(struct rbd_device *rbd_dev, u64 *version)
1597{ 1629{
1598 ssize_t rc; 1630 struct rbd_image_header_ondisk *ondisk = NULL;
1599 struct rbd_image_header_ondisk *dh;
1600 u32 snap_count = 0; 1631 u32 snap_count = 0;
1601 u64 ver; 1632 u64 names_size = 0;
1602 size_t len; 1633 u32 want_count;
1634 int ret;
1603 1635
1604 /* 1636 /*
1605 * First reads the fixed-size header to determine the number 1637 * The complete header will include an array of its 64-bit
1606 * of snapshots, then re-reads it, along with all snapshot 1638 * snapshot ids, followed by the names of those snapshots as
1607 * records as well as their stored names. 1639 * a contiguous block of NUL-terminated strings. Note that
1640 * the number of snapshots could change by the time we read
1641 * it in, in which case we re-read it.
1608 */ 1642 */
1609 len = sizeof (*dh); 1643 do {
1610 while (1) { 1644 size_t size;
1611 dh = kmalloc(len, GFP_KERNEL); 1645
1612 if (!dh) 1646 kfree(ondisk);
1613 return -ENOMEM; 1647
1614 1648 size = sizeof (*ondisk);
1615 rc = rbd_req_sync_read(rbd_dev, 1649 size += snap_count * sizeof (struct rbd_image_snap_ondisk);
1616 CEPH_NOSNAP, 1650 size += names_size;
1651 ondisk = kmalloc(size, GFP_KERNEL);
1652 if (!ondisk)
1653 return ERR_PTR(-ENOMEM);
1654
1655 ret = rbd_req_sync_read(rbd_dev, CEPH_NOSNAP,
1617 rbd_dev->header_name, 1656 rbd_dev->header_name,
1618 0, len, 1657 0, size,
1619 (char *)dh, &ver); 1658 (char *) ondisk, version);
1620 if (rc < 0) 1659
1621 goto out_dh; 1660 if (ret < 0)
1622 1661 goto out_err;
1623 rc = rbd_header_from_disk(header, dh, snap_count); 1662 if (WARN_ON((size_t) ret < size)) {
1624 if (rc < 0) { 1663 ret = -ENXIO;
1625 if (rc == -ENXIO) 1664 pr_warning("short header read for image %s"
1626 pr_warning("unrecognized header format" 1665 " (want %zd got %d)\n",
1627 " for image %s\n", 1666 rbd_dev->image_name, size, ret);
1628 rbd_dev->image_name); 1667 goto out_err;
1629 goto out_dh; 1668 }
1669 if (!rbd_dev_ondisk_valid(ondisk)) {
1670 ret = -ENXIO;
1671 pr_warning("invalid header for image %s\n",
1672 rbd_dev->image_name);
1673 goto out_err;
1630 } 1674 }
1631 1675
1632 if (snap_count == header->total_snaps) 1676 names_size = le64_to_cpu(ondisk->snap_names_len);
1633 break; 1677 want_count = snap_count;
1678 snap_count = le32_to_cpu(ondisk->snap_count);
1679 } while (snap_count != want_count);
1634 1680
1635 snap_count = header->total_snaps; 1681 return ondisk;
1636 len = sizeof (*dh) +
1637 snap_count * sizeof(struct rbd_image_snap_ondisk) +
1638 header->snap_names_len;
1639 1682
1640 rbd_header_free(header); 1683out_err:
1641 kfree(dh); 1684 kfree(ondisk);
1642 }
1643 header->obj_version = ver;
1644 1685
1645out_dh: 1686 return ERR_PTR(ret);
1646 kfree(dh);
1647 return rc;
1648} 1687}
1649 1688
1650/* 1689/*
1651 * create a snapshot 1690 * reload the ondisk the header
1652 */ 1691 */
1653static int rbd_header_add_snap(struct rbd_device *rbd_dev, 1692static int rbd_read_header(struct rbd_device *rbd_dev,
1654 const char *snap_name, 1693 struct rbd_image_header *header)
1655 gfp_t gfp_flags)
1656{ 1694{
1657 int name_len = strlen(snap_name); 1695 struct rbd_image_header_ondisk *ondisk;
1658 u64 new_snapid; 1696 u64 ver = 0;
1659 int ret; 1697 int ret;
1660 void *data, *p, *e;
1661 struct ceph_mon_client *monc;
1662
1663 /* we should create a snapshot only if we're pointing at the head */
1664 if (rbd_dev->snap_id != CEPH_NOSNAP)
1665 return -EINVAL;
1666 1698
1667 monc = &rbd_dev->rbd_client->client->monc; 1699 ondisk = rbd_dev_v1_header_read(rbd_dev, &ver);
1668 ret = ceph_monc_create_snapid(monc, rbd_dev->pool_id, &new_snapid); 1700 if (IS_ERR(ondisk))
1669 dout("created snapid=%llu\n", (unsigned long long) new_snapid); 1701 return PTR_ERR(ondisk);
1670 if (ret < 0) 1702 ret = rbd_header_from_disk(header, ondisk);
1671 return ret; 1703 if (ret >= 0)
1672 1704 header->obj_version = ver;
1673 data = kmalloc(name_len + 16, gfp_flags); 1705 kfree(ondisk);
1674 if (!data)
1675 return -ENOMEM;
1676 1706
1677 p = data; 1707 return ret;
1678 e = data + name_len + 16;
1679
1680 ceph_encode_string_safe(&p, e, snap_name, name_len, bad);
1681 ceph_encode_64_safe(&p, e, new_snapid, bad);
1682
1683 ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name,
1684 "rbd", "snap_add",
1685 data, p - data, NULL);
1686
1687 kfree(data);
1688
1689 return ret < 0 ? ret : 0;
1690bad:
1691 return -ERANGE;
1692} 1708}
1693 1709
1694static void __rbd_remove_all_snaps(struct rbd_device *rbd_dev) 1710static void __rbd_remove_all_snaps(struct rbd_device *rbd_dev)
@@ -1715,11 +1731,15 @@ static int __rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver)
1715 down_write(&rbd_dev->header_rwsem); 1731 down_write(&rbd_dev->header_rwsem);
1716 1732
1717 /* resized? */ 1733 /* resized? */
1718 if (rbd_dev->snap_id == CEPH_NOSNAP) { 1734 if (rbd_dev->mapping.snap_id == CEPH_NOSNAP) {
1719 sector_t size = (sector_t) h.image_size / SECTOR_SIZE; 1735 sector_t size = (sector_t) h.image_size / SECTOR_SIZE;
1720 1736
1721 dout("setting size to %llu sectors", (unsigned long long) size); 1737 if (size != (sector_t) rbd_dev->mapping.size) {
1722 set_capacity(rbd_dev->disk, size); 1738 dout("setting size to %llu sectors",
1739 (unsigned long long) size);
1740 rbd_dev->mapping.size = (u64) size;
1741 set_capacity(rbd_dev->disk, size);
1742 }
1723 } 1743 }
1724 1744
1725 /* rbd_dev->header.object_prefix shouldn't change */ 1745 /* rbd_dev->header.object_prefix shouldn't change */
@@ -1732,16 +1752,16 @@ static int __rbd_refresh_header(struct rbd_device *rbd_dev, u64 *hver)
1732 *hver = h.obj_version; 1752 *hver = h.obj_version;
1733 rbd_dev->header.obj_version = h.obj_version; 1753 rbd_dev->header.obj_version = h.obj_version;
1734 rbd_dev->header.image_size = h.image_size; 1754 rbd_dev->header.image_size = h.image_size;
1735 rbd_dev->header.total_snaps = h.total_snaps;
1736 rbd_dev->header.snapc = h.snapc; 1755 rbd_dev->header.snapc = h.snapc;
1737 rbd_dev->header.snap_names = h.snap_names; 1756 rbd_dev->header.snap_names = h.snap_names;
1738 rbd_dev->header.snap_names_len = h.snap_names_len;
1739 rbd_dev->header.snap_sizes = h.snap_sizes; 1757 rbd_dev->header.snap_sizes = h.snap_sizes;
1740 /* Free the extra copy of the object prefix */ 1758 /* Free the extra copy of the object prefix */
1741 WARN_ON(strcmp(rbd_dev->header.object_prefix, h.object_prefix)); 1759 WARN_ON(strcmp(rbd_dev->header.object_prefix, h.object_prefix));
1742 kfree(h.object_prefix); 1760 kfree(h.object_prefix);
1743 1761
1744 ret = __rbd_init_snaps_header(rbd_dev); 1762 ret = rbd_dev_snaps_update(rbd_dev);
1763 if (!ret)
1764 ret = rbd_dev_snaps_register(rbd_dev);
1745 1765
1746 up_write(&rbd_dev->header_rwsem); 1766 up_write(&rbd_dev->header_rwsem);
1747 1767
@@ -1763,29 +1783,12 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
1763{ 1783{
1764 struct gendisk *disk; 1784 struct gendisk *disk;
1765 struct request_queue *q; 1785 struct request_queue *q;
1766 int rc;
1767 u64 segment_size; 1786 u64 segment_size;
1768 u64 total_size = 0;
1769
1770 /* contact OSD, request size info about the object being mapped */
1771 rc = rbd_read_header(rbd_dev, &rbd_dev->header);
1772 if (rc)
1773 return rc;
1774
1775 /* no need to lock here, as rbd_dev is not registered yet */
1776 rc = __rbd_init_snaps_header(rbd_dev);
1777 if (rc)
1778 return rc;
1779
1780 rc = rbd_header_set_snap(rbd_dev, &total_size);
1781 if (rc)
1782 return rc;
1783 1787
1784 /* create gendisk info */ 1788 /* create gendisk info */
1785 rc = -ENOMEM;
1786 disk = alloc_disk(RBD_MINORS_PER_MAJOR); 1789 disk = alloc_disk(RBD_MINORS_PER_MAJOR);
1787 if (!disk) 1790 if (!disk)
1788 goto out; 1791 return -ENOMEM;
1789 1792
1790 snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d", 1793 snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d",
1791 rbd_dev->dev_id); 1794 rbd_dev->dev_id);
@@ -1795,7 +1798,6 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
1795 disk->private_data = rbd_dev; 1798 disk->private_data = rbd_dev;
1796 1799
1797 /* init rq */ 1800 /* init rq */
1798 rc = -ENOMEM;
1799 q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock); 1801 q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock);
1800 if (!q) 1802 if (!q)
1801 goto out_disk; 1803 goto out_disk;
@@ -1816,20 +1818,14 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
1816 q->queuedata = rbd_dev; 1818 q->queuedata = rbd_dev;
1817 1819
1818 rbd_dev->disk = disk; 1820 rbd_dev->disk = disk;
1819 rbd_dev->q = q;
1820 1821
1821 /* finally, announce the disk to the world */ 1822 set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE);
1822 set_capacity(disk, total_size / SECTOR_SIZE);
1823 add_disk(disk);
1824 1823
1825 pr_info("%s: added with size 0x%llx\n",
1826 disk->disk_name, (unsigned long long)total_size);
1827 return 0; 1824 return 0;
1828
1829out_disk: 1825out_disk:
1830 put_disk(disk); 1826 put_disk(disk);
1831out: 1827
1832 return rc; 1828 return -ENOMEM;
1833} 1829}
1834 1830
1835/* 1831/*
@@ -1854,6 +1850,19 @@ static ssize_t rbd_size_show(struct device *dev,
1854 return sprintf(buf, "%llu\n", (unsigned long long) size * SECTOR_SIZE); 1850 return sprintf(buf, "%llu\n", (unsigned long long) size * SECTOR_SIZE);
1855} 1851}
1856 1852
1853/*
1854 * Note this shows the features for whatever's mapped, which is not
1855 * necessarily the base image.
1856 */
1857static ssize_t rbd_features_show(struct device *dev,
1858 struct device_attribute *attr, char *buf)
1859{
1860 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1861
1862 return sprintf(buf, "0x%016llx\n",
1863 (unsigned long long) rbd_dev->mapping.features);
1864}
1865
1857static ssize_t rbd_major_show(struct device *dev, 1866static ssize_t rbd_major_show(struct device *dev,
1858 struct device_attribute *attr, char *buf) 1867 struct device_attribute *attr, char *buf)
1859{ 1868{
@@ -1895,13 +1904,25 @@ static ssize_t rbd_name_show(struct device *dev,
1895 return sprintf(buf, "%s\n", rbd_dev->image_name); 1904 return sprintf(buf, "%s\n", rbd_dev->image_name);
1896} 1905}
1897 1906
1907static ssize_t rbd_image_id_show(struct device *dev,
1908 struct device_attribute *attr, char *buf)
1909{
1910 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1911
1912 return sprintf(buf, "%s\n", rbd_dev->image_id);
1913}
1914
1915/*
1916 * Shows the name of the currently-mapped snapshot (or
1917 * RBD_SNAP_HEAD_NAME for the base image).
1918 */
1898static ssize_t rbd_snap_show(struct device *dev, 1919static ssize_t rbd_snap_show(struct device *dev,
1899 struct device_attribute *attr, 1920 struct device_attribute *attr,
1900 char *buf) 1921 char *buf)
1901{ 1922{
1902 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 1923 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
1903 1924
1904 return sprintf(buf, "%s\n", rbd_dev->snap_name); 1925 return sprintf(buf, "%s\n", rbd_dev->mapping.snap_name);
1905} 1926}
1906 1927
1907static ssize_t rbd_image_refresh(struct device *dev, 1928static ssize_t rbd_image_refresh(struct device *dev,
@@ -1918,25 +1939,27 @@ static ssize_t rbd_image_refresh(struct device *dev,
1918} 1939}
1919 1940
/* Per-device sysfs attributes: S_IRUGO entries have only a show handler; the S_IWUSR entries have only a store handler. */
1920static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL); 1941static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL);
1942static DEVICE_ATTR(features, S_IRUGO, rbd_features_show, NULL);
1921static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL); 1943static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL);
1922static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL); 1944static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL);
1923static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL); 1945static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL);
1924static DEVICE_ATTR(pool_id, S_IRUGO, rbd_pool_id_show, NULL); 1946static DEVICE_ATTR(pool_id, S_IRUGO, rbd_pool_id_show, NULL);
1925static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL); 1947static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL);
1948static DEVICE_ATTR(image_id, S_IRUGO, rbd_image_id_show, NULL);
1926static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh); 1949static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh);
1927static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL); 1950static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL);
1928static DEVICE_ATTR(create_snap, S_IWUSR, NULL, rbd_snap_add);
1929 1951
/* NULL-terminated list of the attributes declared above. */
1930static struct attribute *rbd_attrs[] = { 1952static struct attribute *rbd_attrs[] = {
1931 &dev_attr_size.attr, 1953 &dev_attr_size.attr,
1954 &dev_attr_features.attr,
1932 &dev_attr_major.attr, 1955 &dev_attr_major.attr,
1933 &dev_attr_client_id.attr, 1956 &dev_attr_client_id.attr,
1934 &dev_attr_pool.attr, 1957 &dev_attr_pool.attr,
1935 &dev_attr_pool_id.attr, 1958 &dev_attr_pool_id.attr,
1936 &dev_attr_name.attr, 1959 &dev_attr_name.attr,
1960 &dev_attr_image_id.attr,
1937 &dev_attr_current_snap.attr, 1961 &dev_attr_current_snap.attr,
1938 &dev_attr_refresh.attr, 1962 &dev_attr_refresh.attr,
1939 &dev_attr_create_snap.attr,
1940 NULL 1963 NULL
1941}; 1964};
1942 1965
@@ -1982,12 +2005,24 @@ static ssize_t rbd_snap_id_show(struct device *dev,
1982 return sprintf(buf, "%llu\n", (unsigned long long)snap->id); 2005 return sprintf(buf, "%llu\n", (unsigned long long)snap->id);
1983} 2006}
1984 2007
2008static ssize_t rbd_snap_features_show(struct device *dev,
2009 struct device_attribute *attr,
2010 char *buf)
2011{
2012 struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
2013
2014 return sprintf(buf, "0x%016llx\n",
2015 (unsigned long long) snap->features);
2016}
2017
/* Per-snapshot sysfs attributes; all are S_IRUGO with a show handler and no store handler. */
1985static DEVICE_ATTR(snap_size, S_IRUGO, rbd_snap_size_show, NULL); 2018static DEVICE_ATTR(snap_size, S_IRUGO, rbd_snap_size_show, NULL);
1986static DEVICE_ATTR(snap_id, S_IRUGO, rbd_snap_id_show, NULL); 2019static DEVICE_ATTR(snap_id, S_IRUGO, rbd_snap_id_show, NULL);
2020static DEVICE_ATTR(snap_features, S_IRUGO, rbd_snap_features_show, NULL);
1987 2021
/* NULL-terminated list of the snapshot attributes declared above. */
1988static struct attribute *rbd_snap_attrs[] = { 2022static struct attribute *rbd_snap_attrs[] = {
1989 &dev_attr_snap_size.attr, 2023 &dev_attr_snap_size.attr,
1990 &dev_attr_snap_id.attr, 2024 &dev_attr_snap_id.attr,
2025 &dev_attr_snap_features.attr,
1991 NULL, 2026 NULL,
1992}; 2027};
1993 2028
@@ -2012,10 +2047,21 @@ static struct device_type rbd_snap_device_type = {
2012 .release = rbd_snap_dev_release, 2047 .release = rbd_snap_dev_release,
2013}; 2048};
2014 2049
2050static bool rbd_snap_registered(struct rbd_snap *snap)
2051{
2052 bool ret = snap->dev.type == &rbd_snap_device_type;
2053 bool reg = device_is_registered(&snap->dev);
2054
2055 rbd_assert(!ret ^ reg);
2056
2057 return ret;
2058}
2059
2015static void __rbd_remove_snap_dev(struct rbd_snap *snap) 2060static void __rbd_remove_snap_dev(struct rbd_snap *snap)
2016{ 2061{
2017 list_del(&snap->node); 2062 list_del(&snap->node);
2018 device_unregister(&snap->dev); 2063 if (device_is_registered(&snap->dev))
2064 device_unregister(&snap->dev);
2019} 2065}
2020 2066
2021static int rbd_register_snap_dev(struct rbd_snap *snap, 2067static int rbd_register_snap_dev(struct rbd_snap *snap,
@@ -2028,13 +2074,17 @@ static int rbd_register_snap_dev(struct rbd_snap *snap,
2028 dev->parent = parent; 2074 dev->parent = parent;
2029 dev->release = rbd_snap_dev_release; 2075 dev->release = rbd_snap_dev_release;
2030 dev_set_name(dev, "snap_%s", snap->name); 2076 dev_set_name(dev, "snap_%s", snap->name);
2077 dout("%s: registering device for snapshot %s\n", __func__, snap->name);
2078
2031 ret = device_register(dev); 2079 ret = device_register(dev);
2032 2080
2033 return ret; 2081 return ret;
2034} 2082}
2035 2083
2036static struct rbd_snap *__rbd_add_snap_dev(struct rbd_device *rbd_dev, 2084static struct rbd_snap *__rbd_add_snap_dev(struct rbd_device *rbd_dev,
2037 int i, const char *name) 2085 const char *snap_name,
2086 u64 snap_id, u64 snap_size,
2087 u64 snap_features)
2038{ 2088{
2039 struct rbd_snap *snap; 2089 struct rbd_snap *snap;
2040 int ret; 2090 int ret;
@@ -2044,17 +2094,13 @@ static struct rbd_snap *__rbd_add_snap_dev(struct rbd_device *rbd_dev,
2044 return ERR_PTR(-ENOMEM); 2094 return ERR_PTR(-ENOMEM);
2045 2095
2046 ret = -ENOMEM; 2096 ret = -ENOMEM;
2047 snap->name = kstrdup(name, GFP_KERNEL); 2097 snap->name = kstrdup(snap_name, GFP_KERNEL);
2048 if (!snap->name) 2098 if (!snap->name)
2049 goto err; 2099 goto err;
2050 2100
2051 snap->size = rbd_dev->header.snap_sizes[i]; 2101 snap->id = snap_id;
2052 snap->id = rbd_dev->header.snapc->snaps[i]; 2102 snap->size = snap_size;
2053 if (device_is_registered(&rbd_dev->dev)) { 2103 snap->features = snap_features;
2054 ret = rbd_register_snap_dev(snap, &rbd_dev->dev);
2055 if (ret < 0)
2056 goto err;
2057 }
2058 2104
2059 return snap; 2105 return snap;
2060 2106
@@ -2065,128 +2111,439 @@ err:
2065 return ERR_PTR(ret); 2111 return ERR_PTR(ret);
2066} 2112}
2067 2113
2114static char *rbd_dev_v1_snap_info(struct rbd_device *rbd_dev, u32 which,
2115 u64 *snap_size, u64 *snap_features)
2116{
2117 char *snap_name;
2118
2119 rbd_assert(which < rbd_dev->header.snapc->num_snaps);
2120
2121 *snap_size = rbd_dev->header.snap_sizes[which];
2122 *snap_features = 0; /* No features for v1 */
2123
2124 /* Skip over names until we find the one we are looking for */
2125
2126 snap_name = rbd_dev->header.snap_names;
2127 while (which--)
2128 snap_name += strlen(snap_name) + 1;
2129
2130 return snap_name;
2131}
2132
2068/* 2133/*
2069 * search for the previous snap in a null delimited string list 2134 * Get the size and object order for an image snapshot, or if
2135 * snap_id is CEPH_NOSNAP, gets this information for the base
2136 * image.
2070 */ 2137 */
2071const char *rbd_prev_snap_name(const char *name, const char *start) 2138static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
2139 u8 *order, u64 *snap_size)
2072{ 2140{
2073 if (name < start + 2) 2141 __le64 snapid = cpu_to_le64(snap_id);
2074 return NULL; 2142 int ret;
2143 struct {
2144 u8 order;
2145 __le64 size;
2146 } __attribute__ ((packed)) size_buf = { 0 };
2147
2148 ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name,
2149 "rbd", "get_size",
2150 (char *) &snapid, sizeof (snapid),
2151 (char *) &size_buf, sizeof (size_buf),
2152 CEPH_OSD_FLAG_READ, NULL);
2153 dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret);
2154 if (ret < 0)
2155 return ret;
2156
2157 *order = size_buf.order;
2158 *snap_size = le64_to_cpu(size_buf.size);
2159
2160 dout(" snap_id 0x%016llx order = %u, snap_size = %llu\n",
2161 (unsigned long long) snap_id, (unsigned int) *order,
2162 (unsigned long long) *snap_size);
2163
2164 return 0;
2165}
2166
2167static int rbd_dev_v2_image_size(struct rbd_device *rbd_dev)
2168{
2169 return _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP,
2170 &rbd_dev->header.obj_order,
2171 &rbd_dev->header.image_size);
2172}
2173
2174static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev)
2175{
2176 void *reply_buf;
2177 int ret;
2178 void *p;
2179
2180 reply_buf = kzalloc(RBD_OBJ_PREFIX_LEN_MAX, GFP_KERNEL);
2181 if (!reply_buf)
2182 return -ENOMEM;
2183
2184 ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name,
2185 "rbd", "get_object_prefix",
2186 NULL, 0,
2187 reply_buf, RBD_OBJ_PREFIX_LEN_MAX,
2188 CEPH_OSD_FLAG_READ, NULL);
2189 dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret);
2190 if (ret < 0)
2191 goto out;
2192
2193 p = reply_buf;
2194 rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p,
2195 p + RBD_OBJ_PREFIX_LEN_MAX,
2196 NULL, GFP_NOIO);
2197
2198 if (IS_ERR(rbd_dev->header.object_prefix)) {
2199 ret = PTR_ERR(rbd_dev->header.object_prefix);
2200 rbd_dev->header.object_prefix = NULL;
2201 } else {
2202 dout(" object_prefix = %s\n", rbd_dev->header.object_prefix);
2203 }
2075 2204
2076 name -= 2; 2205out:
2077 while (*name) { 2206 kfree(reply_buf);
2078 if (name == start) 2207
2079 return start; 2208 return ret;
2080 name--; 2209}
2210
2211static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
2212 u64 *snap_features)
2213{
2214 __le64 snapid = cpu_to_le64(snap_id);
2215 struct {
2216 __le64 features;
2217 __le64 incompat;
2218 } features_buf = { 0 };
2219 int ret;
2220
2221 ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name,
2222 "rbd", "get_features",
2223 (char *) &snapid, sizeof (snapid),
2224 (char *) &features_buf, sizeof (features_buf),
2225 CEPH_OSD_FLAG_READ, NULL);
2226 dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret);
2227 if (ret < 0)
2228 return ret;
2229 *snap_features = le64_to_cpu(features_buf.features);
2230
2231 dout(" snap_id 0x%016llx features = 0x%016llx incompat = 0x%016llx\n",
2232 (unsigned long long) snap_id,
2233 (unsigned long long) *snap_features,
2234 (unsigned long long) le64_to_cpu(features_buf.incompat));
2235
2236 return 0;
2237}
2238
2239static int rbd_dev_v2_features(struct rbd_device *rbd_dev)
2240{
2241 return _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP,
2242 &rbd_dev->header.features);
2243}
2244
2245static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, u64 *ver)
2246{
2247 size_t size;
2248 int ret;
2249 void *reply_buf;
2250 void *p;
2251 void *end;
2252 u64 seq;
2253 u32 snap_count;
2254 struct ceph_snap_context *snapc;
2255 u32 i;
2256
2257 /*
2258 * We'll need room for the seq value (maximum snapshot id),
2259 * snapshot count, and array of that many snapshot ids.
2260 * For now we have a fixed upper limit on the number we're
2261 * prepared to receive.
2262 */
2263 size = sizeof (__le64) + sizeof (__le32) +
2264 RBD_MAX_SNAP_COUNT * sizeof (__le64);
2265 reply_buf = kzalloc(size, GFP_KERNEL);
2266 if (!reply_buf)
2267 return -ENOMEM;
2268
2269 ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name,
2270 "rbd", "get_snapcontext",
2271 NULL, 0,
2272 reply_buf, size,
2273 CEPH_OSD_FLAG_READ, ver);
2274 dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret);
2275 if (ret < 0)
2276 goto out;
2277
2278 ret = -ERANGE;
2279 p = reply_buf;
2280 end = (char *) reply_buf + size;
2281 ceph_decode_64_safe(&p, end, seq, out);
2282 ceph_decode_32_safe(&p, end, snap_count, out);
2283
2284 /*
2285 * Make sure the reported number of snapshot ids wouldn't go
2286 * beyond the end of our buffer. But before checking that,
2287 * make sure the computed size of the snapshot context we
2288 * allocate is representable in a size_t.
2289 */
2290 if (snap_count > (SIZE_MAX - sizeof (struct ceph_snap_context))
2291 / sizeof (u64)) {
2292 ret = -EINVAL;
2293 goto out;
2081 } 2294 }
2082 return name + 1; 2295 if (!ceph_has_room(&p, end, snap_count * sizeof (__le64)))
2296 goto out;
2297
2298 size = sizeof (struct ceph_snap_context) +
2299 snap_count * sizeof (snapc->snaps[0]);
2300 snapc = kmalloc(size, GFP_KERNEL);
2301 if (!snapc) {
2302 ret = -ENOMEM;
2303 goto out;
2304 }
2305
2306 atomic_set(&snapc->nref, 1);
2307 snapc->seq = seq;
2308 snapc->num_snaps = snap_count;
2309 for (i = 0; i < snap_count; i++)
2310 snapc->snaps[i] = ceph_decode_64(&p);
2311
2312 rbd_dev->header.snapc = snapc;
2313
2314 dout(" snap context seq = %llu, snap_count = %u\n",
2315 (unsigned long long) seq, (unsigned int) snap_count);
2316
2317out:
2318 kfree(reply_buf);
2319
2320 return 0;
2083} 2321}
2084 2322
2085/* 2323static char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u32 which)
2086 * compare the old list of snapshots that we have to what's in the header
2087 * and update it accordingly. Note that the header holds the snapshots
2088 * in a reverse order (from newest to oldest) and we need to go from
2089 * older to new so that we don't get a duplicate snap name when
2090 * doing the process (e.g., removed snapshot and recreated a new
2091 * one with the same name.
2092 */
2093static int __rbd_init_snaps_header(struct rbd_device *rbd_dev)
2094{ 2324{
2095 const char *name, *first_name; 2325 size_t size;
2096 int i = rbd_dev->header.total_snaps; 2326 void *reply_buf;
2097 struct rbd_snap *snap, *old_snap = NULL; 2327 __le64 snap_id;
2098 struct list_head *p, *n; 2328 int ret;
2329 void *p;
2330 void *end;
2331 size_t snap_name_len;
2332 char *snap_name;
2333
2334 size = sizeof (__le32) + RBD_MAX_SNAP_NAME_LEN;
2335 reply_buf = kmalloc(size, GFP_KERNEL);
2336 if (!reply_buf)
2337 return ERR_PTR(-ENOMEM);
2099 2338
2100 first_name = rbd_dev->header.snap_names; 2339 snap_id = cpu_to_le64(rbd_dev->header.snapc->snaps[which]);
2101 name = first_name + rbd_dev->header.snap_names_len; 2340 ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name,
2341 "rbd", "get_snapshot_name",
2342 (char *) &snap_id, sizeof (snap_id),
2343 reply_buf, size,
2344 CEPH_OSD_FLAG_READ, NULL);
2345 dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret);
2346 if (ret < 0)
2347 goto out;
2102 2348
2103 list_for_each_prev_safe(p, n, &rbd_dev->snaps) { 2349 p = reply_buf;
2104 u64 cur_id; 2350 end = (char *) reply_buf + size;
2351 snap_name_len = 0;
2352 snap_name = ceph_extract_encoded_string(&p, end, &snap_name_len,
2353 GFP_KERNEL);
2354 if (IS_ERR(snap_name)) {
2355 ret = PTR_ERR(snap_name);
2356 goto out;
2357 } else {
2358 dout(" snap_id 0x%016llx snap_name = %s\n",
2359 (unsigned long long) le64_to_cpu(snap_id), snap_name);
2360 }
2361 kfree(reply_buf);
2105 2362
2106 old_snap = list_entry(p, struct rbd_snap, node); 2363 return snap_name;
2364out:
2365 kfree(reply_buf);
2107 2366
2108 if (i) 2367 return ERR_PTR(ret);
2109 cur_id = rbd_dev->header.snapc->snaps[i - 1]; 2368}
2110 2369
2111 if (!i || old_snap->id < cur_id) { 2370static char *rbd_dev_v2_snap_info(struct rbd_device *rbd_dev, u32 which,
2112 /* 2371 u64 *snap_size, u64 *snap_features)
2113 * old_snap->id was skipped, thus was 2372{
2114 * removed. If this rbd_dev is mapped to 2373 __le64 snap_id;
2115 * the removed snapshot, record that it no 2374 u8 order;
2116 * longer exists, to prevent further I/O. 2375 int ret;
2117 */ 2376
2118 if (rbd_dev->snap_id == old_snap->id) 2377 snap_id = rbd_dev->header.snapc->snaps[which];
2119 rbd_dev->snap_exists = false; 2378 ret = _rbd_dev_v2_snap_size(rbd_dev, snap_id, &order, snap_size);
2120 __rbd_remove_snap_dev(old_snap); 2379 if (ret)
2121 continue; 2380 return ERR_PTR(ret);
2122 } 2381 ret = _rbd_dev_v2_snap_features(rbd_dev, snap_id, snap_features);
2123 if (old_snap->id == cur_id) { 2382 if (ret)
2124 /* we have this snapshot already */ 2383 return ERR_PTR(ret);
2125 i--; 2384
2126 name = rbd_prev_snap_name(name, first_name); 2385 return rbd_dev_v2_snap_name(rbd_dev, which);
2386}
2387
2388static char *rbd_dev_snap_info(struct rbd_device *rbd_dev, u32 which,
2389 u64 *snap_size, u64 *snap_features)
2390{
2391 if (rbd_dev->image_format == 1)
2392 return rbd_dev_v1_snap_info(rbd_dev, which,
2393 snap_size, snap_features);
2394 if (rbd_dev->image_format == 2)
2395 return rbd_dev_v2_snap_info(rbd_dev, which,
2396 snap_size, snap_features);
2397 return ERR_PTR(-EINVAL);
2398}
2399
2400/*
2401 * Scan the rbd device's current snapshot list and compare it to the
2402 * newly-received snapshot context. Remove any existing snapshots
2403 * not present in the new snapshot context. Add a new snapshot for
2404 * any snapshots in the snapshot context not in the current list.
2405 * And verify there are no changes to snapshots we already know
2406 * about.
2407 *
2408 * Assumes the snapshots in the snapshot context are sorted by
2409 * snapshot id, highest id first. (Snapshots in the rbd_dev's list
2410 * are also maintained in that order.)
2411 */
2412static int rbd_dev_snaps_update(struct rbd_device *rbd_dev)
2413{
2414 struct ceph_snap_context *snapc = rbd_dev->header.snapc;
2415 const u32 snap_count = snapc->num_snaps;
2416 struct list_head *head = &rbd_dev->snaps;
2417 struct list_head *links = head->next;
2418 u32 index = 0;
2419
2420 dout("%s: snap count is %u\n", __func__, (unsigned int) snap_count);
2421 while (index < snap_count || links != head) {
2422 u64 snap_id;
2423 struct rbd_snap *snap;
2424 char *snap_name;
2425 u64 snap_size = 0;
2426 u64 snap_features = 0;
2427
2428 snap_id = index < snap_count ? snapc->snaps[index]
2429 : CEPH_NOSNAP;
2430 snap = links != head ? list_entry(links, struct rbd_snap, node)
2431 : NULL;
2432 rbd_assert(!snap || snap->id != CEPH_NOSNAP);
2433
2434 if (snap_id == CEPH_NOSNAP || (snap && snap->id > snap_id)) {
2435 struct list_head *next = links->next;
2436
2437 /* Existing snapshot not in the new snap context */
2438
2439 if (rbd_dev->mapping.snap_id == snap->id)
2440 rbd_dev->mapping.snap_exists = false;
2441 __rbd_remove_snap_dev(snap);
2442 dout("%ssnap id %llu has been removed\n",
2443 rbd_dev->mapping.snap_id == snap->id ?
2444 "mapped " : "",
2445 (unsigned long long) snap->id);
2446
2447 /* Done with this list entry; advance */
2448
2449 links = next;
2127 continue; 2450 continue;
2128 } 2451 }
2129 for (; i > 0; 2452
2130 i--, name = rbd_prev_snap_name(name, first_name)) { 2453 snap_name = rbd_dev_snap_info(rbd_dev, index,
2131 if (!name) { 2454 &snap_size, &snap_features);
2132 WARN_ON(1); 2455 if (IS_ERR(snap_name))
2133 return -EINVAL; 2456 return PTR_ERR(snap_name);
2457
2458 dout("entry %u: snap_id = %llu\n", (unsigned int) snap_count,
2459 (unsigned long long) snap_id);
2460 if (!snap || (snap_id != CEPH_NOSNAP && snap->id < snap_id)) {
2461 struct rbd_snap *new_snap;
2462
2463 /* We haven't seen this snapshot before */
2464
2465 new_snap = __rbd_add_snap_dev(rbd_dev, snap_name,
2466 snap_id, snap_size, snap_features);
2467 if (IS_ERR(new_snap)) {
2468 int err = PTR_ERR(new_snap);
2469
2470 dout(" failed to add dev, error %d\n", err);
2471
2472 return err;
2134 } 2473 }
2135 cur_id = rbd_dev->header.snapc->snaps[i]; 2474
2136 /* snapshot removal? handle it above */ 2475 /* New goes before existing, or at end of list */
2137 if (cur_id >= old_snap->id) 2476
2138 break; 2477 dout(" added dev%s\n", snap ? "" : " at end\n");
2139 /* a new snapshot */ 2478 if (snap)
2140 snap = __rbd_add_snap_dev(rbd_dev, i - 1, name); 2479 list_add_tail(&new_snap->node, &snap->node);
2141 if (IS_ERR(snap)) 2480 else
2142 return PTR_ERR(snap); 2481 list_add_tail(&new_snap->node, head);
2143 2482 } else {
2144 /* note that we add it backward so using n and not p */ 2483 /* Already have this one */
2145 list_add(&snap->node, n); 2484
2146 p = &snap->node; 2485 dout(" already present\n");
2486
2487 rbd_assert(snap->size == snap_size);
2488 rbd_assert(!strcmp(snap->name, snap_name));
2489 rbd_assert(snap->features == snap_features);
2490
2491 /* Done with this list entry; advance */
2492
2493 links = links->next;
2147 } 2494 }
2495
2496 /* Advance to the next entry in the snapshot context */
2497
2498 index++;
2148 } 2499 }
2149 /* we're done going over the old snap list, just add what's left */ 2500 dout("%s: done\n", __func__);
2150 for (; i > 0; i--) { 2501
2151 name = rbd_prev_snap_name(name, first_name); 2502 return 0;
2152 if (!name) { 2503}
2153 WARN_ON(1); 2504
2154 return -EINVAL; 2505/*
2506 * Scan the list of snapshots and register the devices for any that
2507 * have not already been registered.
2508 */
2509static int rbd_dev_snaps_register(struct rbd_device *rbd_dev)
2510{
2511 struct rbd_snap *snap;
2512 int ret = 0;
2513
2514 dout("%s called\n", __func__);
2515 if (WARN_ON(!device_is_registered(&rbd_dev->dev)))
2516 return -EIO;
2517
2518 list_for_each_entry(snap, &rbd_dev->snaps, node) {
2519 if (!rbd_snap_registered(snap)) {
2520 ret = rbd_register_snap_dev(snap, &rbd_dev->dev);
2521 if (ret < 0)
2522 break;
2155 } 2523 }
2156 snap = __rbd_add_snap_dev(rbd_dev, i - 1, name);
2157 if (IS_ERR(snap))
2158 return PTR_ERR(snap);
2159 list_add(&snap->node, &rbd_dev->snaps);
2160 } 2524 }
2525 dout("%s: returning %d\n", __func__, ret);
2161 2526
2162 return 0; 2527 return ret;
2163} 2528}
2164 2529
2165static int rbd_bus_add_dev(struct rbd_device *rbd_dev) 2530static int rbd_bus_add_dev(struct rbd_device *rbd_dev)
2166{ 2531{
2167 int ret;
2168 struct device *dev; 2532 struct device *dev;
2169 struct rbd_snap *snap; 2533 int ret;
2170 2534
2171 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 2535 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
2172 dev = &rbd_dev->dev;
2173 2536
2537 dev = &rbd_dev->dev;
2174 dev->bus = &rbd_bus_type; 2538 dev->bus = &rbd_bus_type;
2175 dev->type = &rbd_device_type; 2539 dev->type = &rbd_device_type;
2176 dev->parent = &rbd_root_dev; 2540 dev->parent = &rbd_root_dev;
2177 dev->release = rbd_dev_release; 2541 dev->release = rbd_dev_release;
2178 dev_set_name(dev, "%d", rbd_dev->dev_id); 2542 dev_set_name(dev, "%d", rbd_dev->dev_id);
2179 ret = device_register(dev); 2543 ret = device_register(dev);
2180 if (ret < 0)
2181 goto out;
2182 2544
2183 list_for_each_entry(snap, &rbd_dev->snaps, node) {
2184 ret = rbd_register_snap_dev(snap, &rbd_dev->dev);
2185 if (ret < 0)
2186 break;
2187 }
2188out:
2189 mutex_unlock(&ctl_mutex); 2545 mutex_unlock(&ctl_mutex);
2546
2190 return ret; 2547 return ret;
2191} 2548}
2192 2549
@@ -2211,33 +2568,37 @@ static int rbd_init_watch_dev(struct rbd_device *rbd_dev)
2211 return ret; 2568 return ret;
2212} 2569}
2213 2570
2214static atomic64_t rbd_id_max = ATOMIC64_INIT(0); 2571static atomic64_t rbd_dev_id_max = ATOMIC64_INIT(0);
2215 2572
2216/* 2573/*
2217 * Get a unique rbd identifier for the given new rbd_dev, and add 2574 * Get a unique rbd identifier for the given new rbd_dev, and add
2218 * the rbd_dev to the global list. The minimum rbd id is 1. 2575 * the rbd_dev to the global list. The minimum rbd id is 1.
2219 */ 2576 */
2220static void rbd_id_get(struct rbd_device *rbd_dev) 2577static void rbd_dev_id_get(struct rbd_device *rbd_dev)
2221{ 2578{
2222 rbd_dev->dev_id = atomic64_inc_return(&rbd_id_max); 2579 rbd_dev->dev_id = atomic64_inc_return(&rbd_dev_id_max);
2223 2580
2224 spin_lock(&rbd_dev_list_lock); 2581 spin_lock(&rbd_dev_list_lock);
2225 list_add_tail(&rbd_dev->node, &rbd_dev_list); 2582 list_add_tail(&rbd_dev->node, &rbd_dev_list);
2226 spin_unlock(&rbd_dev_list_lock); 2583 spin_unlock(&rbd_dev_list_lock);
2584 dout("rbd_dev %p given dev id %llu\n", rbd_dev,
2585 (unsigned long long) rbd_dev->dev_id);
2227} 2586}
2228 2587
2229/* 2588/*
2230 * Remove an rbd_dev from the global list, and record that its 2589 * Remove an rbd_dev from the global list, and record that its
2231 * identifier is no longer in use. 2590 * identifier is no longer in use.
2232 */ 2591 */
2233static void rbd_id_put(struct rbd_device *rbd_dev) 2592static void rbd_dev_id_put(struct rbd_device *rbd_dev)
2234{ 2593{
2235 struct list_head *tmp; 2594 struct list_head *tmp;
2236 int rbd_id = rbd_dev->dev_id; 2595 int rbd_id = rbd_dev->dev_id;
2237 int max_id; 2596 int max_id;
2238 2597
2239 BUG_ON(rbd_id < 1); 2598 rbd_assert(rbd_id > 0);
2240 2599
2600 dout("rbd_dev %p released dev id %llu\n", rbd_dev,
2601 (unsigned long long) rbd_dev->dev_id);
2241 spin_lock(&rbd_dev_list_lock); 2602 spin_lock(&rbd_dev_list_lock);
2242 list_del_init(&rbd_dev->node); 2603 list_del_init(&rbd_dev->node);
2243 2604
@@ -2245,7 +2606,7 @@ static void rbd_id_put(struct rbd_device *rbd_dev)
2245 * If the id being "put" is not the current maximum, there 2606 * If the id being "put" is not the current maximum, there
2246 * is nothing special we need to do. 2607 * is nothing special we need to do.
2247 */ 2608 */
2248 if (rbd_id != atomic64_read(&rbd_id_max)) { 2609 if (rbd_id != atomic64_read(&rbd_dev_id_max)) {
2249 spin_unlock(&rbd_dev_list_lock); 2610 spin_unlock(&rbd_dev_list_lock);
2250 return; 2611 return;
2251 } 2612 }
@@ -2266,12 +2627,13 @@ static void rbd_id_put(struct rbd_device *rbd_dev)
2266 spin_unlock(&rbd_dev_list_lock); 2627 spin_unlock(&rbd_dev_list_lock);
2267 2628
2268 /* 2629 /*
2269 * The max id could have been updated by rbd_id_get(), in 2630 * The max id could have been updated by rbd_dev_id_get(), in
2270 * which case it now accurately reflects the new maximum. 2631 * which case it now accurately reflects the new maximum.
2271 * Be careful not to overwrite the maximum value in that 2632 * Be careful not to overwrite the maximum value in that
2272 * case. 2633 * case.
2273 */ 2634 */
2274 atomic64_cmpxchg(&rbd_id_max, rbd_id, max_id); 2635 atomic64_cmpxchg(&rbd_dev_id_max, rbd_id, max_id);
2636 dout(" max dev id has been reset\n");
2275} 2637}
2276 2638
2277/* 2639/*
@@ -2360,28 +2722,31 @@ static inline char *dup_token(const char **buf, size_t *lenp)
2360} 2722}
2361 2723
2362/* 2724/*
2363 * This fills in the pool_name, image_name, image_name_len, snap_name, 2725 * This fills in the pool_name, image_name, image_name_len, rbd_dev,
2364 * rbd_dev, rbd_md_name, and name fields of the given rbd_dev, based 2726 * rbd_md_name, and name fields of the given rbd_dev, based on the
2365 * on the list of monitor addresses and other options provided via 2727 * list of monitor addresses and other options provided via
2366 * /sys/bus/rbd/add. 2728 * /sys/bus/rbd/add. Returns a pointer to a dynamically-allocated
2729 * copy of the snapshot name to map if successful, or a
2730 * pointer-coded error otherwise.
2367 * 2731 *
2368 * Note: rbd_dev is assumed to have been initially zero-filled. 2732 * Note: rbd_dev is assumed to have been initially zero-filled.
2369 */ 2733 */
2370static int rbd_add_parse_args(struct rbd_device *rbd_dev, 2734static char *rbd_add_parse_args(struct rbd_device *rbd_dev,
2371 const char *buf, 2735 const char *buf,
2372 const char **mon_addrs, 2736 const char **mon_addrs,
2373 size_t *mon_addrs_size, 2737 size_t *mon_addrs_size,
2374 char *options, 2738 char *options,
2375 size_t options_size) 2739 size_t options_size)
2376{ 2740{
2377 size_t len; 2741 size_t len;
2378 int ret; 2742 char *err_ptr = ERR_PTR(-EINVAL);
2743 char *snap_name;
2379 2744
2380 /* The first four tokens are required */ 2745 /* The first four tokens are required */
2381 2746
2382 len = next_token(&buf); 2747 len = next_token(&buf);
2383 if (!len) 2748 if (!len)
2384 return -EINVAL; 2749 return err_ptr;
2385 *mon_addrs_size = len + 1; 2750 *mon_addrs_size = len + 1;
2386 *mon_addrs = buf; 2751 *mon_addrs = buf;
2387 2752
@@ -2389,9 +2754,9 @@ static int rbd_add_parse_args(struct rbd_device *rbd_dev,
2389 2754
2390 len = copy_token(&buf, options, options_size); 2755 len = copy_token(&buf, options, options_size);
2391 if (!len || len >= options_size) 2756 if (!len || len >= options_size)
2392 return -EINVAL; 2757 return err_ptr;
2393 2758
2394 ret = -ENOMEM; 2759 err_ptr = ERR_PTR(-ENOMEM);
2395 rbd_dev->pool_name = dup_token(&buf, NULL); 2760 rbd_dev->pool_name = dup_token(&buf, NULL);
2396 if (!rbd_dev->pool_name) 2761 if (!rbd_dev->pool_name)
2397 goto out_err; 2762 goto out_err;
@@ -2400,41 +2765,227 @@ static int rbd_add_parse_args(struct rbd_device *rbd_dev,
2400 if (!rbd_dev->image_name) 2765 if (!rbd_dev->image_name)
2401 goto out_err; 2766 goto out_err;
2402 2767
2403 /* Create the name of the header object */ 2768 /* Snapshot name is optional */
2769 len = next_token(&buf);
2770 if (!len) {
2771 buf = RBD_SNAP_HEAD_NAME; /* No snapshot supplied */
2772 len = sizeof (RBD_SNAP_HEAD_NAME) - 1;
2773 }
2774 snap_name = kmalloc(len + 1, GFP_KERNEL);
2775 if (!snap_name)
2776 goto out_err;
2777 memcpy(snap_name, buf, len);
2778 *(snap_name + len) = '\0';
2404 2779
2405 rbd_dev->header_name = kmalloc(rbd_dev->image_name_len 2780dout(" SNAP_NAME is <%s>, len is %zd\n", snap_name, len);
2406 + sizeof (RBD_SUFFIX), 2781
2407 GFP_KERNEL); 2782 return snap_name;
2408 if (!rbd_dev->header_name) 2783
2784out_err:
2785 kfree(rbd_dev->image_name);
2786 rbd_dev->image_name = NULL;
2787 rbd_dev->image_name_len = 0;
2788 kfree(rbd_dev->pool_name);
2789 rbd_dev->pool_name = NULL;
2790
2791 return err_ptr;
2792}
2793
2794/*
2795 * An rbd format 2 image has a unique identifier, distinct from the
2796 * name given to it by the user. Internally, that identifier is
2797 * what's used to specify the names of objects related to the image.
2798 *
2799 * A special "rbd id" object is used to map an rbd image name to its
2800 * id. If that object doesn't exist, then there is no v2 rbd image
2801 * with the supplied name.
2802 *
2803 * This function will record the given rbd_dev's image_id field if
2804 * it can be determined, and in that case will return 0. If any
2805 * errors occur a negative errno will be returned and the rbd_dev's
2806 * image_id field will be unchanged (and should be NULL).
2807 */
2808static int rbd_dev_image_id(struct rbd_device *rbd_dev)
2809{
2810 int ret;
2811 size_t size;
2812 char *object_name;
2813 void *response;
2814 void *p;
2815
2816 /*
2817 * First, see if the format 2 image id file exists, and if
2818 * so, get the image's persistent id from it.
2819 */
2820 size = sizeof (RBD_ID_PREFIX) + rbd_dev->image_name_len;
2821 object_name = kmalloc(size, GFP_NOIO);
2822 if (!object_name)
2823 return -ENOMEM;
2824 sprintf(object_name, "%s%s", RBD_ID_PREFIX, rbd_dev->image_name);
2825 dout("rbd id object name is %s\n", object_name);
2826
2827 /* Response will be an encoded string, which includes a length */
2828
2829 size = sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX;
2830 response = kzalloc(size, GFP_NOIO);
2831 if (!response) {
2832 ret = -ENOMEM;
2833 goto out;
2834 }
2835
2836 ret = rbd_req_sync_exec(rbd_dev, object_name,
2837 "rbd", "get_id",
2838 NULL, 0,
2839 response, RBD_IMAGE_ID_LEN_MAX,
2840 CEPH_OSD_FLAG_READ, NULL);
2841 dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret);
2842 if (ret < 0)
2843 goto out;
2844
2845 p = response;
2846 rbd_dev->image_id = ceph_extract_encoded_string(&p,
2847 p + RBD_IMAGE_ID_LEN_MAX,
2848 &rbd_dev->image_id_len,
2849 GFP_NOIO);
2850 if (IS_ERR(rbd_dev->image_id)) {
2851 ret = PTR_ERR(rbd_dev->image_id);
2852 rbd_dev->image_id = NULL;
2853 } else {
2854 dout("image_id is %s\n", rbd_dev->image_id);
2855 }
2856out:
2857 kfree(response);
2858 kfree(object_name);
2859
2860 return ret;
2861}
2862
2863static int rbd_dev_v1_probe(struct rbd_device *rbd_dev)
2864{
2865 int ret;
2866 size_t size;
2867
2868 /* Version 1 images have no id; empty string is used */
2869
2870 rbd_dev->image_id = kstrdup("", GFP_KERNEL);
2871 if (!rbd_dev->image_id)
2872 return -ENOMEM;
2873 rbd_dev->image_id_len = 0;
2874
2875 /* Record the header object name for this rbd image. */
2876
2877 size = rbd_dev->image_name_len + sizeof (RBD_SUFFIX);
2878 rbd_dev->header_name = kmalloc(size, GFP_KERNEL);
2879 if (!rbd_dev->header_name) {
2880 ret = -ENOMEM;
2409 goto out_err; 2881 goto out_err;
2882 }
2410 sprintf(rbd_dev->header_name, "%s%s", rbd_dev->image_name, RBD_SUFFIX); 2883 sprintf(rbd_dev->header_name, "%s%s", rbd_dev->image_name, RBD_SUFFIX);
2411 2884
2885 /* Populate rbd image metadata */
2886
2887 ret = rbd_read_header(rbd_dev, &rbd_dev->header);
2888 if (ret < 0)
2889 goto out_err;
2890 rbd_dev->image_format = 1;
2891
2892 dout("discovered version 1 image, header name is %s\n",
2893 rbd_dev->header_name);
2894
2895 return 0;
2896
2897out_err:
2898 kfree(rbd_dev->header_name);
2899 rbd_dev->header_name = NULL;
2900 kfree(rbd_dev->image_id);
2901 rbd_dev->image_id = NULL;
2902
2903 return ret;
2904}
2905
2906static int rbd_dev_v2_probe(struct rbd_device *rbd_dev)
2907{
2908 size_t size;
2909 int ret;
2910 u64 ver = 0;
2911
2412 /* 2912 /*
2413 * The snapshot name is optional. If none is is supplied, 2913 * Image id was filled in by the caller. Record the header
2414 * we use the default value. 2914 * object name for this rbd image.
2415 */ 2915 */
2416 rbd_dev->snap_name = dup_token(&buf, &len); 2916 size = sizeof (RBD_HEADER_PREFIX) + rbd_dev->image_id_len;
2417 if (!rbd_dev->snap_name) 2917 rbd_dev->header_name = kmalloc(size, GFP_KERNEL);
2918 if (!rbd_dev->header_name)
2919 return -ENOMEM;
2920 sprintf(rbd_dev->header_name, "%s%s",
2921 RBD_HEADER_PREFIX, rbd_dev->image_id);
2922
2923 /* Get the size and object order for the image */
2924
2925 ret = rbd_dev_v2_image_size(rbd_dev);
2926 if (ret < 0)
2418 goto out_err; 2927 goto out_err;
2419 if (!len) {
2420 /* Replace the empty name with the default */
2421 kfree(rbd_dev->snap_name);
2422 rbd_dev->snap_name
2423 = kmalloc(sizeof (RBD_SNAP_HEAD_NAME), GFP_KERNEL);
2424 if (!rbd_dev->snap_name)
2425 goto out_err;
2426 2928
2427 memcpy(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME, 2929 /* Get the object prefix (a.k.a. block_name) for the image */
2428 sizeof (RBD_SNAP_HEAD_NAME));
2429 }
2430 2930
2431 return 0; 2931 ret = rbd_dev_v2_object_prefix(rbd_dev);
2932 if (ret < 0)
2933 goto out_err;
2934
2935 /* Get the features for the image */
2432 2936
2937 ret = rbd_dev_v2_features(rbd_dev);
2938 if (ret < 0)
2939 goto out_err;
2940
2941 /* crypto and compression type aren't (yet) supported for v2 images */
2942
2943 rbd_dev->header.crypt_type = 0;
2944 rbd_dev->header.comp_type = 0;
2945
2946 /* Get the snapshot context, plus the header version */
2947
2948 ret = rbd_dev_v2_snap_context(rbd_dev, &ver);
2949 if (ret)
2950 goto out_err;
2951 rbd_dev->header.obj_version = ver;
2952
2953 rbd_dev->image_format = 2;
2954
2955 dout("discovered version 2 image, header name is %s\n",
2956 rbd_dev->header_name);
2957
2958 return -ENOTSUPP;
2433out_err: 2959out_err:
2434 kfree(rbd_dev->header_name); 2960 kfree(rbd_dev->header_name);
2435 kfree(rbd_dev->image_name); 2961 rbd_dev->header_name = NULL;
2436 kfree(rbd_dev->pool_name); 2962 kfree(rbd_dev->header.object_prefix);
2437 rbd_dev->pool_name = NULL; 2963 rbd_dev->header.object_prefix = NULL;
2964
2965 return ret;
2966}
2967
/*
 * Probe for the existence of the header object for the given rbd
 * device.  For format 2 images this includes determining the image
 * id.
 *
 * Returns the result of whichever format-specific probe was run.
 */
static int rbd_dev_probe(struct rbd_device *rbd_dev)
{
	int ret;

	/*
	 * Get the id from the image id object.  If that lookup
	 * reports anything other than 0 (ENOENT is the expected case
	 * for a format 1 image, but any non-zero result takes this
	 * path), fall back to probing it as a format 1 image.
	 */
	ret = rbd_dev_image_id(rbd_dev) ? rbd_dev_v1_probe(rbd_dev)
					: rbd_dev_v2_probe(rbd_dev);
	if (ret)
		dout("probe failed, returning %d\n", ret);

	return ret;
}
@@ -2449,16 +3000,17 @@ static ssize_t rbd_add(struct bus_type *bus,
2449 size_t mon_addrs_size = 0; 3000 size_t mon_addrs_size = 0;
2450 struct ceph_osd_client *osdc; 3001 struct ceph_osd_client *osdc;
2451 int rc = -ENOMEM; 3002 int rc = -ENOMEM;
3003 char *snap_name;
2452 3004
2453 if (!try_module_get(THIS_MODULE)) 3005 if (!try_module_get(THIS_MODULE))
2454 return -ENODEV; 3006 return -ENODEV;
2455 3007
2456 options = kmalloc(count, GFP_KERNEL); 3008 options = kmalloc(count, GFP_KERNEL);
2457 if (!options) 3009 if (!options)
2458 goto err_nomem; 3010 goto err_out_mem;
2459 rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL); 3011 rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL);
2460 if (!rbd_dev) 3012 if (!rbd_dev)
2461 goto err_nomem; 3013 goto err_out_mem;
2462 3014
2463 /* static rbd_device initialization */ 3015 /* static rbd_device initialization */
2464 spin_lock_init(&rbd_dev->lock); 3016 spin_lock_init(&rbd_dev->lock);
@@ -2466,27 +3018,18 @@ static ssize_t rbd_add(struct bus_type *bus,
2466 INIT_LIST_HEAD(&rbd_dev->snaps); 3018 INIT_LIST_HEAD(&rbd_dev->snaps);
2467 init_rwsem(&rbd_dev->header_rwsem); 3019 init_rwsem(&rbd_dev->header_rwsem);
2468 3020
2469 /* generate unique id: find highest unique id, add one */
2470 rbd_id_get(rbd_dev);
2471
2472 /* Fill in the device name, now that we have its id. */
2473 BUILD_BUG_ON(DEV_NAME_LEN
2474 < sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH);
2475 sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->dev_id);
2476
2477 /* parse add command */ 3021 /* parse add command */
2478 rc = rbd_add_parse_args(rbd_dev, buf, &mon_addrs, &mon_addrs_size, 3022 snap_name = rbd_add_parse_args(rbd_dev, buf,
2479 options, count); 3023 &mon_addrs, &mon_addrs_size, options, count);
2480 if (rc) 3024 if (IS_ERR(snap_name)) {
2481 goto err_put_id; 3025 rc = PTR_ERR(snap_name);
2482 3026 goto err_out_mem;
2483 rbd_dev->rbd_client = rbd_get_client(mon_addrs, mon_addrs_size - 1,
2484 options);
2485 if (IS_ERR(rbd_dev->rbd_client)) {
2486 rc = PTR_ERR(rbd_dev->rbd_client);
2487 goto err_put_id;
2488 } 3027 }
2489 3028
3029 rc = rbd_get_client(rbd_dev, mon_addrs, mon_addrs_size - 1, options);
3030 if (rc < 0)
3031 goto err_out_args;
3032
2490 /* pick the pool */ 3033 /* pick the pool */
2491 osdc = &rbd_dev->rbd_client->client->osdc; 3034 osdc = &rbd_dev->rbd_client->client->osdc;
2492 rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name); 3035 rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name);
@@ -2494,23 +3037,53 @@ static ssize_t rbd_add(struct bus_type *bus,
2494 goto err_out_client; 3037 goto err_out_client;
2495 rbd_dev->pool_id = rc; 3038 rbd_dev->pool_id = rc;
2496 3039
2497 /* register our block device */ 3040 rc = rbd_dev_probe(rbd_dev);
2498 rc = register_blkdev(0, rbd_dev->name);
2499 if (rc < 0) 3041 if (rc < 0)
2500 goto err_out_client; 3042 goto err_out_client;
3043 rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
3044
3045 /* no need to lock here, as rbd_dev is not registered yet */
3046 rc = rbd_dev_snaps_update(rbd_dev);
3047 if (rc)
3048 goto err_out_header;
3049
3050 rc = rbd_dev_set_mapping(rbd_dev, snap_name);
3051 if (rc)
3052 goto err_out_header;
3053
3054 /* generate unique id: find highest unique id, add one */
3055 rbd_dev_id_get(rbd_dev);
3056
3057 /* Fill in the device name, now that we have its id. */
3058 BUILD_BUG_ON(DEV_NAME_LEN
3059 < sizeof (RBD_DRV_NAME) + MAX_INT_FORMAT_WIDTH);
3060 sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->dev_id);
3061
3062 /* Get our block major device number. */
3063
3064 rc = register_blkdev(0, rbd_dev->name);
3065 if (rc < 0)
3066 goto err_out_id;
2501 rbd_dev->major = rc; 3067 rbd_dev->major = rc;
2502 3068
2503 rc = rbd_bus_add_dev(rbd_dev); 3069 /* Set up the blkdev mapping. */
3070
3071 rc = rbd_init_disk(rbd_dev);
2504 if (rc) 3072 if (rc)
2505 goto err_out_blkdev; 3073 goto err_out_blkdev;
2506 3074
3075 rc = rbd_bus_add_dev(rbd_dev);
3076 if (rc)
3077 goto err_out_disk;
3078
2507 /* 3079 /*
2508 * At this point cleanup in the event of an error is the job 3080 * At this point cleanup in the event of an error is the job
2509 * of the sysfs code (initiated by rbd_bus_del_dev()). 3081 * of the sysfs code (initiated by rbd_bus_del_dev()).
2510 *
2511 * Set up and announce blkdev mapping.
2512 */ 3082 */
2513 rc = rbd_init_disk(rbd_dev); 3083
3084 down_write(&rbd_dev->header_rwsem);
3085 rc = rbd_dev_snaps_register(rbd_dev);
3086 up_write(&rbd_dev->header_rwsem);
2514 if (rc) 3087 if (rc)
2515 goto err_out_bus; 3088 goto err_out_bus;
2516 3089
@@ -2518,6 +3091,13 @@ static ssize_t rbd_add(struct bus_type *bus,
2518 if (rc) 3091 if (rc)
2519 goto err_out_bus; 3092 goto err_out_bus;
2520 3093
3094 /* Everything's ready. Announce the disk to the world. */
3095
3096 add_disk(rbd_dev->disk);
3097
3098 pr_info("%s: added with size 0x%llx\n", rbd_dev->disk->disk_name,
3099 (unsigned long long) rbd_dev->mapping.size);
3100
2521 return count; 3101 return count;
2522 3102
2523err_out_bus: 3103err_out_bus:
@@ -2527,19 +3107,23 @@ err_out_bus:
2527 kfree(options); 3107 kfree(options);
2528 return rc; 3108 return rc;
2529 3109
3110err_out_disk:
3111 rbd_free_disk(rbd_dev);
2530err_out_blkdev: 3112err_out_blkdev:
2531 unregister_blkdev(rbd_dev->major, rbd_dev->name); 3113 unregister_blkdev(rbd_dev->major, rbd_dev->name);
3114err_out_id:
3115 rbd_dev_id_put(rbd_dev);
3116err_out_header:
3117 rbd_header_free(&rbd_dev->header);
2532err_out_client: 3118err_out_client:
3119 kfree(rbd_dev->header_name);
2533 rbd_put_client(rbd_dev); 3120 rbd_put_client(rbd_dev);
2534err_put_id: 3121 kfree(rbd_dev->image_id);
2535 if (rbd_dev->pool_name) { 3122err_out_args:
2536 kfree(rbd_dev->snap_name); 3123 kfree(rbd_dev->mapping.snap_name);
2537 kfree(rbd_dev->header_name); 3124 kfree(rbd_dev->image_name);
2538 kfree(rbd_dev->image_name); 3125 kfree(rbd_dev->pool_name);
2539 kfree(rbd_dev->pool_name); 3126err_out_mem:
2540 }
2541 rbd_id_put(rbd_dev);
2542err_nomem:
2543 kfree(rbd_dev); 3127 kfree(rbd_dev);
2544 kfree(options); 3128 kfree(options);
2545 3129
@@ -2585,12 +3169,16 @@ static void rbd_dev_release(struct device *dev)
2585 rbd_free_disk(rbd_dev); 3169 rbd_free_disk(rbd_dev);
2586 unregister_blkdev(rbd_dev->major, rbd_dev->name); 3170 unregister_blkdev(rbd_dev->major, rbd_dev->name);
2587 3171
3172 /* release allocated disk header fields */
3173 rbd_header_free(&rbd_dev->header);
3174
2588 /* done with the id, and with the rbd_dev */ 3175 /* done with the id, and with the rbd_dev */
2589 kfree(rbd_dev->snap_name); 3176 kfree(rbd_dev->mapping.snap_name);
3177 kfree(rbd_dev->image_id);
2590 kfree(rbd_dev->header_name); 3178 kfree(rbd_dev->header_name);
2591 kfree(rbd_dev->pool_name); 3179 kfree(rbd_dev->pool_name);
2592 kfree(rbd_dev->image_name); 3180 kfree(rbd_dev->image_name);
2593 rbd_id_put(rbd_dev); 3181 rbd_dev_id_put(rbd_dev);
2594 kfree(rbd_dev); 3182 kfree(rbd_dev);
2595 3183
2596 /* release module ref */ 3184 /* release module ref */
@@ -2628,47 +3216,7 @@ static ssize_t rbd_remove(struct bus_type *bus,
2628 3216
2629done: 3217done:
2630 mutex_unlock(&ctl_mutex); 3218 mutex_unlock(&ctl_mutex);
2631 return ret;
2632}
2633 3219
2634static ssize_t rbd_snap_add(struct device *dev,
2635 struct device_attribute *attr,
2636 const char *buf,
2637 size_t count)
2638{
2639 struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
2640 int ret;
2641 char *name = kmalloc(count + 1, GFP_KERNEL);
2642 if (!name)
2643 return -ENOMEM;
2644
2645 snprintf(name, count, "%s", buf);
2646
2647 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
2648
2649 ret = rbd_header_add_snap(rbd_dev,
2650 name, GFP_KERNEL);
2651 if (ret < 0)
2652 goto err_unlock;
2653
2654 ret = __rbd_refresh_header(rbd_dev, NULL);
2655 if (ret < 0)
2656 goto err_unlock;
2657
2658 /* shouldn't hold ctl_mutex when notifying.. notify might
2659 trigger a watch callback that would need to get that mutex */
2660 mutex_unlock(&ctl_mutex);
2661
2662 /* make a best effort, don't error if failed */
2663 rbd_req_sync_notify(rbd_dev);
2664
2665 ret = count;
2666 kfree(name);
2667 return ret;
2668
2669err_unlock:
2670 mutex_unlock(&ctl_mutex);
2671 kfree(name);
2672 return ret; 3220 return ret;
2673} 3221}
2674 3222
diff --git a/drivers/block/rbd_types.h b/drivers/block/rbd_types.h
index 0924e9e41a60..cbe77fa105ba 100644
--- a/drivers/block/rbd_types.h
+++ b/drivers/block/rbd_types.h
@@ -15,15 +15,30 @@
15 15
16#include <linux/types.h> 16#include <linux/types.h>
17 17
18/* For format version 2, rbd image 'foo' consists of objects
19 * rbd_id.foo - id of image
20 * rbd_header.<id> - image metadata
21 * rbd_data.<id>.0000000000000000
22 * rbd_data.<id>.0000000000000001
23 * ... - data
24 * Clients do not access header data directly in rbd format 2.
25 */
26
27#define RBD_HEADER_PREFIX "rbd_header."
28#define RBD_DATA_PREFIX "rbd_data."
29#define RBD_ID_PREFIX "rbd_id."
30
18/* 31/*
19 * rbd image 'foo' consists of objects 32 * For format version 1, rbd image 'foo' consists of objects
20 * foo.rbd - image metadata 33 * foo.rbd - image metadata
21 * foo.00000000 34 * rb.<idhi>.<idlo>.00000000
22 * foo.00000001 35 * rb.<idhi>.<idlo>.00000001
23 * ... - data 36 * ... - data
37 * There is no notion of a persistent image id in rbd format 1.
24 */ 38 */
25 39
26#define RBD_SUFFIX ".rbd" 40#define RBD_SUFFIX ".rbd"
41
27#define RBD_DIRECTORY "rbd_directory" 42#define RBD_DIRECTORY "rbd_directory"
28#define RBD_INFO "rbd_info" 43#define RBD_INFO "rbd_info"
29 44
@@ -47,7 +62,7 @@ struct rbd_image_snap_ondisk {
47 62
48struct rbd_image_header_ondisk { 63struct rbd_image_header_ondisk {
49 char text[40]; 64 char text[40];
50 char block_name[24]; 65 char object_prefix[24];
51 char signature[4]; 66 char signature[4];
52 char version[8]; 67 char version[8];
53 struct { 68 struct {