aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Alex Elder <elder@inktank.com> 2013-05-06 18:40:33 -0400
committer: Alex Elder <elder@inktank.com> 2013-05-13 16:06:33 -0400
commit: 642a25375f4c863607d2170f4471aec8becf7788 (patch)
tree: 55987da93859800187aa8244db9be052852d7289
parent: 70cf49cfc7a4d1eb4aeea6cd128b88230be9d0b1 (diff)
rbd: get parent info on refresh
Get parent info for format 2 images on every refresh (rather than just during the initial probe). This will be needed to detect the disappearance of the parent image in the event a mapped image becomes unlayered (i.e., flattened).

Avoid leaking the previous parent spec on the second and subsequent times this information is requested by dropping the previous one (if any) before updating it. (Also, extract the pool id into a local variable before assigning it into the parent spec.)

Switch to using a non-zero parent overlap value rather than the existence of a parent (a non-null parent_spec pointer) to determine whether to mark a request layered. It will soon be possible for a layered image to become unlayered while a request is in flight.

This means that the layered flag for an image request indicates that there was a non-zero parent overlap at the time the image request was created. The parent overlap can change thereafter, which may lead to special handling at request submission or completion time.

This and the next several patches are related to:
    http://tracker.ceph.com/issues/3763

NOTE: If an error occurs while refreshing the parent info (i.e., requesting it after initial probe), the old parent info will persist. This is not really correct, and is a scenario that needs to be addressed. For now we'll assert that the failure mode is unlikely, but the issue has been documented in tracker issue 5040.

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
-rw-r--r--  drivers/block/rbd.c  67
1 files changed, 37 insertions, 30 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index b67ecda1e7ef..fcef63c2c30b 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1873,7 +1873,7 @@ static struct rbd_img_request *rbd_img_request_create(
1873 } 1873 }
1874 if (child_request) 1874 if (child_request)
1875 img_request_child_set(img_request); 1875 img_request_child_set(img_request);
1876 if (rbd_dev->parent_spec) 1876 if (rbd_dev->parent_overlap)
1877 img_request_layered_set(img_request); 1877 img_request_layered_set(img_request);
1878 spin_lock_init(&img_request->completion_lock); 1878 spin_lock_init(&img_request->completion_lock);
1879 img_request->next_completion = 0; 1879 img_request->next_completion = 0;
@@ -3613,6 +3613,7 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
3613 __le64 snapid; 3613 __le64 snapid;
3614 void *p; 3614 void *p;
3615 void *end; 3615 void *end;
3616 u64 pool_id;
3616 char *image_id; 3617 char *image_id;
3617 u64 overlap; 3618 u64 overlap;
3618 int ret; 3619 int ret;
@@ -3643,18 +3644,19 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
3643 p = reply_buf; 3644 p = reply_buf;
3644 end = reply_buf + ret; 3645 end = reply_buf + ret;
3645 ret = -ERANGE; 3646 ret = -ERANGE;
3646 ceph_decode_64_safe(&p, end, parent_spec->pool_id, out_err); 3647 ceph_decode_64_safe(&p, end, pool_id, out_err);
3647 if (parent_spec->pool_id == CEPH_NOPOOL) 3648 if (pool_id == CEPH_NOPOOL)
3648 goto out; /* No parent? No problem. */ 3649 goto out; /* No parent? No problem. */
3649 3650
3650 /* The ceph file layout needs to fit pool id in 32 bits */ 3651 /* The ceph file layout needs to fit pool id in 32 bits */
3651 3652
3652 ret = -EIO; 3653 ret = -EIO;
3653 if (parent_spec->pool_id > (u64)U32_MAX) { 3654 if (pool_id > (u64)U32_MAX) {
3654 rbd_warn(NULL, "parent pool id too large (%llu > %u)\n", 3655 rbd_warn(NULL, "parent pool id too large (%llu > %u)\n",
3655 (unsigned long long)parent_spec->pool_id, U32_MAX); 3656 (unsigned long long)pool_id, U32_MAX);
3656 goto out_err; 3657 goto out_err;
3657 } 3658 }
3659 parent_spec->pool_id = pool_id;
3658 3660
3659 image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL); 3661 image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL);
3660 if (IS_ERR(image_id)) { 3662 if (IS_ERR(image_id)) {
@@ -3666,6 +3668,7 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
3666 ceph_decode_64_safe(&p, end, overlap, out_err); 3668 ceph_decode_64_safe(&p, end, overlap, out_err);
3667 3669
3668 if (overlap) { 3670 if (overlap) {
3671 rbd_spec_put(rbd_dev->parent_spec);
3669 rbd_dev->parent_spec = parent_spec; 3672 rbd_dev->parent_spec = parent_spec;
3670 parent_spec = NULL; /* rbd_dev now owns this */ 3673 parent_spec = NULL; /* rbd_dev now owns this */
3671 rbd_dev->parent_overlap = overlap; 3674 rbd_dev->parent_overlap = overlap;
@@ -4034,17 +4037,43 @@ static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev)
4034 goto out; 4037 goto out;
4035 } 4038 }
4036 4039
4040 /*
4041 * If the image supports layering, get the parent info. We
4042 * need to probe the first time regardless. Thereafter we
4043 * only need to if there's a parent, to see if it has
4044 * disappeared due to the mapped image getting flattened.
4045 */
4046 if (rbd_dev->header.features & RBD_FEATURE_LAYERING &&
4047 (first_time || rbd_dev->parent_spec)) {
4048 bool warn;
4049
4050 ret = rbd_dev_v2_parent_info(rbd_dev);
4051 if (ret)
4052 goto out;
4053
4054 /*
4055 * Print a warning if this is the initial probe and
4056 * the image has a parent. Don't print it if the
4057 * image now being probed is itself a parent. We
4058 * can tell at this point because we won't know its
4059 * pool name yet (just its pool id).
4060 */
4061 warn = rbd_dev->parent_spec && rbd_dev->spec->pool_name;
4062 if (first_time && warn)
4063 rbd_warn(rbd_dev, "WARNING: kernel layering "
4064 "is EXPERIMENTAL!");
4065 }
4066
4037 ret = rbd_dev_v2_image_size(rbd_dev); 4067 ret = rbd_dev_v2_image_size(rbd_dev);
4038 if (ret) 4068 if (ret)
4039 goto out; 4069 goto out;
4070
4040 if (rbd_dev->spec->snap_id == CEPH_NOSNAP) 4071 if (rbd_dev->spec->snap_id == CEPH_NOSNAP)
4041 if (rbd_dev->mapping.size != rbd_dev->header.image_size) 4072 if (rbd_dev->mapping.size != rbd_dev->header.image_size)
4042 rbd_dev->mapping.size = rbd_dev->header.image_size; 4073 rbd_dev->mapping.size = rbd_dev->header.image_size;
4043 4074
4044 ret = rbd_dev_v2_snap_context(rbd_dev); 4075 ret = rbd_dev_v2_snap_context(rbd_dev);
4045 dout("rbd_dev_v2_snap_context returned %d\n", ret); 4076 dout("rbd_dev_v2_snap_context returned %d\n", ret);
4046 if (ret)
4047 goto out;
4048out: 4077out:
4049 up_write(&rbd_dev->header_rwsem); 4078 up_write(&rbd_dev->header_rwsem);
4050 4079
@@ -4498,24 +4527,6 @@ static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev)
4498 if (ret) 4527 if (ret)
4499 goto out_err; 4528 goto out_err;
4500 4529
4501 /* If the image supports layering, get the parent info */
4502
4503 if (rbd_dev->header.features & RBD_FEATURE_LAYERING) {
4504 ret = rbd_dev_v2_parent_info(rbd_dev);
4505 if (ret)
4506 goto out_err;
4507 /*
4508 * Print a warning if this image has a parent.
4509 * Don't print it if the image now being probed
4510 * is itself a parent. We can tell at this point
4511 * because we won't know its pool name yet (just its
4512 * pool id).
4513 */
4514 if (rbd_dev->parent_spec && rbd_dev->spec->pool_name)
4515 rbd_warn(rbd_dev, "WARNING: kernel layering "
4516 "is EXPERIMENTAL!");
4517 }
4518
4519 /* If the image supports fancy striping, get its parameters */ 4530 /* If the image supports fancy striping, get its parameters */
4520 4531
4521 if (rbd_dev->header.features & RBD_FEATURE_STRIPINGV2) { 4532 if (rbd_dev->header.features & RBD_FEATURE_STRIPINGV2) {
@@ -4527,11 +4538,7 @@ static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev)
4527 4538
4528 return 0; 4539 return 0;
4529out_err: 4540out_err:
4530 rbd_dev->parent_overlap = 0; 4541 rbd_dev->header.features = 0;
4531 rbd_spec_put(rbd_dev->parent_spec);
4532 rbd_dev->parent_spec = NULL;
4533 kfree(rbd_dev->header_name);
4534 rbd_dev->header_name = NULL;
4535 kfree(rbd_dev->header.object_prefix); 4542 kfree(rbd_dev->header.object_prefix);
4536 rbd_dev->header.object_prefix = NULL; 4543 rbd_dev->header.object_prefix = NULL;
4537 4544