aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Alex Elder <elder@inktank.com> 2013-01-24 17:13:36 -0500
committer Sage Weil <sage@inktank.com> 2013-05-02 00:18:48 -0400
commit 8b3e1a56982d0eafff0afb0ff9e87c8b944a9bdc (patch)
tree 5c3af92d78d35f27274533eec7d8353c124266ba
parent 2f82ee54d95c9430838e4580f3bcc196ad36e4f2 (diff)
rbd: implement layered reads
Implement layered read requests for format 2 rbd images. If an rbd image is a clone of a snapshot, the snapshot will be the clone's "parent" image. When an object read request on a clone comes back with ENOENT, it indicates that the clone is not yet populated with that portion of the image's data, and the parent image should be consulted to satisfy the read. When this occurs, a new image request is created, directed to the parent image. The offset and length of the new image request are the same as the image-relative offset and length of the object request that produced ENOENT. Data from the parent image therefore satisfies the object read request for the original image request. While this code works, it will not be active until we enable the layering feature (by adding RBD_FEATURE_LAYERING to the value of RBD_FEATURES_SUPPORTED). Signed-off-by: Alex Elder <elder@inktank.com> Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
-rw-r--r-- drivers/block/rbd.c 97
1 files changed, 85 insertions, 12 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 5c129c54279c..13a381b2a779 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -398,6 +398,8 @@ void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...)
398# define rbd_assert(expr) ((void) 0) 398# define rbd_assert(expr) ((void) 0)
399#endif /* !RBD_DEBUG */ 399#endif /* !RBD_DEBUG */
400 400
401static void rbd_img_parent_read(struct rbd_obj_request *obj_request);
402
401static int rbd_dev_refresh(struct rbd_device *rbd_dev, u64 *hver); 403static int rbd_dev_refresh(struct rbd_device *rbd_dev, u64 *hver);
402static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver); 404static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver);
403 405
@@ -1336,9 +1338,15 @@ static void rbd_osd_trivial_callback(struct rbd_obj_request *obj_request)
1336 1338
1337static void rbd_osd_read_callback(struct rbd_obj_request *obj_request) 1339static void rbd_osd_read_callback(struct rbd_obj_request *obj_request)
1338{ 1340{
1339 dout("%s: obj %p result %d %llu/%llu\n", __func__, obj_request, 1341 struct rbd_img_request *img_request = obj_request->img_request;
1340 obj_request->result, obj_request->xferred, obj_request->length); 1342 bool layered = img_request && img_request_layered_test(img_request);
1341 if (obj_request->img_request) 1343
1344 dout("%s: obj %p img %p result %d %llu/%llu\n", __func__,
1345 obj_request, img_request, obj_request->result,
1346 obj_request->xferred, obj_request->length);
1347 if (layered && obj_request->result == -ENOENT)
1348 rbd_img_parent_read(obj_request);
1349 else if (img_request)
1342 rbd_img_obj_request_read_callback(obj_request); 1350 rbd_img_obj_request_read_callback(obj_request);
1343 else 1351 else
1344 obj_request_done_set(obj_request); 1352 obj_request_done_set(obj_request);
@@ -1349,9 +1357,8 @@ static void rbd_osd_write_callback(struct rbd_obj_request *obj_request)
1349 dout("%s: obj %p result %d %llu\n", __func__, obj_request, 1357 dout("%s: obj %p result %d %llu\n", __func__, obj_request,
1350 obj_request->result, obj_request->length); 1358 obj_request->result, obj_request->length);
1351 /* 1359 /*
1352 * There is no such thing as a successful short write. 1360 * There is no such thing as a successful short write. Set
1353 * Our xferred value is the number of bytes transferred 1361 * it to our originally-requested length.
1354 * back. Set it to our originally-requested length.
1355 */ 1362 */
1356 obj_request->xferred = obj_request->length; 1363 obj_request->xferred = obj_request->length;
1357 obj_request_done_set(obj_request); 1364 obj_request_done_set(obj_request);
@@ -1391,7 +1398,7 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req,
1391 * passed to blk_end_request(), which takes an unsigned int. 1398 * passed to blk_end_request(), which takes an unsigned int.
1392 */ 1399 */
1393 obj_request->xferred = osd_req->r_reply_op_len[0]; 1400 obj_request->xferred = osd_req->r_reply_op_len[0];
1394 rbd_assert(obj_request->xferred < (u64) UINT_MAX); 1401 rbd_assert(obj_request->xferred < (u64)UINT_MAX);
1395 opcode = osd_req->r_ops[0].op; 1402 opcode = osd_req->r_ops[0].op;
1396 switch (opcode) { 1403 switch (opcode) {
1397 case CEPH_OSD_OP_READ: 1404 case CEPH_OSD_OP_READ:
@@ -1607,7 +1614,6 @@ static struct rbd_img_request *rbd_img_request_create(
1607 INIT_LIST_HEAD(&img_request->obj_requests); 1614 INIT_LIST_HEAD(&img_request->obj_requests);
1608 kref_init(&img_request->kref); 1615 kref_init(&img_request->kref);
1609 1616
1610 (void) img_request_layered_test(img_request); /* Avoid a warning */
1611 rbd_img_request_get(img_request); /* Avoid a warning */ 1617 rbd_img_request_get(img_request); /* Avoid a warning */
1612 rbd_img_request_put(img_request); /* TEMPORARY */ 1618 rbd_img_request_put(img_request); /* TEMPORARY */
1613 1619
@@ -1635,6 +1641,9 @@ static void rbd_img_request_destroy(struct kref *kref)
1635 if (img_request_write_test(img_request)) 1641 if (img_request_write_test(img_request))
1636 ceph_put_snap_context(img_request->snapc); 1642 ceph_put_snap_context(img_request->snapc);
1637 1643
1644 if (img_request_child_test(img_request))
1645 rbd_obj_request_put(img_request->obj_request);
1646
1638 kfree(img_request); 1647 kfree(img_request);
1639} 1648}
1640 1649
@@ -1643,13 +1652,11 @@ static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request)
1643 struct rbd_img_request *img_request; 1652 struct rbd_img_request *img_request;
1644 unsigned int xferred; 1653 unsigned int xferred;
1645 int result; 1654 int result;
1655 bool more;
1646 1656
1647 rbd_assert(obj_request_img_data_test(obj_request)); 1657 rbd_assert(obj_request_img_data_test(obj_request));
1648 img_request = obj_request->img_request; 1658 img_request = obj_request->img_request;
1649 1659
1650 rbd_assert(!img_request_child_test(img_request));
1651 rbd_assert(img_request->rq != NULL);
1652
1653 rbd_assert(obj_request->xferred <= (u64)UINT_MAX); 1660 rbd_assert(obj_request->xferred <= (u64)UINT_MAX);
1654 xferred = (unsigned int)obj_request->xferred; 1661 xferred = (unsigned int)obj_request->xferred;
1655 result = obj_request->result; 1662 result = obj_request->result;
@@ -1666,7 +1673,15 @@ static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request)
1666 img_request->result = result; 1673 img_request->result = result;
1667 } 1674 }
1668 1675
1669 return blk_end_request(img_request->rq, result, xferred); 1676 if (img_request_child_test(img_request)) {
1677 rbd_assert(img_request->obj_request != NULL);
1678 more = obj_request->which < img_request->obj_request_count - 1;
1679 } else {
1680 rbd_assert(img_request->rq != NULL);
1681 more = blk_end_request(img_request->rq, result, xferred);
1682 }
1683
1684 return more;
1670} 1685}
1671 1686
1672static void rbd_img_obj_callback(struct rbd_obj_request *obj_request) 1687static void rbd_img_obj_callback(struct rbd_obj_request *obj_request)
@@ -1811,6 +1826,64 @@ static int rbd_img_request_submit(struct rbd_img_request *img_request)
1811 return 0; 1826 return 0;
1812} 1827}
1813 1828
1829static void rbd_img_parent_read_callback(struct rbd_img_request *img_request)
1830{
1831 struct rbd_obj_request *obj_request;
1832
1833 rbd_assert(img_request_child_test(img_request));
1834
1835 obj_request = img_request->obj_request;
1836 rbd_assert(obj_request != NULL);
1837 obj_request->result = img_request->result;
1838 obj_request->xferred = img_request->xferred;
1839
1840 rbd_img_obj_request_read_callback(obj_request);
1841 rbd_obj_request_complete(obj_request);
1842}
1843
1844static void rbd_img_parent_read(struct rbd_obj_request *obj_request)
1845{
1846 struct rbd_device *rbd_dev;
1847 struct rbd_img_request *img_request;
1848 int result;
1849
1850 rbd_assert(obj_request_img_data_test(obj_request));
1851 rbd_assert(obj_request->img_request != NULL);
1852 rbd_assert(obj_request->result == (s32) -ENOENT);
1853 rbd_assert(obj_request->type == OBJ_REQUEST_BIO);
1854
1855 rbd_dev = obj_request->img_request->rbd_dev;
1856 rbd_assert(rbd_dev->parent != NULL);
1857 /* rbd_read_finish(obj_request, obj_request->length); */
1858 img_request = rbd_img_request_create(rbd_dev->parent,
1859 obj_request->img_offset,
1860 obj_request->length,
1861 false, true);
1862 result = -ENOMEM;
1863 if (!img_request)
1864 goto out_err;
1865
1866 rbd_obj_request_get(obj_request);
1867 img_request->obj_request = obj_request;
1868
1869 result = rbd_img_request_fill_bio(img_request, obj_request->bio_list);
1870 if (result)
1871 goto out_err;
1872
1873 img_request->callback = rbd_img_parent_read_callback;
1874 result = rbd_img_request_submit(img_request);
1875 if (result)
1876 goto out_err;
1877
1878 return;
1879out_err:
1880 if (img_request)
1881 rbd_img_request_put(img_request);
1882 obj_request->result = result;
1883 obj_request->xferred = 0;
1884 obj_request_done_set(obj_request);
1885}
1886
1814static int rbd_obj_notify_ack(struct rbd_device *rbd_dev, 1887static int rbd_obj_notify_ack(struct rbd_device *rbd_dev,
1815 u64 ver, u64 notify_id) 1888 u64 ver, u64 notify_id)
1816{ 1889{