aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Alex Elder <elder@inktank.com> 2013-04-19 16:34:50 -0400
committer Sage Weil <sage@inktank.com> 2013-05-02 00:19:14 -0400
commit 0eefd470f034cc18349fa1a9e4fda000e963c4e3 (patch)
tree ea0bc325fc9ea3d4a8e89dff23c543fa9c6526ba
parent 3d7efd18d9df628e30ff36e9e488a8f0e782b678 (diff)
rbd: issue a copyup for layered writes
This implements the main copyup functionality for layered writes.

Here we add a copyup_pages field to the object request, which is used only for copyup requests to keep track of the page array containing data read from the parent image.

A copyup request is currently the only request rbd has that requires two osd operations. Because of this we handle copyup specially. All image object requests get an osd request allocated when they are created. For a write request, if a copyup is required, the osd request originally allocated is released, and a new one (with room for two osd ops) is allocated to replace it.

A new function rbd_osd_req_create_copyup() allocates an osd request suitable for a copyup request. The first op is then filled with a copyup object class method call, supplying the array of pages containing data read from the parent. The second op is filled in with the original write request.

The original request otherwise remains intact, and it describes the original write request (found in the second osd op). The presence of the copyup op is sort of implicit; a non-null copyup_pages field could be used to distinguish between a "normal" write request and a request containing both a copyup call and a write.

This resolves:
    http://tracker.ceph.com/issues/3419

Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
-rw-r--r-- drivers/block/rbd.c 149
1 files changed, 137 insertions, 12 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 639dd91e7dab..c34719c917b1 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -218,6 +218,7 @@ struct rbd_obj_request {
218 u32 page_count; 218 u32 page_count;
219 }; 219 };
220 }; 220 };
221 struct page **copyup_pages;
221 222
222 struct ceph_osd_request *osd_req; 223 struct ceph_osd_request *osd_req;
223 224
@@ -1498,7 +1499,7 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req,
1498 obj_request->result = osd_req->r_result; 1499 obj_request->result = osd_req->r_result;
1499 obj_request->version = le64_to_cpu(osd_req->r_reassert_version.version); 1500 obj_request->version = le64_to_cpu(osd_req->r_reassert_version.version);
1500 1501
1501 WARN_ON(osd_req->r_num_ops != 1); /* For now */ 1502 BUG_ON(osd_req->r_num_ops > 2);
1502 1503
1503 /* 1504 /*
1504 * We support a 64-bit length, but ultimately it has to be 1505 * We support a 64-bit length, but ultimately it has to be
@@ -1601,6 +1602,48 @@ static struct ceph_osd_request *rbd_osd_req_create(
1601 return osd_req; 1602 return osd_req;
1602} 1603}
1603 1604
1605/*
1606 * Create a copyup osd request based on the information in the
1607 * object request supplied. A copyup request has two osd ops,
1608 * a copyup method call, and a "normal" write request.
1609 */
1610static struct ceph_osd_request *
1611rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request)
1612{
1613 struct rbd_img_request *img_request;
1614 struct ceph_snap_context *snapc;
1615 struct rbd_device *rbd_dev;
1616 struct ceph_osd_client *osdc;
1617 struct ceph_osd_request *osd_req;
1618
1619 rbd_assert(obj_request_img_data_test(obj_request));
1620 img_request = obj_request->img_request;
1621 rbd_assert(img_request);
1622 rbd_assert(img_request_write_test(img_request));
1623
1624 /* Allocate and initialize the request, for the two ops */
1625
1626 snapc = img_request->snapc;
1627 rbd_dev = img_request->rbd_dev;
1628 osdc = &rbd_dev->rbd_client->client->osdc;
1629 osd_req = ceph_osdc_alloc_request(osdc, snapc, 2, false, GFP_ATOMIC);
1630 if (!osd_req)
1631 return NULL; /* ENOMEM */
1632
1633 osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK;
1634 osd_req->r_callback = rbd_osd_req_callback;
1635 osd_req->r_priv = obj_request;
1636
1637 osd_req->r_oid_len = strlen(obj_request->object_name);
1638 rbd_assert(osd_req->r_oid_len < sizeof (osd_req->r_oid));
1639 memcpy(osd_req->r_oid, obj_request->object_name, osd_req->r_oid_len);
1640
1641 osd_req->r_file_layout = rbd_dev->layout; /* struct */
1642
1643 return osd_req;
1644}
1645
1646
1604static void rbd_osd_req_destroy(struct ceph_osd_request *osd_req) 1647static void rbd_osd_req_destroy(struct ceph_osd_request *osd_req)
1605{ 1648{
1606 ceph_osdc_put_request(osd_req); 1649 ceph_osdc_put_request(osd_req);
@@ -1960,11 +2003,49 @@ out_unwind:
1960} 2003}
1961 2004
1962static void 2005static void
2006rbd_img_obj_copyup_callback(struct rbd_obj_request *obj_request)
2007{
2008 struct rbd_img_request *img_request;
2009 struct rbd_device *rbd_dev;
2010 u64 length;
2011 u32 page_count;
2012
2013 rbd_assert(obj_request->type == OBJ_REQUEST_BIO);
2014 rbd_assert(obj_request_img_data_test(obj_request));
2015 img_request = obj_request->img_request;
2016 rbd_assert(img_request);
2017
2018 rbd_dev = img_request->rbd_dev;
2019 rbd_assert(rbd_dev);
2020 length = (u64)1 << rbd_dev->header.obj_order;
2021 page_count = (u32)calc_pages_for(0, length);
2022
2023 rbd_assert(obj_request->copyup_pages);
2024 ceph_release_page_vector(obj_request->copyup_pages, page_count);
2025 obj_request->copyup_pages = NULL;
2026
2027 /*
2028 * We want the transfer count to reflect the size of the
2029 * original write request. There is no such thing as a
2030 * successful short write, so if the request was successful
2031 * we can just set it to the originally-requested length.
2032 */
2033 if (!obj_request->result)
2034 obj_request->xferred = obj_request->length;
2035
2036 /* Finish up with the normal image object callback */
2037
2038 rbd_img_obj_callback(obj_request);
2039}
2040
2041static void
1963rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request) 2042rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request)
1964{ 2043{
1965 struct rbd_obj_request *orig_request; 2044 struct rbd_obj_request *orig_request;
2045 struct ceph_osd_request *osd_req;
2046 struct ceph_osd_client *osdc;
2047 struct rbd_device *rbd_dev;
1966 struct page **pages; 2048 struct page **pages;
1967 u32 page_count;
1968 int result; 2049 int result;
1969 u64 obj_size; 2050 u64 obj_size;
1970 u64 xferred; 2051 u64 xferred;
@@ -1979,25 +2060,60 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request)
1979 2060
1980 orig_request = img_request->obj_request; 2061 orig_request = img_request->obj_request;
1981 rbd_assert(orig_request != NULL); 2062 rbd_assert(orig_request != NULL);
1982 2063 rbd_assert(orig_request->type == OBJ_REQUEST_BIO);
1983 result = img_request->result; 2064 result = img_request->result;
1984 obj_size = img_request->length; 2065 obj_size = img_request->length;
1985 xferred = img_request->xferred; 2066 xferred = img_request->xferred;
1986 2067
2068 rbd_dev = img_request->rbd_dev;
2069 rbd_assert(rbd_dev);
2070 rbd_assert(obj_size == (u64)1 << rbd_dev->header.obj_order);
2071
1987 rbd_img_request_put(img_request); 2072 rbd_img_request_put(img_request);
1988 2073
1989 obj_request_existence_set(orig_request, true); 2074 if (result)
2075 goto out_err;
2076
2077 /* Allocate the new copyup osd request for the original request */
1990 2078
1991 page_count = (u32)calc_pages_for(0, obj_size); 2079 result = -ENOMEM;
1992 ceph_release_page_vector(pages, page_count); 2080 rbd_assert(!orig_request->osd_req);
2081 osd_req = rbd_osd_req_create_copyup(orig_request);
2082 if (!osd_req)
2083 goto out_err;
2084 orig_request->osd_req = osd_req;
2085 orig_request->copyup_pages = pages;
1993 2086
1994 /* Resubmit the original request (for now). */ 2087 /* Initialize the copyup op */
1995 2088
1996 orig_request->result = rbd_img_obj_request_submit(orig_request); 2089 osd_req_op_cls_init(osd_req, 0, CEPH_OSD_OP_CALL, "rbd", "copyup");
1997 if (orig_request->result) { 2090 osd_req_op_cls_request_data_pages(osd_req, 0, pages, obj_size, 0,
1998 obj_request_done_set(orig_request); 2091 false, false);
1999 rbd_obj_request_complete(orig_request); 2092
2000 } 2093 /* Then the original write request op */
2094
2095 osd_req_op_extent_init(osd_req, 1, CEPH_OSD_OP_WRITE,
2096 orig_request->offset,
2097 orig_request->length, 0, 0);
2098 osd_req_op_extent_osd_data_bio(osd_req, 1, orig_request->bio_list,
2099 orig_request->length);
2100
2101 rbd_osd_req_format_write(orig_request);
2102
2103 /* All set, send it off. */
2104
2105 orig_request->callback = rbd_img_obj_copyup_callback;
2106 osdc = &rbd_dev->rbd_client->client->osdc;
2107 result = rbd_obj_request_submit(osdc, orig_request);
2108 if (!result)
2109 return;
2110out_err:
2111 /* Record the error code and complete the request */
2112
2113 orig_request->result = result;
2114 orig_request->xferred = 0;
2115 obj_request_done_set(orig_request);
2116 rbd_obj_request_complete(orig_request);
2001} 2117}
2002 2118
2003/* 2119/*
@@ -2034,6 +2150,15 @@ static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request)
2034 rbd_assert(rbd_dev->parent != NULL); 2150 rbd_assert(rbd_dev->parent != NULL);
2035 2151
2036 /* 2152 /*
2153 * First things first. The original osd request is of no
2154 * use to us any more, we'll need a new one that can hold
2155 * the two ops in a copyup request. We'll get that later,
2156 * but for now we can release the old one.
2157 */
2158 rbd_osd_req_destroy(obj_request->osd_req);
2159 obj_request->osd_req = NULL;
2160
2161 /*
2037 * Determine the byte range covered by the object in the 2162 * Determine the byte range covered by the object in the
2038 * child image to which the original request was to be sent. 2163 * child image to which the original request was to be sent.
2039 */ 2164 */