aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Alex Elder <elder@inktank.com> 2013-02-11 13:33:24 -0500
committer: Sage Weil <sage@inktank.com> 2013-05-02 00:19:04 -0400
commit: c5b5ef6c51124e61829632251098f8b5efecae8a (patch)
tree: 344901c39c537f0dd6fe709a6054d48f519da56e
parent: 5679c59f608f2fedff313e59b374257f1c945234 (diff)
rbd: issue stat request before layered write
This is a step toward fully implementing layered writes. Add checks before request submission for the object(s) associated with an image request. For write requests, if we don't know that the target object exists, issue a STAT request to find out. When that request completes, mark the known and exists flags for the original object request accordingly and re-submit the object request. (Note that this still does the existence check only; the copyup operation is not yet done.) A new object request is created to perform the existence check. A pointer to the original request is added to that object request to allow the stat request to re-issue the original request after updating its flags. If there is a failure with the stat request the error code is stored with the original request, which is then completed. This resolves: http://tracker.ceph.com/issues/3418 Signed-off-by: Alex Elder <elder@inktank.com> Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
-rw-r--r-- drivers/block/rbd.c 163
1 files changed, 155 insertions, 8 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index b1b8ef864d58..449847badcd8 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -183,9 +183,31 @@ struct rbd_obj_request {
183 u64 length; /* bytes from offset */ 183 u64 length; /* bytes from offset */
184 unsigned long flags; 184 unsigned long flags;
185 185
186 struct rbd_img_request *img_request; 186 /*
187 u64 img_offset; /* image relative offset */ 187 * An object request associated with an image will have its
188 struct list_head links; /* img_request->obj_requests */ 188 * img_data flag set; a standalone object request will not.
189 *
190 * A standalone object request will have which == BAD_WHICH
191 * and a null obj_request pointer.
192 *
193 * An object request initiated in support of a layered image
194 * object (to check for its existence before a write) will
195 * have which == BAD_WHICH and a non-null obj_request pointer.
196 *
197 * Finally, an object request for rbd image data will have
198 * which != BAD_WHICH, and will have a non-null img_request
199 * pointer. The value of which will be in the range
200 * 0..(img_request->obj_request_count-1).
201 */
202 union {
203 struct rbd_obj_request *obj_request; /* STAT op */
204 struct {
205 struct rbd_img_request *img_request;
206 u64 img_offset;
207 /* links for img_request->obj_requests list */
208 struct list_head links;
209 };
210 };
189 u32 which; /* posn image request list */ 211 u32 which; /* posn image request list */
190 212
191 enum obj_request_type type; 213 enum obj_request_type type;
@@ -1656,10 +1678,6 @@ static struct rbd_img_request *rbd_img_request_create(
1656 INIT_LIST_HEAD(&img_request->obj_requests); 1678 INIT_LIST_HEAD(&img_request->obj_requests);
1657 kref_init(&img_request->kref); 1679 kref_init(&img_request->kref);
1658 1680
1659 (void) obj_request_existence_set;
1660 (void) obj_request_known_test;
1661 (void) obj_request_exists_test;
1662
1663 rbd_img_request_get(img_request); /* Avoid a warning */ 1681 rbd_img_request_get(img_request); /* Avoid a warning */
1664 rbd_img_request_put(img_request); /* TEMPORARY */ 1682 rbd_img_request_put(img_request); /* TEMPORARY */
1665 1683
@@ -1847,18 +1865,147 @@ out_unwind:
1847 return -ENOMEM; 1865 return -ENOMEM;
1848} 1866}
1849 1867
1868static void rbd_img_obj_exists_callback(struct rbd_obj_request *obj_request)
1869{
1870 struct rbd_device *rbd_dev;
1871 struct ceph_osd_client *osdc;
1872 struct rbd_obj_request *orig_request;
1873 int result;
1874
1875 rbd_assert(!obj_request_img_data_test(obj_request));
1876
1877 /*
1878 * All we need from the object request is the original
1879 * request and the result of the STAT op. Grab those, then
1880 * we're done with the request.
1881 */
1882 orig_request = obj_request->obj_request;
1883 obj_request->obj_request = NULL;
1884 rbd_assert(orig_request);
1885 rbd_assert(orig_request->img_request);
1886
1887 result = obj_request->result;
1888 obj_request->result = 0;
1889
1890 dout("%s: obj %p for obj %p result %d %llu/%llu\n", __func__,
1891 obj_request, orig_request, result,
1892 obj_request->xferred, obj_request->length);
1893 rbd_obj_request_put(obj_request);
1894
1895 rbd_assert(orig_request);
1896 rbd_assert(orig_request->img_request);
1897 rbd_dev = orig_request->img_request->rbd_dev;
1898 osdc = &rbd_dev->rbd_client->client->osdc;
1899
1900 /*
1901 * Our only purpose here is to determine whether the object
1902 * exists, and we don't want to treat the non-existence as
1903 * an error. If something else comes back, transfer the
1904 * error to the original request and complete it now.
1905 */
1906 if (!result) {
1907 obj_request_existence_set(orig_request, true);
1908 } else if (result == -ENOENT) {
1909 obj_request_existence_set(orig_request, false);
1910 } else if (result) {
1911 orig_request->result = result;
1912 goto out_err;
1913 }
1914
1915 /*
1916 * Resubmit the original request now that we have recorded
1917 * whether the target object exists.
1918 */
1919 orig_request->result = rbd_obj_request_submit(osdc, orig_request);
1920out_err:
1921 if (orig_request->result)
1922 rbd_obj_request_complete(orig_request);
1923 rbd_obj_request_put(orig_request);
1924}
1925
1926static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
1927{
1928 struct rbd_obj_request *stat_request;
1929 struct rbd_device *rbd_dev;
1930 struct ceph_osd_client *osdc;
1931 struct page **pages = NULL;
1932 u32 page_count;
1933 size_t size;
1934 int ret;
1935
1936 /*
1937 * The response data for a STAT call consists of:
1938 * le64 length;
1939 * struct {
1940 * le32 tv_sec;
1941 * le32 tv_nsec;
1942 * } mtime;
1943 */
1944 size = sizeof (__le64) + sizeof (__le32) + sizeof (__le32);
1945 page_count = (u32)calc_pages_for(0, size);
1946 pages = ceph_alloc_page_vector(page_count, GFP_KERNEL);
1947 if (IS_ERR(pages))
1948 return PTR_ERR(pages);
1949
1950 ret = -ENOMEM;
1951 stat_request = rbd_obj_request_create(obj_request->object_name, 0, 0,
1952 OBJ_REQUEST_PAGES);
1953 if (!stat_request)
1954 goto out;
1955
1956 rbd_obj_request_get(obj_request);
1957 stat_request->obj_request = obj_request;
1958 stat_request->pages = pages;
1959 stat_request->page_count = page_count;
1960
1961 rbd_assert(obj_request->img_request);
1962 rbd_dev = obj_request->img_request->rbd_dev;
1963 stat_request->osd_req = rbd_osd_req_create(rbd_dev, false,
1964 stat_request);
1965 if (!stat_request->osd_req)
1966 goto out;
1967 stat_request->callback = rbd_img_obj_exists_callback;
1968
1969 osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT);
1970 osd_req_op_raw_data_in_pages(stat_request->osd_req, 0, pages, size, 0,
1971 false, false);
1972 rbd_osd_req_format(stat_request, false);
1973
1974 osdc = &rbd_dev->rbd_client->client->osdc;
1975 ret = rbd_obj_request_submit(osdc, stat_request);
1976out:
1977 if (ret)
1978 rbd_obj_request_put(obj_request);
1979
1980 return ret;
1981}
1982
1850static int rbd_img_request_submit(struct rbd_img_request *img_request) 1983static int rbd_img_request_submit(struct rbd_img_request *img_request)
1851{ 1984{
1852 struct rbd_device *rbd_dev = img_request->rbd_dev; 1985 struct rbd_device *rbd_dev = img_request->rbd_dev;
1853 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 1986 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
1854 struct rbd_obj_request *obj_request; 1987 struct rbd_obj_request *obj_request;
1855 struct rbd_obj_request *next_obj_request; 1988 struct rbd_obj_request *next_obj_request;
1989 bool write_request = img_request_write_test(img_request);
1990 bool layered = img_request_layered_test(img_request);
1856 1991
1857 dout("%s: img %p\n", __func__, img_request); 1992 dout("%s: img %p\n", __func__, img_request);
1858 for_each_obj_request_safe(img_request, obj_request, next_obj_request) { 1993 for_each_obj_request_safe(img_request, obj_request, next_obj_request) {
1994 bool known;
1995 bool object_exists;
1859 int ret; 1996 int ret;
1860 1997
1861 ret = rbd_obj_request_submit(osdc, obj_request); 1998 /*
1999 * We need to know whether the target object exists
2000 * for a layered write. Issue an existence check
2001 * first if we need to.
2002 */
2003 known = obj_request_known_test(obj_request);
2004 object_exists = known && obj_request_exists_test(obj_request);
2005 if (!write_request || !layered || object_exists)
2006 ret = rbd_obj_request_submit(osdc, obj_request);
2007 else
2008 ret = rbd_img_obj_exists_submit(obj_request);
1862 if (ret) 2009 if (ret)
1863 return ret; 2010 return ret;
1864 } 2011 }