aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block
diff options
context:
space:
mode:
authorGuangliang Zhao <lucienchao@gmail.com>2014-04-01 10:22:16 -0400
committerIlya Dryomov <idryomov@redhat.com>2014-10-14 13:03:31 -0400
commit90e98c5229c0adfadf2c2ad2c91d72902bf61bc4 (patch)
treebf0b039cc39d8fbff1b69429d8dd9489df65e939 /drivers/block
parent6d2940c881aeb9f46baac548dc4e906a53957dba (diff)
rbd: initial discard bits from Guangliang Zhao
This patch adds discard support to the rbd driver. There are three types of operation in the driver: 1. Objects are removed if they are completely contained within the discard range. 2. Objects are truncated if they are partly contained within the discard range and the range aligns with their boundary. 3. All others are zeroed. A discard request from blkdev_issue_discard() is defined as one with both REQ_WRITE and REQ_DISCARD set and no data, so we must check REQ_DISCARD first when determining the request type. This resolves: http://tracker.ceph.com/issues/190 [ Ilya Dryomov: This is incomplete and somewhat buggy; see the follow-up commits by Josh Durgin for refinements and fixes, which weren't folded in, to preserve authorship. ] Signed-off-by: Guangliang Zhao <lucienchao@gmail.com> Reviewed-by: Josh Durgin <josh.durgin@inktank.com> Reviewed-by: Alex Elder <elder@linaro.org>
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/rbd.c104
1 files changed, 89 insertions, 15 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index d68c937d0a12..e2f7a708e20d 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -213,6 +213,7 @@ enum obj_request_type {
213enum obj_operation_type { 213enum obj_operation_type {
214 OBJ_OP_WRITE, 214 OBJ_OP_WRITE,
215 OBJ_OP_READ, 215 OBJ_OP_READ,
216 OBJ_OP_DISCARD,
216}; 217};
217 218
218enum obj_req_flags { 219enum obj_req_flags {
@@ -281,6 +282,7 @@ enum img_req_flags {
281 IMG_REQ_WRITE, /* I/O direction: read = 0, write = 1 */ 282 IMG_REQ_WRITE, /* I/O direction: read = 0, write = 1 */
282 IMG_REQ_CHILD, /* initiator: block = 0, child image = 1 */ 283 IMG_REQ_CHILD, /* initiator: block = 0, child image = 1 */
283 IMG_REQ_LAYERED, /* ENOENT handling: normal = 0, layered = 1 */ 284 IMG_REQ_LAYERED, /* ENOENT handling: normal = 0, layered = 1 */
285 IMG_REQ_DISCARD, /* discard: normal = 0, discard request = 1 */
284}; 286};
285 287
286struct rbd_img_request { 288struct rbd_img_request {
@@ -797,6 +799,8 @@ static char* obj_op_name(enum obj_operation_type op_type)
797 return "read"; 799 return "read";
798 case OBJ_OP_WRITE: 800 case OBJ_OP_WRITE:
799 return "write"; 801 return "write";
802 case OBJ_OP_DISCARD:
803 return "discard";
800 default: 804 default:
801 return "???"; 805 return "???";
802 } 806 }
@@ -1617,6 +1621,21 @@ static bool img_request_write_test(struct rbd_img_request *img_request)
1617 return test_bit(IMG_REQ_WRITE, &img_request->flags) != 0; 1621 return test_bit(IMG_REQ_WRITE, &img_request->flags) != 0;
1618} 1622}
1619 1623
1624/*
1625 * Set the discard flag when the img_request is a discard request
1626 */
1627static void img_request_discard_set(struct rbd_img_request *img_request)
1628{
1629 set_bit(IMG_REQ_DISCARD, &img_request->flags);
1630 smp_mb();
1631}
1632
1633static bool img_request_discard_test(struct rbd_img_request *img_request)
1634{
1635 smp_mb();
1636 return test_bit(IMG_REQ_DISCARD, &img_request->flags) != 0;
1637}
1638
1620static void img_request_child_set(struct rbd_img_request *img_request) 1639static void img_request_child_set(struct rbd_img_request *img_request)
1621{ 1640{
1622 set_bit(IMG_REQ_CHILD, &img_request->flags); 1641 set_bit(IMG_REQ_CHILD, &img_request->flags);
@@ -1739,6 +1758,18 @@ static void rbd_osd_write_callback(struct rbd_obj_request *obj_request)
1739 obj_request_done_set(obj_request); 1758 obj_request_done_set(obj_request);
1740} 1759}
1741 1760
1761static void rbd_osd_discard_callback(struct rbd_obj_request *obj_request)
1762{
1763 dout("%s: obj %p result %d %llu\n", __func__, obj_request,
1764 obj_request->result, obj_request->length);
1765 /*
1766 * There is no such thing as a successful short discard. Set
1767 * it to our originally-requested length.
1768 */
1769 obj_request->xferred = obj_request->length;
1770 obj_request_done_set(obj_request);
1771}
1772
1742/* 1773/*
1743 * For a simple stat call there's nothing to do. We'll do more if 1774 * For a simple stat call there's nothing to do. We'll do more if
1744 * this is part of a write sequence for a layered image. 1775 * this is part of a write sequence for a layered image.
@@ -1790,6 +1821,11 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req,
1790 case CEPH_OSD_OP_STAT: 1821 case CEPH_OSD_OP_STAT:
1791 rbd_osd_stat_callback(obj_request); 1822 rbd_osd_stat_callback(obj_request);
1792 break; 1823 break;
1824 case CEPH_OSD_OP_DELETE:
1825 case CEPH_OSD_OP_TRUNCATE:
1826 case CEPH_OSD_OP_ZERO:
1827 rbd_osd_discard_callback(obj_request);
1828 break;
1793 case CEPH_OSD_OP_CALL: 1829 case CEPH_OSD_OP_CALL:
1794 case CEPH_OSD_OP_NOTIFY_ACK: 1830 case CEPH_OSD_OP_NOTIFY_ACK:
1795 case CEPH_OSD_OP_WATCH: 1831 case CEPH_OSD_OP_WATCH:
@@ -1848,10 +1884,14 @@ static struct ceph_osd_request *rbd_osd_req_create(
1848 struct ceph_osd_client *osdc; 1884 struct ceph_osd_client *osdc;
1849 struct ceph_osd_request *osd_req; 1885 struct ceph_osd_request *osd_req;
1850 1886
1851 if (obj_request_img_data_test(obj_request) && op_type == OBJ_OP_WRITE) { 1887 if (obj_request_img_data_test(obj_request) &&
1888 (op_type == OBJ_OP_DISCARD || op_type == OBJ_OP_WRITE)) {
1852 struct rbd_img_request *img_request = obj_request->img_request; 1889 struct rbd_img_request *img_request = obj_request->img_request;
1853 1890 if (op_type == OBJ_OP_WRITE) {
1854 rbd_assert(img_request_write_test(img_request)); 1891 rbd_assert(img_request_write_test(img_request));
1892 } else {
1893 rbd_assert(img_request_discard_test(img_request));
1894 }
1855 snapc = img_request->snapc; 1895 snapc = img_request->snapc;
1856 } 1896 }
1857 1897
@@ -1865,7 +1905,7 @@ static struct ceph_osd_request *rbd_osd_req_create(
1865 if (!osd_req) 1905 if (!osd_req)
1866 return NULL; /* ENOMEM */ 1906 return NULL; /* ENOMEM */
1867 1907
1868 if (op_type == OBJ_OP_WRITE) 1908 if (op_type == OBJ_OP_WRITE || op_type == OBJ_OP_DISCARD)
1869 osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK; 1909 osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK;
1870 else 1910 else
1871 osd_req->r_flags = CEPH_OSD_FLAG_READ; 1911 osd_req->r_flags = CEPH_OSD_FLAG_READ;
@@ -2086,7 +2126,10 @@ static struct rbd_img_request *rbd_img_request_create(
2086 img_request->offset = offset; 2126 img_request->offset = offset;
2087 img_request->length = length; 2127 img_request->length = length;
2088 img_request->flags = 0; 2128 img_request->flags = 0;
2089 if (op_type == OBJ_OP_WRITE) { 2129 if (op_type == OBJ_OP_DISCARD) {
2130 img_request_discard_set(img_request);
2131 img_request->snapc = snapc;
2132 } else if (op_type == OBJ_OP_WRITE) {
2090 img_request_write_set(img_request); 2133 img_request_write_set(img_request);
2091 img_request->snapc = snapc; 2134 img_request->snapc = snapc;
2092 } else { 2135 } else {
@@ -2187,8 +2230,12 @@ static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request)
2187 struct rbd_device *rbd_dev = img_request->rbd_dev; 2230 struct rbd_device *rbd_dev = img_request->rbd_dev;
2188 enum obj_operation_type op_type; 2231 enum obj_operation_type op_type;
2189 2232
2190 op_type = img_request_write_test(img_request) ? OBJ_OP_WRITE : 2233 if (img_request_discard_test(img_request))
2191 OBJ_OP_READ; 2234 op_type = OBJ_OP_DISCARD;
2235 else if (img_request_write_test(img_request))
2236 op_type = OBJ_OP_WRITE;
2237 else
2238 op_type = OBJ_OP_READ;
2192 2239
2193 rbd_warn(rbd_dev, "%s %llx at %llx (%llx)", 2240 rbd_warn(rbd_dev, "%s %llx at %llx (%llx)",
2194 obj_op_name(op_type), obj_request->length, 2241 obj_op_name(op_type), obj_request->length,
@@ -2275,7 +2322,9 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request,
2275 unsigned int bio_offset = 0; 2322 unsigned int bio_offset = 0;
2276 struct page **pages = NULL; 2323 struct page **pages = NULL;
2277 enum obj_operation_type op_type; 2324 enum obj_operation_type op_type;
2325 u64 object_size = rbd_obj_bytes(&rbd_dev->header);
2278 u64 img_offset; 2326 u64 img_offset;
2327 u64 img_end;
2279 u64 resid; 2328 u64 resid;
2280 u16 opcode; 2329 u16 opcode;
2281 2330
@@ -2283,6 +2332,7 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request,
2283 (int)type, data_desc); 2332 (int)type, data_desc);
2284 2333
2285 img_offset = img_request->offset; 2334 img_offset = img_request->offset;
2335 img_end = rbd_dev->header.image_size;
2286 resid = img_request->length; 2336 resid = img_request->length;
2287 rbd_assert(resid > 0); 2337 rbd_assert(resid > 0);
2288 2338
@@ -2290,8 +2340,7 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request,
2290 bio_list = data_desc; 2340 bio_list = data_desc;
2291 rbd_assert(img_offset == 2341 rbd_assert(img_offset ==
2292 bio_list->bi_iter.bi_sector << SECTOR_SHIFT); 2342 bio_list->bi_iter.bi_sector << SECTOR_SHIFT);
2293 } else { 2343 } else if (type == OBJ_REQUEST_PAGES) {
2294 rbd_assert(type == OBJ_REQUEST_PAGES);
2295 pages = data_desc; 2344 pages = data_desc;
2296 } 2345 }
2297 2346
@@ -2332,7 +2381,7 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request,
2332 GFP_ATOMIC); 2381 GFP_ATOMIC);
2333 if (!obj_request->bio_list) 2382 if (!obj_request->bio_list)
2334 goto out_unwind; 2383 goto out_unwind;
2335 } else { 2384 } else if (type == OBJ_REQUEST_PAGES) {
2336 unsigned int page_count; 2385 unsigned int page_count;
2337 2386
2338 obj_request->pages = pages; 2387 obj_request->pages = pages;
@@ -2343,7 +2392,19 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request,
2343 pages += page_count; 2392 pages += page_count;
2344 } 2393 }
2345 2394
2346 if (img_request_write_test(img_request)) { 2395 if (img_request_discard_test(img_request)) {
2396 op_type = OBJ_OP_DISCARD;
2397 if (!offset && (length == object_size)
2398 && (!img_request_layered_test(img_request) ||
2399 (rbd_dev->parent_overlap <=
2400 obj_request->img_offset)))
2401 opcode = CEPH_OSD_OP_DELETE;
2402 else if ((offset + length == object_size) ||
2403 (obj_request->img_offset + length == img_end))
2404 opcode = CEPH_OSD_OP_TRUNCATE;
2405 else
2406 opcode = CEPH_OSD_OP_ZERO;
2407 } else if (img_request_write_test(img_request)) {
2347 op_type = OBJ_OP_WRITE; 2408 op_type = OBJ_OP_WRITE;
2348 opcode = CEPH_OSD_OP_WRITE; 2409 opcode = CEPH_OSD_OP_WRITE;
2349 } else { 2410 } else {
@@ -2372,12 +2433,13 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request,
2372 if (type == OBJ_REQUEST_BIO) 2433 if (type == OBJ_REQUEST_BIO)
2373 osd_req_op_extent_osd_data_bio(osd_req, which, 2434 osd_req_op_extent_osd_data_bio(osd_req, which,
2374 obj_request->bio_list, length); 2435 obj_request->bio_list, length);
2375 else 2436 else if (type == OBJ_REQUEST_PAGES)
2376 osd_req_op_extent_osd_data_pages(osd_req, which, 2437 osd_req_op_extent_osd_data_pages(osd_req, which,
2377 obj_request->pages, length, 2438 obj_request->pages, length,
2378 offset & ~PAGE_MASK, false, false); 2439 offset & ~PAGE_MASK, false, false);
2379 2440
2380 if (op_type == OBJ_OP_WRITE) 2441 /* Discards are also writes */
2442 if (op_type == OBJ_OP_WRITE || op_type == OBJ_OP_DISCARD)
2381 rbd_osd_req_format_write(obj_request); 2443 rbd_osd_req_format_write(obj_request);
2382 else 2444 else
2383 rbd_osd_req_format_read(obj_request); 2445 rbd_osd_req_format_read(obj_request);
@@ -3229,7 +3291,9 @@ static void rbd_handle_request(struct rbd_device *rbd_dev, struct request *rq)
3229 u64 mapping_size; 3291 u64 mapping_size;
3230 int result; 3292 int result;
3231 3293
3232 if (rq->cmd_flags & REQ_WRITE) 3294 if (rq->cmd_flags & REQ_DISCARD)
3295 op_type = OBJ_OP_DISCARD;
3296 else if (rq->cmd_flags & REQ_WRITE)
3233 op_type = OBJ_OP_WRITE; 3297 op_type = OBJ_OP_WRITE;
3234 else 3298 else
3235 op_type = OBJ_OP_READ; 3299 op_type = OBJ_OP_READ;
@@ -3295,7 +3359,12 @@ static void rbd_handle_request(struct rbd_device *rbd_dev, struct request *rq)
3295 } 3359 }
3296 img_request->rq = rq; 3360 img_request->rq = rq;
3297 3361
3298 result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO, rq->bio); 3362 if (op_type == OBJ_OP_DISCARD)
3363 result = rbd_img_request_fill(img_request, OBJ_REQUEST_NODATA,
3364 NULL);
3365 else
3366 result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO,
3367 rq->bio);
3299 if (result) 3368 if (result)
3300 goto err_img_request; 3369 goto err_img_request;
3301 3370
@@ -3667,6 +3736,11 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
3667 blk_queue_io_min(q, segment_size); 3736 blk_queue_io_min(q, segment_size);
3668 blk_queue_io_opt(q, segment_size); 3737 blk_queue_io_opt(q, segment_size);
3669 3738
3739 /* enable the discard support */
3740 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
3741 q->limits.discard_granularity = segment_size;
3742 q->limits.discard_alignment = segment_size;
3743
3670 blk_queue_merge_bvec(q, rbd_merge_bvec); 3744 blk_queue_merge_bvec(q, rbd_merge_bvec);
3671 disk->queue = q; 3745 disk->queue = q;
3672 3746