diff options
author | Ilya Dryomov <idryomov@gmail.com> | 2015-07-16 10:36:11 -0400 |
---|---|---|
committer | Ilya Dryomov <idryomov@gmail.com> | 2015-07-31 04:38:57 -0400 |
commit | 2761713d35e370fd640b5781109f753066b746c4 (patch) | |
tree | 666214eac7b140de1d9444e4207e418acadf78cb | |
parent | fc927cd32feca2acefd90a4ac317fa4f0a2e5955 (diff) |
rbd: fix copyup completion race
For write/discard obj_requests that involved a copyup method call, the
opcode of the first op is CEPH_OSD_OP_CALL and the ->callback is
rbd_img_obj_copyup_callback(). The latter frees copyup pages, sets
->xferred and delegates to rbd_img_obj_callback(), the "normal" image
object callback, for reporting to block layer and putting refs.
rbd_osd_req_callback() however treats CEPH_OSD_OP_CALL as a trivial op,
which means obj_request is marked done in rbd_osd_trivial_callback(),
*before* ->callback is invoked and rbd_img_obj_copyup_callback() has
a chance to run. Marking obj_request done essentially means giving
rbd_img_obj_callback() a license to end it at any moment, so if another
obj_request from the same img_request is being completed concurrently,
rbd_img_obj_end_request() may very well be called on such prematurally
marked done request:
<obj_request-1/2 reply>
handle_reply()
rbd_osd_req_callback()
rbd_osd_trivial_callback()
rbd_obj_request_complete()
rbd_img_obj_copyup_callback()
rbd_img_obj_callback()
<obj_request-2/2 reply>
handle_reply()
rbd_osd_req_callback()
rbd_osd_trivial_callback()
for_each_obj_request(obj_request->img_request) {
rbd_img_obj_end_request(obj_request-1/2)
rbd_img_obj_end_request(obj_request-2/2) <--
}
Calling rbd_img_obj_end_request() on such a request leads to trouble,
in particular because its ->xfferred is 0. We report 0 to the block
layer with blk_update_request(), get back 1 for "this request has more
data in flight" and then trip on
rbd_assert(more ^ (which == img_request->obj_request_count));
with rhs (which == ...) being 1 because rbd_img_obj_end_request() has
been called for both requests and lhs (more) being 1 because we haven't
got a chance to set ->xfferred in rbd_img_obj_copyup_callback() yet.
To fix this, leverage that rbd wants to call class methods in only two
cases: one is a generic method call wrapper (obj_request is standalone)
and the other is a copyup (obj_request is part of an img_request). So
make a dedicated handler for CEPH_OSD_OP_CALL and directly invoke
rbd_img_obj_copyup_callback() from it if obj_request is part of an
img_request, similar to how CEPH_OSD_OP_READ handler invokes
rbd_img_obj_request_read_callback().
Since rbd_img_obj_copyup_callback() is now being called from the OSD
request callback (only), it is renamed to rbd_osd_copyup_callback().
Cc: Alex Elder <elder@linaro.org>
Cc: stable@vger.kernel.org # 3.10+, needs backporting for < 3.18
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Alex Elder <elder@linaro.org>
-rw-r--r-- | drivers/block/rbd.c | 22 |
1 files changed, 17 insertions, 5 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index d94529d5c8e9..bc67a93aa4f4 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c | |||
@@ -523,6 +523,7 @@ void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...) | |||
523 | # define rbd_assert(expr) ((void) 0) | 523 | # define rbd_assert(expr) ((void) 0) |
524 | #endif /* !RBD_DEBUG */ | 524 | #endif /* !RBD_DEBUG */ |
525 | 525 | ||
526 | static void rbd_osd_copyup_callback(struct rbd_obj_request *obj_request); | ||
526 | static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request); | 527 | static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request); |
527 | static void rbd_img_parent_read(struct rbd_obj_request *obj_request); | 528 | static void rbd_img_parent_read(struct rbd_obj_request *obj_request); |
528 | static void rbd_dev_remove_parent(struct rbd_device *rbd_dev); | 529 | static void rbd_dev_remove_parent(struct rbd_device *rbd_dev); |
@@ -1818,6 +1819,16 @@ static void rbd_osd_stat_callback(struct rbd_obj_request *obj_request) | |||
1818 | obj_request_done_set(obj_request); | 1819 | obj_request_done_set(obj_request); |
1819 | } | 1820 | } |
1820 | 1821 | ||
1822 | static void rbd_osd_call_callback(struct rbd_obj_request *obj_request) | ||
1823 | { | ||
1824 | dout("%s: obj %p\n", __func__, obj_request); | ||
1825 | |||
1826 | if (obj_request_img_data_test(obj_request)) | ||
1827 | rbd_osd_copyup_callback(obj_request); | ||
1828 | else | ||
1829 | obj_request_done_set(obj_request); | ||
1830 | } | ||
1831 | |||
1821 | static void rbd_osd_req_callback(struct ceph_osd_request *osd_req, | 1832 | static void rbd_osd_req_callback(struct ceph_osd_request *osd_req, |
1822 | struct ceph_msg *msg) | 1833 | struct ceph_msg *msg) |
1823 | { | 1834 | { |
@@ -1866,6 +1877,8 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req, | |||
1866 | rbd_osd_discard_callback(obj_request); | 1877 | rbd_osd_discard_callback(obj_request); |
1867 | break; | 1878 | break; |
1868 | case CEPH_OSD_OP_CALL: | 1879 | case CEPH_OSD_OP_CALL: |
1880 | rbd_osd_call_callback(obj_request); | ||
1881 | break; | ||
1869 | case CEPH_OSD_OP_NOTIFY_ACK: | 1882 | case CEPH_OSD_OP_NOTIFY_ACK: |
1870 | case CEPH_OSD_OP_WATCH: | 1883 | case CEPH_OSD_OP_WATCH: |
1871 | rbd_osd_trivial_callback(obj_request); | 1884 | rbd_osd_trivial_callback(obj_request); |
@@ -2530,13 +2543,15 @@ out_unwind: | |||
2530 | } | 2543 | } |
2531 | 2544 | ||
2532 | static void | 2545 | static void |
2533 | rbd_img_obj_copyup_callback(struct rbd_obj_request *obj_request) | 2546 | rbd_osd_copyup_callback(struct rbd_obj_request *obj_request) |
2534 | { | 2547 | { |
2535 | struct rbd_img_request *img_request; | 2548 | struct rbd_img_request *img_request; |
2536 | struct rbd_device *rbd_dev; | 2549 | struct rbd_device *rbd_dev; |
2537 | struct page **pages; | 2550 | struct page **pages; |
2538 | u32 page_count; | 2551 | u32 page_count; |
2539 | 2552 | ||
2553 | dout("%s: obj %p\n", __func__, obj_request); | ||
2554 | |||
2540 | rbd_assert(obj_request->type == OBJ_REQUEST_BIO || | 2555 | rbd_assert(obj_request->type == OBJ_REQUEST_BIO || |
2541 | obj_request->type == OBJ_REQUEST_NODATA); | 2556 | obj_request->type == OBJ_REQUEST_NODATA); |
2542 | rbd_assert(obj_request_img_data_test(obj_request)); | 2557 | rbd_assert(obj_request_img_data_test(obj_request)); |
@@ -2563,9 +2578,7 @@ rbd_img_obj_copyup_callback(struct rbd_obj_request *obj_request) | |||
2563 | if (!obj_request->result) | 2578 | if (!obj_request->result) |
2564 | obj_request->xferred = obj_request->length; | 2579 | obj_request->xferred = obj_request->length; |
2565 | 2580 | ||
2566 | /* Finish up with the normal image object callback */ | 2581 | obj_request_done_set(obj_request); |
2567 | |||
2568 | rbd_img_obj_callback(obj_request); | ||
2569 | } | 2582 | } |
2570 | 2583 | ||
2571 | static void | 2584 | static void |
@@ -2650,7 +2663,6 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request) | |||
2650 | 2663 | ||
2651 | /* All set, send it off. */ | 2664 | /* All set, send it off. */ |
2652 | 2665 | ||
2653 | orig_request->callback = rbd_img_obj_copyup_callback; | ||
2654 | osdc = &rbd_dev->rbd_client->client->osdc; | 2666 | osdc = &rbd_dev->rbd_client->client->osdc; |
2655 | img_result = rbd_obj_request_submit(osdc, orig_request); | 2667 | img_result = rbd_obj_request_submit(osdc, orig_request); |
2656 | if (!img_result) | 2668 | if (!img_result) |