diff options
author | Lars Ellenberg <lars.ellenberg@linbit.com> | 2011-11-24 04:36:25 -0500 |
---|---|---|
committer | Philipp Reisner <philipp.reisner@linbit.com> | 2012-11-08 10:58:21 -0500 |
commit | 2312f0b3c5ab794fbac9e9bebe90c784c9d449c5 (patch) | |
tree | 0b4b74af502169d61e026855593b828fa202665a /drivers/block/drbd/drbd_req.c | |
parent | f9916d61a40e7ad43c2a20444894f85c45512f91 (diff) |
drbd: fix potential deadlock during "restart" of conflicting writes
w_restart_write(), run from worker context, calls __drbd_make_request()
and further drbd_al_begin_io(, delegate=true), which then
potentially deadlocks. The previous patch moved a BUG_ON to expose
such call paths, which would now be triggered.
Also, if we call __drbd_make_request() from resource worker context,
like w_restart_write() did, and that should block for whatever reason
(!drbd_state_is_stable(), resource suspended, ...),
we potentially deadlock the whole resource, as the worker
is needed for state changes and other things.
Create a dedicated retry workqueue for this instead.
Also make sure that inc_ap_bio()/dec_ap_bio() are properly paired,
even if do_retry() needs to retry itself,
in case __drbd_make_request() returns != 0.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block/drbd/drbd_req.c')
-rw-r--r-- | drivers/block/drbd/drbd_req.c | 17 |
1 files changed, 14 insertions, 3 deletions
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index c3f99bde0e11..5f4436c3abb3 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c | |||
@@ -104,7 +104,7 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const | |||
104 | * and never sent), it should still be "empty" as | 104 | * and never sent), it should still be "empty" as |
105 | * initialized in drbd_req_new(), so we can list_del() it | 105 | * initialized in drbd_req_new(), so we can list_del() it |
106 | * here unconditionally */ | 106 | * here unconditionally */ |
107 | list_del(&req->tl_requests); | 107 | list_del_init(&req->tl_requests); |
108 | 108 | ||
109 | /* if it was a write, we may have to set the corresponding | 109 | /* if it was a write, we may have to set the corresponding |
110 | * bit(s) out-of-sync first. If it had a local part, we need to | 110 | * bit(s) out-of-sync first. If it had a local part, we need to |
@@ -143,7 +143,10 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const | |||
143 | } | 143 | } |
144 | } | 144 | } |
145 | 145 | ||
146 | drbd_req_free(req); | 146 | if (s & RQ_POSTPONED) |
147 | drbd_restart_write(req); | ||
148 | else | ||
149 | drbd_req_free(req); | ||
147 | } | 150 | } |
148 | 151 | ||
149 | static void queue_barrier(struct drbd_conf *mdev) | 152 | static void queue_barrier(struct drbd_conf *mdev) |
@@ -289,8 +292,16 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) | |||
289 | if (!(s & RQ_POSTPONED)) { | 292 | if (!(s & RQ_POSTPONED)) { |
290 | m->error = ok ? 0 : (error ?: -EIO); | 293 | m->error = ok ? 0 : (error ?: -EIO); |
291 | m->bio = req->master_bio; | 294 | m->bio = req->master_bio; |
295 | req->master_bio = NULL; | ||
296 | } else { | ||
297 | /* Assert that this will be _req_is_done() | ||
298 | * with this very invokation. */ | ||
299 | /* FIXME: | ||
300 | * what about (RQ_LOCAL_PENDING | RQ_LOCAL_ABORTED)? | ||
301 | */ | ||
302 | D_ASSERT(!(s & RQ_LOCAL_PENDING)); | ||
303 | D_ASSERT(s & RQ_NET_DONE); | ||
292 | } | 304 | } |
293 | req->master_bio = NULL; | ||
294 | } | 305 | } |
295 | 306 | ||
296 | if (s & RQ_LOCAL_PENDING) | 307 | if (s & RQ_LOCAL_PENDING) |