diff options
author | Lars Ellenberg <lars.ellenberg@linbit.com> | 2011-11-24 04:36:25 -0500 |
---|---|---|
committer | Philipp Reisner <philipp.reisner@linbit.com> | 2012-11-08 10:58:21 -0500 |
commit | 2312f0b3c5ab794fbac9e9bebe90c784c9d449c5 (patch) | |
tree | 0b4b74af502169d61e026855593b828fa202665a /drivers/block/drbd/drbd_main.c | |
parent | f9916d61a40e7ad43c2a20444894f85c45512f91 (diff) |
drbd: fix potential deadlock during "restart" of conflicting writes
w_restart_write(), run from worker context, calls __drbd_make_request()
and further drbd_al_begin_io(, delegate=true), which then
potentially deadlocks. The previous patch moved a BUG_ON to expose
such call paths, which would now be triggered.
Also, if we call __drbd_make_request() from resource worker context,
like w_restart_write() did, and that should block for whatever reason
(!drbd_state_is_stable(), resource suspended, ...),
we potentially deadlock the whole resource, as the worker
is needed for state changes and other things.
Create a dedicated retry workqueue for this instead.
Also make sure that inc_ap_bio()/dec_ap_bio() are properly paired,
even if do_retry() needs to retry itself,
in case __drbd_make_request() returns != 0.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block/drbd/drbd_main.c')
-rw-r--r-- | drivers/block/drbd/drbd_main.c | 79 |
1 files changed, 79 insertions, 0 deletions
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index a0045ac88040..5529d392e5d3 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c | |||
@@ -2383,6 +2383,73 @@ void drbd_minor_destroy(struct kref *kref) | |||
2383 | kref_put(&tconn->kref, &conn_destroy); | 2383 | kref_put(&tconn->kref, &conn_destroy); |
2384 | } | 2384 | } |
2385 | 2385 | ||
2386 | /* One global retry thread, if we need to push back some bio and have it | ||
2387 | * reinserted through our make request function. | ||
2388 | */ | ||
2389 | static struct retry_worker { | ||
2390 | struct workqueue_struct *wq; | ||
2391 | struct work_struct worker; | ||
2392 | |||
2393 | spinlock_t lock; | ||
2394 | struct list_head writes; | ||
2395 | } retry; | ||
2396 | |||
2397 | static void do_retry(struct work_struct *ws) | ||
2398 | { | ||
2399 | struct retry_worker *retry = container_of(ws, struct retry_worker, worker); | ||
2400 | LIST_HEAD(writes); | ||
2401 | struct drbd_request *req, *tmp; | ||
2402 | |||
2403 | spin_lock_irq(&retry->lock); | ||
2404 | list_splice_init(&retry->writes, &writes); | ||
2405 | spin_unlock_irq(&retry->lock); | ||
2406 | |||
2407 | list_for_each_entry_safe(req, tmp, &writes, tl_requests) { | ||
2408 | struct drbd_conf *mdev = req->w.mdev; | ||
2409 | struct bio *bio = req->master_bio; | ||
2410 | unsigned long start_time = req->start_time; | ||
2411 | |||
2412 | /* We have exclusive access to this request object. | ||
2413 | * If it had not been RQ_POSTPONED, the code path which queued | ||
2414 | * it here would have completed and freed it already. | ||
2415 | */ | ||
2416 | mempool_free(req, drbd_request_mempool); | ||
2417 | |||
2418 | /* A single suspended or otherwise blocking device may stall | ||
2419 | * all others as well. Fortunately, this code path is to | ||
2420 | * recover from a situation that "should not happen": | ||
2421 | * concurrent writes in multi-primary setup. | ||
2422 | * In a "normal" lifecycle, this workqueue is supposed to be | ||
2423 | * destroyed without ever doing anything. | ||
2424 | * If it turns out to be an issue anyways, we can do per | ||
2425 | * resource (replication group) or per device (minor) retry | ||
2426 | * workqueues instead. | ||
2427 | */ | ||
2428 | |||
2429 | /* We are not just doing generic_make_request(), | ||
2430 | * as we want to keep the start_time information. */ | ||
2431 | do { | ||
2432 | inc_ap_bio(mdev); | ||
2433 | } while(__drbd_make_request(mdev, bio, start_time)); | ||
2434 | } | ||
2435 | } | ||
2436 | |||
2437 | void drbd_restart_write(struct drbd_request *req) | ||
2438 | { | ||
2439 | unsigned long flags; | ||
2440 | spin_lock_irqsave(&retry.lock, flags); | ||
2441 | list_move_tail(&req->tl_requests, &retry.writes); | ||
2442 | spin_unlock_irqrestore(&retry.lock, flags); | ||
2443 | |||
2444 | /* Drop the extra reference that would otherwise | ||
2445 | * have been dropped by complete_master_bio. | ||
2446 | * do_retry() needs to grab a new one. */ | ||
2447 | dec_ap_bio(req->w.mdev); | ||
2448 | |||
2449 | queue_work(retry.wq, &retry.worker); | ||
2450 | } | ||
2451 | |||
2452 | |||
2386 | static void drbd_cleanup(void) | 2453 | static void drbd_cleanup(void) |
2387 | { | 2454 | { |
2388 | unsigned int i; | 2455 | unsigned int i; |
@@ -2402,6 +2469,9 @@ static void drbd_cleanup(void) | |||
2402 | if (drbd_proc) | 2469 | if (drbd_proc) |
2403 | remove_proc_entry("drbd", NULL); | 2470 | remove_proc_entry("drbd", NULL); |
2404 | 2471 | ||
2472 | if (retry.wq) | ||
2473 | destroy_workqueue(retry.wq); | ||
2474 | |||
2405 | drbd_genl_unregister(); | 2475 | drbd_genl_unregister(); |
2406 | 2476 | ||
2407 | idr_for_each_entry(&minors, mdev, i) { | 2477 | idr_for_each_entry(&minors, mdev, i) { |
@@ -2851,6 +2921,15 @@ int __init drbd_init(void) | |||
2851 | rwlock_init(&global_state_lock); | 2921 | rwlock_init(&global_state_lock); |
2852 | INIT_LIST_HEAD(&drbd_tconns); | 2922 | INIT_LIST_HEAD(&drbd_tconns); |
2853 | 2923 | ||
2924 | retry.wq = create_singlethread_workqueue("drbd-reissue"); | ||
2925 | if (!retry.wq) { | ||
2926 | printk(KERN_ERR "drbd: unable to create retry workqueue\n"); | ||
2927 | goto fail; | ||
2928 | } | ||
2929 | INIT_WORK(&retry.worker, do_retry); | ||
2930 | spin_lock_init(&retry.lock); | ||
2931 | INIT_LIST_HEAD(&retry.writes); | ||
2932 | |||
2854 | printk(KERN_INFO "drbd: initialized. " | 2933 | printk(KERN_INFO "drbd: initialized. " |
2855 | "Version: " REL_VERSION " (api:%d/proto:%d-%d)\n", | 2934 | "Version: " REL_VERSION " (api:%d/proto:%d-%d)\n", |
2856 | API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX); | 2935 | API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX); |