drbd: fix potential deadlock during "restart" of conflicting writes

w_restart_write(), run from worker context, calls __drbd_make_request() and further drbd_al_begin_io(, delegate=true), which then potentially deadlocks. The previous patch moved a BUG_ON to expose such call paths, which would now be triggered. Also, if we call __drbd_make_request() from resource worker context, like w_restart_write() did, and that should block for whatever reason (!drbd_state_is_stable(), resource suspended, ...), we potentially deadlock the whole resource, as the worker is needed for state changes and other things. Create a dedicated retry workqueue for this instead. Also make sure that inc_ap_bio()/dec_ap_bio() are properly paired, even if do_retry() needs to retry itself, in case __drbd_make_request() returns != 0. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
author: Lars Ellenberg <lars.ellenberg@linbit.com> 2011-11-24 04:36:25 -0500
committer: Philipp Reisner <philipp.reisner@linbit.com> 2012-11-08 10:58:21 -0500
commit: 2312f0b3c5ab794fbac9e9bebe90c784c9d449c5 (patch)
tree: 0b4b74af502169d61e026855593b828fa202665a /drivers/block/drbd/drbd_main.c
parent: f9916d61a40e7ad43c2a20444894f85c45512f91 (diff)
1 files changed, 79 insertions, 0 deletions
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index a0045ac88040..5529d392e5d3 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2383,6 +2383,73 @@ void drbd_minor_destroy(struct kref *kref)
        kref_put(&tconn->kref, &conn_destroy);
 }
+/* One global retry thread, if we need to push back some bio and have it
+ * reinserted through our make request function.
+ */
+static struct retry_worker {
+        struct workqueue_struct *wq;
+        struct work_struct worker;
+        spinlock_t lock;
+        struct list_head writes;
+} retry;
+static void do_retry(struct work_struct *ws)
+{
+        struct retry_worker *retry = container_of(ws, struct retry_worker, worker);
+        LIST_HEAD(writes);
+        struct drbd_request *req, *tmp;
+        spin_lock_irq(&retry->lock);
+        list_splice_init(&retry->writes, &writes);
+        spin_unlock_irq(&retry->lock);
+        list_for_each_entry_safe(req, tmp, &writes, tl_requests) {
+                struct drbd_conf *mdev = req->w.mdev;
+                struct bio *bio = req->master_bio;
+                unsigned long start_time = req->start_time;
+                /* We have exclusive access to this request object.
+                 * If it had not been RQ_POSTPONED, the code path which queued
+                 * it here would have completed and freed it already.
+                 */
+                mempool_free(req, drbd_request_mempool);
+                /* A single suspended or otherwise blocking device may stall
+                 * all others as well.  Fortunately, this code path is to
+                 * recover from a situation that "should not happen":
+                 * concurrent writes in multi-primary setup.
+                 * In a "normal" lifecycle, this workqueue is supposed to be
+                 * destroyed without ever doing anything.
+                 * If it turns out to be an issue anyways, we can do per
+                 * resource (replication group) or per device (minor) retry
+                 * workqueues instead.
+                 */
+                /* We are not just doing generic_make_request(),
+                 * as we want to keep the start_time information. */
+                do {
+                        inc_ap_bio(mdev);
+                } while(__drbd_make_request(mdev, bio, start_time));
+        }
+}
+void drbd_restart_write(struct drbd_request *req)
+{
+        unsigned long flags;
+        spin_lock_irqsave(&retry.lock, flags);
+        list_move_tail(&req->tl_requests, &retry.writes);
+        spin_unlock_irqrestore(&retry.lock, flags);
+        /* Drop the extra reference that would otherwise
+         * have been dropped by complete_master_bio.
+         * do_retry() needs to grab a new one. */
+        dec_ap_bio(req->w.mdev);
+        queue_work(retry.wq, &retry.worker);
+}
 static void drbd_cleanup(void)
 {
        unsigned int i;
@@ -2402,6 +2469,9 @@ static void drbd_cleanup(void)
        if (drbd_proc)
                remove_proc_entry("drbd", NULL);
+        if (retry.wq)
+                destroy_workqueue(retry.wq);
        drbd_genl_unregister();
        idr_for_each_entry(&minors, mdev, i) {
@@ -2851,6 +2921,15 @@ int __init drbd_init(void)
        rwlock_init(&global_state_lock);
        INIT_LIST_HEAD(&drbd_tconns);
+        retry.wq = create_singlethread_workqueue("drbd-reissue");
+        if (!retry.wq) {
+                printk(KERN_ERR "drbd: unable to create retry workqueue\n");
+                goto fail;
+        }
+        INIT_WORK(&retry.worker, do_retry);
+        spin_lock_init(&retry.lock);
+        INIT_LIST_HEAD(&retry.writes);
        printk(KERN_INFO "drbd: initialized. "
               "Version: " REL_VERSION " (api:%d/proto:%d-%d)\n",
               API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX);
author	Lars Ellenberg <lars.ellenberg@linbit.com>	2011-11-24 04:36:25 -0500
committer	Philipp Reisner <philipp.reisner@linbit.com>	2012-11-08 10:58:21 -0500
commit	2312f0b3c5ab794fbac9e9bebe90c784c9d449c5 (patch)
tree	0b4b74af502169d61e026855593b828fa202665a /drivers/block/drbd/drbd_main.c
parent	f9916d61a40e7ad43c2a20444894f85c45512f91 (diff)

diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index a0045ac88040..5529d392e5d3 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c
@@ -2383,6 +2383,73 @@ void drbd_minor_destroy(struct kref *kref)
2383	kref_put(&tconn->kref, &conn_destroy);	2383	kref_put(&tconn->kref, &conn_destroy);
2384	}	2384	}
2385		2385
		2386	/* One global retry thread, if we need to push back some bio and have it
		2387	* reinserted through our make request function.
		2388	*/
		2389	static struct retry_worker {
		2390	struct workqueue_struct *wq;
		2391	struct work_struct worker;
		2392
		2393	spinlock_t lock;
		2394	struct list_head writes;
		2395	} retry;
		2396
		2397	static void do_retry(struct work_struct *ws)
		2398	{
		2399	struct retry_worker *retry = container_of(ws, struct retry_worker, worker);
		2400	LIST_HEAD(writes);
		2401	struct drbd_request req, tmp;
		2402
		2403	spin_lock_irq(&retry->lock);
		2404	list_splice_init(&retry->writes, &writes);
		2405	spin_unlock_irq(&retry->lock);
		2406
		2407	list_for_each_entry_safe(req, tmp, &writes, tl_requests) {
		2408	struct drbd_conf *mdev = req->w.mdev;
		2409	struct bio *bio = req->master_bio;
		2410	unsigned long start_time = req->start_time;
		2411
		2412	/* We have exclusive access to this request object.
		2413	* If it had not been RQ_POSTPONED, the code path which queued
		2414	* it here would have completed and freed it already.
		2415	*/
		2416	mempool_free(req, drbd_request_mempool);
		2417
		2418	/* A single suspended or otherwise blocking device may stall
		2419	* all others as well. Fortunately, this code path is to
		2420	* recover from a situation that "should not happen":
		2421	* concurrent writes in multi-primary setup.
		2422	* In a "normal" lifecycle, this workqueue is supposed to be
		2423	* destroyed without ever doing anything.
		2424	* If it turns out to be an issue anyways, we can do per
		2425	* resource (replication group) or per device (minor) retry
		2426	* workqueues instead.
		2427	*/
		2428
		2429	/* We are not just doing generic_make_request(),
		2430	* as we want to keep the start_time information. */
		2431	do {
		2432	inc_ap_bio(mdev);
		2433	} while(__drbd_make_request(mdev, bio, start_time));
		2434	}
		2435	}
		2436
		2437	void drbd_restart_write(struct drbd_request *req)
		2438	{
		2439	unsigned long flags;
		2440	spin_lock_irqsave(&retry.lock, flags);
		2441	list_move_tail(&req->tl_requests, &retry.writes);
		2442	spin_unlock_irqrestore(&retry.lock, flags);
		2443
		2444	/* Drop the extra reference that would otherwise
		2445	* have been dropped by complete_master_bio.
		2446	* do_retry() needs to grab a new one. */
		2447	dec_ap_bio(req->w.mdev);
		2448
		2449	queue_work(retry.wq, &retry.worker);
		2450	}
		2451
		2452
2386	static void drbd_cleanup(void)	2453	static void drbd_cleanup(void)
2387	{	2454	{
2388	unsigned int i;	2455	unsigned int i;
@@ -2402,6 +2469,9 @@ static void drbd_cleanup(void)
2402	if (drbd_proc)	2469	if (drbd_proc)
2403	remove_proc_entry("drbd", NULL);	2470	remove_proc_entry("drbd", NULL);
2404		2471
		2472	if (retry.wq)
		2473	destroy_workqueue(retry.wq);
		2474
2405	drbd_genl_unregister();	2475	drbd_genl_unregister();
2406		2476
2407	idr_for_each_entry(&minors, mdev, i) {	2477	idr_for_each_entry(&minors, mdev, i) {
@@ -2851,6 +2921,15 @@ int __init drbd_init(void)
2851	rwlock_init(&global_state_lock);	2921	rwlock_init(&global_state_lock);
2852	INIT_LIST_HEAD(&drbd_tconns);	2922	INIT_LIST_HEAD(&drbd_tconns);
2853		2923
		2924	retry.wq = create_singlethread_workqueue("drbd-reissue");
		2925	if (!retry.wq) {
		2926	printk(KERN_ERR "drbd: unable to create retry workqueue\n");
		2927	goto fail;
		2928	}
		2929	INIT_WORK(&retry.worker, do_retry);
		2930	spin_lock_init(&retry.lock);
		2931	INIT_LIST_HEAD(&retry.writes);
		2932
2854	printk(KERN_INFO "drbd: initialized. "	2933	printk(KERN_INFO "drbd: initialized. "
2855	"Version: " REL_VERSION " (api:%d/proto:%d-%d)\n",	2934	"Version: " REL_VERSION " (api:%d/proto:%d-%d)\n",
2856	API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX);	2935	API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX);