path: root/drivers/md/raid10.c
author     NeilBrown <neilb@suse.de>  2008-03-04 17:29:29 -0500
committer  Linus Torvalds <torvalds@woody.linux-foundation.org>  2008-03-04 19:35:17 -0500
commit     a35e63efa1fb18c6f20f38e3ddf3f8ffbcf0f6e7 (patch)
tree       8dddd54c45ebaad84a6178765d29d9536df944d1 /drivers/md/raid10.c
parent     466634488e80968f12e73dd1fe6af5c37a1fbfe2 (diff)
md: fix deadlock in md/raid1 and md/raid10 when handling a read error
When handling a read error, we freeze the array to stop any other IO while
attempting to overwrite with correct data.

This is done in the raid1d (raid10d) thread and must wait for all submitted IO
to complete (except for requests that failed and are sitting in the retry
queue - these are counted in ->nr_queued and will stay there during a freeze).

However, write requests need attention from raid1d as bitmap updates might be
required. This can cause a deadlock as raid1d is waiting for requests to
finish that themselves need attention from raid1d.

So we create a new function 'flush_pending_writes' to give that attention,
and call it in freeze_array to be sure that we aren't waiting on raid1d.

Thanks to "K.Tanaka" <k-tanaka@ce.jp.nec.com> for finding and reporting this
problem.

Cc: "K.Tanaka" <k-tanaka@ce.jp.nec.com>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
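In short: a condensed sketch of the new flow, assembled from the raid10.c hunks
below (function bodies, locking details and the analogous raid1.c change are
elided here; this is not the full patch):

    /* Submit writes that were queued pending bitmap updates;
     * returns 1 if any request was actually submitted. */
    static int flush_pending_writes(conf_t *conf);

    /* raid10d's main loop now just calls the helper ... */
    unplug += flush_pending_writes(conf);

    /* ... and freeze_array() drains those queued writes itself while it
     * waits, so it no longer blocks on work only raid10d could have done: */
    wait_event_lock_irq(conf->wait_barrier,
                        conf->barrier+conf->nr_pending == conf->nr_queued+2,
                        conf->resync_lock,
                        ({ flush_pending_writes(conf);
                           raid10_unplug(conf->mddev->queue); }));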
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r--  drivers/md/raid10.c  62
1 file changed, 40 insertions(+), 22 deletions(-)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 017f58113c33..5de42d87bf4e 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -629,7 +629,36 @@ static int raid10_congested(void *data, int bits)
 	return ret;
 }
 
-
+static int flush_pending_writes(conf_t *conf)
+{
+	/* Any writes that have been queued but are awaiting
+	 * bitmap updates get flushed here.
+	 * We return 1 if any requests were actually submitted.
+	 */
+	int rv = 0;
+
+	spin_lock_irq(&conf->device_lock);
+
+	if (conf->pending_bio_list.head) {
+		struct bio *bio;
+		bio = bio_list_get(&conf->pending_bio_list);
+		blk_remove_plug(conf->mddev->queue);
+		spin_unlock_irq(&conf->device_lock);
+		/* flush any pending bitmap writes to disk
+		 * before proceeding w/ I/O */
+		bitmap_unplug(conf->mddev->bitmap);
+
+		while (bio) { /* submit pending writes */
+			struct bio *next = bio->bi_next;
+			bio->bi_next = NULL;
+			generic_make_request(bio);
+			bio = next;
+		}
+		rv = 1;
+	} else
+		spin_unlock_irq(&conf->device_lock);
+	return rv;
+}
 /* Barriers....
  * Sometimes we need to suspend IO while we do something else,
  * either some resync/recovery, or reconfigure the array.
@@ -720,7 +749,8 @@ static void freeze_array(conf_t *conf)
 	wait_event_lock_irq(conf->wait_barrier,
 			    conf->barrier+conf->nr_pending == conf->nr_queued+2,
 			    conf->resync_lock,
-			    raid10_unplug(conf->mddev->queue));
+			    ({ flush_pending_writes(conf);
+			       raid10_unplug(conf->mddev->queue); }));
 	spin_unlock_irq(&conf->resync_lock);
 }
 
@@ -892,6 +922,9 @@ static int make_request(struct request_queue *q, struct bio * bio)
 	blk_plug_device(mddev->queue);
 	spin_unlock_irqrestore(&conf->device_lock, flags);
 
+	/* In case raid10d snuck in to freeze_array */
+	wake_up(&conf->wait_barrier);
+
 	if (do_sync)
 		md_wakeup_thread(mddev->thread);
 
@@ -1464,28 +1497,14 @@ static void raid10d(mddev_t *mddev)
 
 	for (;;) {
 		char b[BDEVNAME_SIZE];
-		spin_lock_irqsave(&conf->device_lock, flags);
 
-		if (conf->pending_bio_list.head) {
-			bio = bio_list_get(&conf->pending_bio_list);
-			blk_remove_plug(mddev->queue);
-			spin_unlock_irqrestore(&conf->device_lock, flags);
-			/* flush any pending bitmap writes to disk before proceeding w/ I/O */
-			bitmap_unplug(mddev->bitmap);
+		unplug += flush_pending_writes(conf);
 
-			while (bio) { /* submit pending writes */
-				struct bio *next = bio->bi_next;
-				bio->bi_next = NULL;
-				generic_make_request(bio);
-				bio = next;
-			}
-			unplug = 1;
-
-			continue;
-		}
-
-		if (list_empty(head))
+		spin_lock_irqsave(&conf->device_lock, flags);
+		if (list_empty(head)) {
+			spin_unlock_irqrestore(&conf->device_lock, flags);
 			break;
+		}
 		r10_bio = list_entry(head->prev, r10bio_t, retry_list);
 		list_del(head->prev);
 		conf->nr_queued--;
@@ -1548,7 +1567,6 @@ static void raid10d(mddev_t *mddev)
 			}
 		}
 	}
-	spin_unlock_irqrestore(&conf->device_lock, flags);
 	if (unplug)
 		unplug_slaves(mddev);
 }