aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author NeilBrown <neilb@suse.de> 2008-03-04 17:29:35 -0500
committer Linus Torvalds <torvalds@woody.linux-foundation.org> 2008-03-04 19:35:18 -0500
commit 1c830532f6b44d10a1743ccd00e990c6b83396f5 (patch)
tree 506c8fef4d84f54e76b533756cff86c45bb61746
parent 8ed3a19563b6c05b7625649b1769ddb063d53253 (diff)
md: fix possible raid1/raid10 deadlock on read error during resync
Thanks to K.Tanaka and the scsi fault injection framework, here is a fix for another possible deadlock in raid1/raid10 error handling. If a read request returns an error while a resync is happening and a resync request is pending, the attempt to fix the error will block until the resync progresses, and the resync will block until the read request completes. Thus a deadlock. This patch fixes the problem. Cc: "K.Tanaka" <k-tanaka@ce.jp.nec.com> Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- drivers/md/raid1.c 11
-rw-r--r-- drivers/md/raid10.c 11
2 files changed, 18 insertions, 4 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 38f076a3400d..ff61b309129a 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -704,13 +704,20 @@ static void freeze_array(conf_t *conf)
704 /* stop syncio and normal IO and wait for everything to 704 /* stop syncio and normal IO and wait for everything to
705 * go quite. 705 * go quite.
706 * We increment barrier and nr_waiting, and then 706 * We increment barrier and nr_waiting, and then
707 * wait until barrier+nr_pending match nr_queued+2 707 * wait until nr_pending match nr_queued+1
708 * This is called in the context of one normal IO request
709 * that has failed. Thus any sync request that might be pending
710 * will be blocked by nr_pending, and we need to wait for
711 * pending IO requests to complete or be queued for re-try.
712 * Thus the number queued (nr_queued) plus this request (1)
713 * must match the number of pending IOs (nr_pending) before
714 * we continue.
708 */ 715 */
709 spin_lock_irq(&conf->resync_lock); 716 spin_lock_irq(&conf->resync_lock);
710 conf->barrier++; 717 conf->barrier++;
711 conf->nr_waiting++; 718 conf->nr_waiting++;
712 wait_event_lock_irq(conf->wait_barrier, 719 wait_event_lock_irq(conf->wait_barrier,
713 conf->barrier+conf->nr_pending == conf->nr_queued+2, 720 conf->nr_pending == conf->nr_queued+1,
714 conf->resync_lock, 721 conf->resync_lock,
715 ({ flush_pending_writes(conf); 722 ({ flush_pending_writes(conf);
716 raid1_unplug(conf->mddev->queue); })); 723 raid1_unplug(conf->mddev->queue); }));
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 6c486d839c99..8e5671d2f3d3 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -747,13 +747,20 @@ static void freeze_array(conf_t *conf)
747 /* stop syncio and normal IO and wait for everything to 747 /* stop syncio and normal IO and wait for everything to
748 * go quiet. 748 * go quiet.
749 * We increment barrier and nr_waiting, and then 749 * We increment barrier and nr_waiting, and then
750 * wait until barrier+nr_pending match nr_queued+2 750 * wait until nr_pending match nr_queued+1
751 * This is called in the context of one normal IO request
752 * that has failed. Thus any sync request that might be pending
753 * will be blocked by nr_pending, and we need to wait for
754 * pending IO requests to complete or be queued for re-try.
755 * Thus the number queued (nr_queued) plus this request (1)
756 * must match the number of pending IOs (nr_pending) before
757 * we continue.
751 */ 758 */
752 spin_lock_irq(&conf->resync_lock); 759 spin_lock_irq(&conf->resync_lock);
753 conf->barrier++; 760 conf->barrier++;
754 conf->nr_waiting++; 761 conf->nr_waiting++;
755 wait_event_lock_irq(conf->wait_barrier, 762 wait_event_lock_irq(conf->wait_barrier,
756 conf->barrier+conf->nr_pending == conf->nr_queued+2, 763 conf->nr_pending == conf->nr_queued+1,
757 conf->resync_lock, 764 conf->resync_lock,
758 ({ flush_pending_writes(conf); 765 ({ flush_pending_writes(conf);
759 raid10_unplug(conf->mddev->queue); })); 766 raid10_unplug(conf->mddev->queue); }));