diff options
author | NeilBrown <neilb@suse.de> | 2008-03-04 17:29:35 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2008-03-04 19:35:18 -0500 |
commit | 1c830532f6b44d10a1743ccd00e990c6b83396f5 (patch) | |
tree | 506c8fef4d84f54e76b533756cff86c45bb61746 | |
parent | 8ed3a19563b6c05b7625649b1769ddb063d53253 (diff) |
md: fix possible raid1/raid10 deadlock on read error during resync
Thanks to K.Tanaka and the scsi fault injection framework, here is a fix for
another possible deadlock in raid1/raid10 error handling.
If a read request returns an error while a resync is happening and a resync
request is pending, the attempt to fix the error will block until the resync
progresses, and the resync will block until the read request completes. Thus
a deadlock.
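This patch fixes the problem by changing the wait condition in freeze_array() (in both raid1 and raid10) from "barrier+nr_pending == nr_queued+2" to "nr_pending == nr_queued+1", so the wait no longer depends on the barrier count and only requires that every pending IO other than the failed request itself has completed or been queued for retry.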
Cc: "K.Tanaka" <k-tanaka@ce.jp.nec.com>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | drivers/md/raid1.c | 11 | ||||
-rw-r--r-- | drivers/md/raid10.c | 11 |
2 files changed, 18 insertions, 4 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 38f076a3400d..ff61b309129a 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -704,13 +704,20 @@ static void freeze_array(conf_t *conf) | |||
704 | /* stop syncio and normal IO and wait for everything to | 704 | /* stop syncio and normal IO and wait for everything to |
705 | * go quite. | 705 | * go quite. |
706 | * We increment barrier and nr_waiting, and then | 706 | * We increment barrier and nr_waiting, and then |
707 | * wait until barrier+nr_pending match nr_queued+2 | 707 | * wait until nr_pending match nr_queued+1 |
708 | * This is called in the context of one normal IO request | ||
709 | * that has failed. Thus any sync request that might be pending | ||
710 | * will be blocked by nr_pending, and we need to wait for | ||
711 | * pending IO requests to complete or be queued for re-try. | ||
712 | * Thus the number queued (nr_queued) plus this request (1) | ||
713 | * must match the number of pending IOs (nr_pending) before | ||
714 | * we continue. | ||
708 | */ | 715 | */ |
709 | spin_lock_irq(&conf->resync_lock); | 716 | spin_lock_irq(&conf->resync_lock); |
710 | conf->barrier++; | 717 | conf->barrier++; |
711 | conf->nr_waiting++; | 718 | conf->nr_waiting++; |
712 | wait_event_lock_irq(conf->wait_barrier, | 719 | wait_event_lock_irq(conf->wait_barrier, |
713 | conf->barrier+conf->nr_pending == conf->nr_queued+2, | 720 | conf->nr_pending == conf->nr_queued+1, |
714 | conf->resync_lock, | 721 | conf->resync_lock, |
715 | ({ flush_pending_writes(conf); | 722 | ({ flush_pending_writes(conf); |
716 | raid1_unplug(conf->mddev->queue); })); | 723 | raid1_unplug(conf->mddev->queue); })); |
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 6c486d839c99..8e5671d2f3d3 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -747,13 +747,20 @@ static void freeze_array(conf_t *conf) | |||
747 | /* stop syncio and normal IO and wait for everything to | 747 | /* stop syncio and normal IO and wait for everything to |
748 | * go quiet. | 748 | * go quiet. |
749 | * We increment barrier and nr_waiting, and then | 749 | * We increment barrier and nr_waiting, and then |
750 | * wait until barrier+nr_pending match nr_queued+2 | 750 | * wait until nr_pending match nr_queued+1 |
751 | * This is called in the context of one normal IO request | ||
752 | * that has failed. Thus any sync request that might be pending | ||
753 | * will be blocked by nr_pending, and we need to wait for | ||
754 | * pending IO requests to complete or be queued for re-try. | ||
755 | * Thus the number queued (nr_queued) plus this request (1) | ||
756 | * must match the number of pending IOs (nr_pending) before | ||
757 | * we continue. | ||
751 | */ | 758 | */ |
752 | spin_lock_irq(&conf->resync_lock); | 759 | spin_lock_irq(&conf->resync_lock); |
753 | conf->barrier++; | 760 | conf->barrier++; |
754 | conf->nr_waiting++; | 761 | conf->nr_waiting++; |
755 | wait_event_lock_irq(conf->wait_barrier, | 762 | wait_event_lock_irq(conf->wait_barrier, |
756 | conf->barrier+conf->nr_pending == conf->nr_queued+2, | 763 | conf->nr_pending == conf->nr_queued+1, |
757 | conf->resync_lock, | 764 | conf->resync_lock, |
758 | ({ flush_pending_writes(conf); | 765 | ({ flush_pending_writes(conf); |
759 | raid10_unplug(conf->mddev->queue); })); | 766 | raid10_unplug(conf->mddev->queue); })); |