aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
author: NeilBrown <neilb@suse.de> 2012-03-18 21:46:38 -0400
committer: NeilBrown <neilb@suse.de> 2012-03-18 21:46:38 -0400
commit: d6b42dcb995e6acd7cc276774e751ffc9f0ef4bf (patch)
tree: a9112351e8ddd2866afd8687b645a1c5bf574ee7 /drivers/md
parent: 4474ca42e2577563a919fd3ed782e2ec55bf11a2 (diff)
md/raid1,raid10: avoid deadlock during resync/recovery.
If RAID1 or RAID10 is used under LVM or some other stacking block device, it is possible to enter a deadlock during resync or recovery.

This can happen if the upper-level block device creates two requests to the RAID1 or RAID10. The first request gets processed, blocks recovery, and queues requests for the underlying devices in current->bio_list. A resync request then starts, which will wait for those requests and block new IO.

But then the second request to the RAID1/10 will be attempted, and it cannot progress until the resync request completes, which cannot progress until the underlying device requests complete, which are on a queue behind that second request.

So allow that second request to proceed even though there is a resync request about to start.

This is suitable for any -stable kernel.

Cc: stable@vger.kernel.org
Reported-by: Ray Morris <support@bettercgi.com>
Tested-by: Ray Morris <support@bettercgi.com>
Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md')
-rw-r--r-- drivers/md/raid1.c | 17
-rw-r--r-- drivers/md/raid10.c | 17
2 files changed, 30 insertions, 4 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a0b225eb4ac4..118e0f69f224 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -737,9 +737,22 @@ static void wait_barrier(struct r1conf *conf)
737 spin_lock_irq(&conf->resync_lock); 737 spin_lock_irq(&conf->resync_lock);
738 if (conf->barrier) { 738 if (conf->barrier) {
739 conf->nr_waiting++; 739 conf->nr_waiting++;
740 wait_event_lock_irq(conf->wait_barrier, !conf->barrier, 740 /* Wait for the barrier to drop.
741 * However if there are already pending
742 * requests (preventing the barrier from
743 * rising completely), and the
744 * pre-process bio queue isn't empty,
745 * then don't wait, as we need to empty
746 * that queue to get the nr_pending
747 * count down.
748 */
749 wait_event_lock_irq(conf->wait_barrier,
750 !conf->barrier ||
751 (conf->nr_pending &&
752 current->bio_list &&
753 !bio_list_empty(current->bio_list)),
741 conf->resync_lock, 754 conf->resync_lock,
742 ); 755 );
743 conf->nr_waiting--; 756 conf->nr_waiting--;
744 } 757 }
745 conf->nr_pending++; 758 conf->nr_pending++;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index f4f3edcdaf8d..2ae7021320e1 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -863,9 +863,22 @@ static void wait_barrier(struct r10conf *conf)
863 spin_lock_irq(&conf->resync_lock); 863 spin_lock_irq(&conf->resync_lock);
864 if (conf->barrier) { 864 if (conf->barrier) {
865 conf->nr_waiting++; 865 conf->nr_waiting++;
866 wait_event_lock_irq(conf->wait_barrier, !conf->barrier, 866 /* Wait for the barrier to drop.
867 * However if there are already pending
868 * requests (preventing the barrier from
869 * rising completely), and the
870 * pre-process bio queue isn't empty,
871 * then don't wait, as we need to empty
872 * that queue to get the nr_pending
873 * count down.
874 */
875 wait_event_lock_irq(conf->wait_barrier,
876 !conf->barrier ||
877 (conf->nr_pending &&
878 current->bio_list &&
879 !bio_list_empty(current->bio_list)),
867 conf->resync_lock, 880 conf->resync_lock,
868 ); 881 );
869 conf->nr_waiting--; 882 conf->nr_waiting--;
870 } 883 }
871 conf->nr_pending++; 884 conf->nr_pending++;