about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author NeilBrown <neilb@suse.de> 2009-01-08 16:31:11 -0500
committer NeilBrown <neilb@suse.de> 2009-01-08 16:31:11 -0500
commit 4044ba58dd15cb01797c4fd034f39ef4a75f7cc3 (patch)
tree e1bea8143538fc3eaeeb8578c2f9231e32809a25
parent efeb53c0e57213e843b7ef3cc6ebcdea7d6186ac (diff)
md: don't retry recovery of raid1 that fails due to error on source drive.
If a raid1 has only one working drive and it has a sector which gives an error on read, then an attempt to recover onto a spare will fail, but as the single remaining drive is not removed from the array, the recovery will be immediately re-attempted, resulting in an infinite recovery loop.

So detect this situation and don't retry recovery once an error on the lone remaining drive is detected. Allow recovery to be retried once every time a spare is added, in case the problem wasn't actually a media error.

Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r-- drivers/md/md.c 5
-rw-r--r-- drivers/md/raid1.c 8
-rw-r--r-- include/linux/raid/md_k.h 3
3 files changed, 13 insertions, 3 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index f5cbb9d2371a..41e2509bf896 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1500,6 +1500,9 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
1500 1500
1501 list_add_rcu(&rdev->same_set, &mddev->disks); 1501 list_add_rcu(&rdev->same_set, &mddev->disks);
1502 bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk); 1502 bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
1503
1504 /* May as well allow recovery to be retried once */
1505 mddev->recovery_disabled = 0;
1503 return 0; 1506 return 0;
1504 1507
1505 fail: 1508 fail:
@@ -6175,7 +6178,7 @@ static int remove_and_add_spares(mddev_t *mddev)
6175 } 6178 }
6176 } 6179 }
6177 6180
6178 if (mddev->degraded && ! mddev->ro) { 6181 if (mddev->degraded && ! mddev->ro && !mddev->recovery_disabled) {
6179 list_for_each_entry(rdev, &mddev->disks, same_set) { 6182 list_for_each_entry(rdev, &mddev->disks, same_set) {
6180 if (rdev->raid_disk >= 0 && 6183 if (rdev->raid_disk >= 0 &&
6181 !test_bit(In_sync, &rdev->flags) && 6184 !test_bit(In_sync, &rdev->flags) &&
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index c165b1eed8bb..7b4f5f7155d8 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1016,12 +1016,16 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
1016 * else mark the drive as failed 1016 * else mark the drive as failed
1017 */ 1017 */
1018 if (test_bit(In_sync, &rdev->flags) 1018 if (test_bit(In_sync, &rdev->flags)
1019 && (conf->raid_disks - mddev->degraded) == 1) 1019 && (conf->raid_disks - mddev->degraded) == 1) {
1020 /* 1020 /*
1021 * Don't fail the drive, act as though we were just a 1021 * Don't fail the drive, act as though we were just a
1022 * normal single drive 1022 * normal single drive.
1023 * However don't try a recovery from this drive as
1024 * it is very likely to fail.
1023 */ 1025 */
1026 mddev->recovery_disabled = 1;
1024 return; 1027 return;
1028 }
1025 if (test_and_clear_bit(In_sync, &rdev->flags)) { 1029 if (test_and_clear_bit(In_sync, &rdev->flags)) {
1026 unsigned long flags; 1030 unsigned long flags;
1027 spin_lock_irqsave(&conf->device_lock, flags); 1031 spin_lock_irqsave(&conf->device_lock, flags);
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index dac4217194b8..9743e4dbc918 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -218,6 +218,9 @@ struct mddev_s
218#define MD_RECOVERY_FROZEN 9 218#define MD_RECOVERY_FROZEN 9
219 219
220 unsigned long recovery; 220 unsigned long recovery;
221 int recovery_disabled; /* if we detect that recovery
222 * will always fail, set this
223 * so we don't loop trying */
221 224
222 int in_sync; /* know to not need resync */ 225 int in_sync; /* know to not need resync */
223 struct mutex reconfig_mutex; 226 struct mutex reconfig_mutex;