aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: NeilBrown <neilb@suse.de>, 2007-02-28 23:11:48 -0500
committer: Linus Torvalds <torvalds@woody.linux-foundation.org>, 2007-03-01 17:53:36 -0500
commit: b4c4c7b8095298ff4ce20b40bf180ada070812d0 (patch)
tree: 2c51fe8ef220be17c791f8aca1218cadcae22b7f
parent: d1b5380c7f794da16e815c34e54ee7641db8a288 (diff)
[PATCH] md: restart a (raid5) reshape that has been aborted due to a read/write error
An error always aborts any resync/recovery/reshape on the understanding that it will immediately be restarted if that still makes sense. However a reshape currently doesn't get restarted. With this patch it does.

To avoid restarting when it is not possible to do work, we call into the personality to check that a reshape is ok, and strengthen raid5_check_reshape to fail if there are too many failed devices.

We also break some code out into a separate function: remove_and_add_spares as the indent level for that code was getting crazy.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- drivers/md/md.c | 74
-rw-r--r-- drivers/md/raid5.c | 2
2 files changed, 47 insertions, 29 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index b5744b1bd2ba..6c06e825cff5 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5357,6 +5357,44 @@ void md_do_sync(mddev_t *mddev)
5357EXPORT_SYMBOL_GPL(md_do_sync); 5357EXPORT_SYMBOL_GPL(md_do_sync);
5358 5358
5359 5359
5360static int remove_and_add_spares(mddev_t *mddev)
5361{
5362 mdk_rdev_t *rdev;
5363 struct list_head *rtmp;
5364 int spares = 0;
5365
5366 ITERATE_RDEV(mddev,rdev,rtmp)
5367 if (rdev->raid_disk >= 0 &&
5368 (test_bit(Faulty, &rdev->flags) ||
5369 ! test_bit(In_sync, &rdev->flags)) &&
5370 atomic_read(&rdev->nr_pending)==0) {
5371 if (mddev->pers->hot_remove_disk(
5372 mddev, rdev->raid_disk)==0) {
5373 char nm[20];
5374 sprintf(nm,"rd%d", rdev->raid_disk);
5375 sysfs_remove_link(&mddev->kobj, nm);
5376 rdev->raid_disk = -1;
5377 }
5378 }
5379
5380 if (mddev->degraded) {
5381 ITERATE_RDEV(mddev,rdev,rtmp)
5382 if (rdev->raid_disk < 0
5383 && !test_bit(Faulty, &rdev->flags)) {
5384 rdev->recovery_offset = 0;
5385 if (mddev->pers->hot_add_disk(mddev,rdev)) {
5386 char nm[20];
5387 sprintf(nm, "rd%d", rdev->raid_disk);
5388 sysfs_create_link(&mddev->kobj,
5389 &rdev->kobj, nm);
5390 spares++;
5391 md_new_event(mddev);
5392 } else
5393 break;
5394 }
5395 }
5396 return spares;
5397}
5360/* 5398/*
5361 * This routine is regularly called by all per-raid-array threads to 5399 * This routine is regularly called by all per-raid-array threads to
5362 * deal with generic issues like resync and super-block update. 5400 * deal with generic issues like resync and super-block update.
@@ -5411,7 +5449,7 @@ void md_check_recovery(mddev_t *mddev)
5411 return; 5449 return;
5412 5450
5413 if (mddev_trylock(mddev)) { 5451 if (mddev_trylock(mddev)) {
5414 int spares =0; 5452 int spares = 0;
5415 5453
5416 spin_lock_irq(&mddev->write_lock); 5454 spin_lock_irq(&mddev->write_lock);
5417 if (mddev->safemode && !atomic_read(&mddev->writes_pending) && 5455 if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
@@ -5474,35 +5512,13 @@ void md_check_recovery(mddev_t *mddev)
5474 * Spare are also removed and re-added, to allow 5512 * Spare are also removed and re-added, to allow
5475 * the personality to fail the re-add. 5513 * the personality to fail the re-add.
5476 */ 5514 */
5477 ITERATE_RDEV(mddev,rdev,rtmp)
5478 if (rdev->raid_disk >= 0 &&
5479 (test_bit(Faulty, &rdev->flags) || ! test_bit(In_sync, &rdev->flags)) &&
5480 atomic_read(&rdev->nr_pending)==0) {
5481 if (mddev->pers->hot_remove_disk(mddev, rdev->raid_disk)==0) {
5482 char nm[20];
5483 sprintf(nm,"rd%d", rdev->raid_disk);
5484 sysfs_remove_link(&mddev->kobj, nm);
5485 rdev->raid_disk = -1;
5486 }
5487 }
5488
5489 if (mddev->degraded) {
5490 ITERATE_RDEV(mddev,rdev,rtmp)
5491 if (rdev->raid_disk < 0
5492 && !test_bit(Faulty, &rdev->flags)) {
5493 rdev->recovery_offset = 0;
5494 if (mddev->pers->hot_add_disk(mddev,rdev)) {
5495 char nm[20];
5496 sprintf(nm, "rd%d", rdev->raid_disk);
5497 sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
5498 spares++;
5499 md_new_event(mddev);
5500 } else
5501 break;
5502 }
5503 }
5504 5515
5505 if (spares) { 5516 if (mddev->reshape_position != MaxSector) {
5517 if (mddev->pers->check_reshape(mddev) != 0)
5518 /* Cannot proceed */
5519 goto unlock;
5520 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
5521 } else if ((spares = remove_and_add_spares(mddev))) {
5506 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); 5522 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
5507 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); 5523 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
5508 } else if (mddev->recovery_cp < MaxSector) { 5524 } else if (mddev->recovery_cp < MaxSector) {
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 11c3d7bfa797..29fc06b47d4e 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3814,6 +3814,8 @@ static int raid5_check_reshape(mddev_t *mddev)
3814 if (err) 3814 if (err)
3815 return err; 3815 return err;
3816 3816
3817 if (mddev->degraded > conf->max_degraded)
3818 return -EINVAL;
3817 /* looks like we might be able to manage this */ 3819 /* looks like we might be able to manage this */
3818 return 0; 3820 return 0;
3819} 3821}