diff options
author | NeilBrown <neilb@suse.de> | 2007-02-28 23:11:48 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-03-01 17:53:36 -0500 |
commit | b4c4c7b8095298ff4ce20b40bf180ada070812d0 (patch) | |
tree | 2c51fe8ef220be17c791f8aca1218cadcae22b7f /drivers/md | |
parent | d1b5380c7f794da16e815c34e54ee7641db8a288 (diff) |
[PATCH] md: restart a (raid5) reshape that has been aborted due to a read/write error
An error always aborts any resync/recovery/reshape on the understanding that
it will immediately be restarted if that still makes sense. However, a reshape
currently doesn't get restarted. With this patch it does.
To avoid restarting when it is not possible to do work, we call into the
personality to check that a reshape is ok, and strengthen raid5_check_reshape
to fail if there are too many failed devices.
We also break some code out into a separate function, remove_and_add_spares(),
as the indent level for that code was getting crazy.
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/md.c | 74 | ||||
-rw-r--r-- | drivers/md/raid5.c | 2 |
2 files changed, 47 insertions, 29 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index b5744b1bd2ba..6c06e825cff5 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -5357,6 +5357,44 @@ void md_do_sync(mddev_t *mddev) | |||
5357 | EXPORT_SYMBOL_GPL(md_do_sync); | 5357 | EXPORT_SYMBOL_GPL(md_do_sync); |
5358 | 5358 | ||
5359 | 5359 | ||
5360 | static int remove_and_add_spares(mddev_t *mddev) | ||
5361 | { | ||
5362 | mdk_rdev_t *rdev; | ||
5363 | struct list_head *rtmp; | ||
5364 | int spares = 0; | ||
5365 | |||
5366 | ITERATE_RDEV(mddev,rdev,rtmp) | ||
5367 | if (rdev->raid_disk >= 0 && | ||
5368 | (test_bit(Faulty, &rdev->flags) || | ||
5369 | ! test_bit(In_sync, &rdev->flags)) && | ||
5370 | atomic_read(&rdev->nr_pending)==0) { | ||
5371 | if (mddev->pers->hot_remove_disk( | ||
5372 | mddev, rdev->raid_disk)==0) { | ||
5373 | char nm[20]; | ||
5374 | sprintf(nm,"rd%d", rdev->raid_disk); | ||
5375 | sysfs_remove_link(&mddev->kobj, nm); | ||
5376 | rdev->raid_disk = -1; | ||
5377 | } | ||
5378 | } | ||
5379 | |||
5380 | if (mddev->degraded) { | ||
5381 | ITERATE_RDEV(mddev,rdev,rtmp) | ||
5382 | if (rdev->raid_disk < 0 | ||
5383 | && !test_bit(Faulty, &rdev->flags)) { | ||
5384 | rdev->recovery_offset = 0; | ||
5385 | if (mddev->pers->hot_add_disk(mddev,rdev)) { | ||
5386 | char nm[20]; | ||
5387 | sprintf(nm, "rd%d", rdev->raid_disk); | ||
5388 | sysfs_create_link(&mddev->kobj, | ||
5389 | &rdev->kobj, nm); | ||
5390 | spares++; | ||
5391 | md_new_event(mddev); | ||
5392 | } else | ||
5393 | break; | ||
5394 | } | ||
5395 | } | ||
5396 | return spares; | ||
5397 | } | ||
5360 | /* | 5398 | /* |
5361 | * This routine is regularly called by all per-raid-array threads to | 5399 | * This routine is regularly called by all per-raid-array threads to |
5362 | * deal with generic issues like resync and super-block update. | 5400 | * deal with generic issues like resync and super-block update. |
@@ -5411,7 +5449,7 @@ void md_check_recovery(mddev_t *mddev) | |||
5411 | return; | 5449 | return; |
5412 | 5450 | ||
5413 | if (mddev_trylock(mddev)) { | 5451 | if (mddev_trylock(mddev)) { |
5414 | int spares =0; | 5452 | int spares = 0; |
5415 | 5453 | ||
5416 | spin_lock_irq(&mddev->write_lock); | 5454 | spin_lock_irq(&mddev->write_lock); |
5417 | if (mddev->safemode && !atomic_read(&mddev->writes_pending) && | 5455 | if (mddev->safemode && !atomic_read(&mddev->writes_pending) && |
@@ -5474,35 +5512,13 @@ void md_check_recovery(mddev_t *mddev) | |||
5474 | * Spares are also removed and re-added, to allow | 5512 | * Spares are also removed and re-added, to allow |
5475 | * the personality to fail the re-add. | 5513 | * the personality to fail the re-add. |
5476 | */ | 5514 | */ |
5477 | ITERATE_RDEV(mddev,rdev,rtmp) | ||
5478 | if (rdev->raid_disk >= 0 && | ||
5479 | (test_bit(Faulty, &rdev->flags) || ! test_bit(In_sync, &rdev->flags)) && | ||
5480 | atomic_read(&rdev->nr_pending)==0) { | ||
5481 | if (mddev->pers->hot_remove_disk(mddev, rdev->raid_disk)==0) { | ||
5482 | char nm[20]; | ||
5483 | sprintf(nm,"rd%d", rdev->raid_disk); | ||
5484 | sysfs_remove_link(&mddev->kobj, nm); | ||
5485 | rdev->raid_disk = -1; | ||
5486 | } | ||
5487 | } | ||
5488 | |||
5489 | if (mddev->degraded) { | ||
5490 | ITERATE_RDEV(mddev,rdev,rtmp) | ||
5491 | if (rdev->raid_disk < 0 | ||
5492 | && !test_bit(Faulty, &rdev->flags)) { | ||
5493 | rdev->recovery_offset = 0; | ||
5494 | if (mddev->pers->hot_add_disk(mddev,rdev)) { | ||
5495 | char nm[20]; | ||
5496 | sprintf(nm, "rd%d", rdev->raid_disk); | ||
5497 | sysfs_create_link(&mddev->kobj, &rdev->kobj, nm); | ||
5498 | spares++; | ||
5499 | md_new_event(mddev); | ||
5500 | } else | ||
5501 | break; | ||
5502 | } | ||
5503 | } | ||
5504 | 5515 | ||
5505 | if (spares) { | 5516 | if (mddev->reshape_position != MaxSector) { |
5517 | if (mddev->pers->check_reshape(mddev) != 0) | ||
5518 | /* Cannot proceed */ | ||
5519 | goto unlock; | ||
5520 | set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); | ||
5521 | } else if ((spares = remove_and_add_spares(mddev))) { | ||
5506 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); | 5522 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); |
5507 | clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); | 5523 | clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); |
5508 | } else if (mddev->recovery_cp < MaxSector) { | 5524 | } else if (mddev->recovery_cp < MaxSector) { |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 11c3d7bfa797..29fc06b47d4e 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -3814,6 +3814,8 @@ static int raid5_check_reshape(mddev_t *mddev) | |||
3814 | if (err) | 3814 | if (err) |
3815 | return err; | 3815 | return err; |
3816 | 3816 | ||
3817 | if (mddev->degraded > conf->max_degraded) | ||
3818 | return -EINVAL; | ||
3817 | /* looks like we might be able to manage this */ | 3819 | /* looks like we might be able to manage this */ |
3818 | return 0; | 3820 | return 0; |
3819 | } | 3821 | } |