diff options
author | NeilBrown <neilb@suse.de> | 2012-04-01 09:48:38 -0400 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2012-04-03 01:36:17 -0400 |
commit | 18b9837ea0dc3cf844c6c4196871ce91d047bddb (patch) | |
tree | de8be80b767d144405aeeaf7238c723f575b4b40 /drivers/md/raid5.c | |
parent | 5220ea1e640869e70f894837678315c878c651fd (diff) |
md/raid5: fix handling of bad blocks during recovery.
1/ We can only treat a known-bad-block like a read-error if we
have the data that belongs in that block. So fix that test.
2/ If we cannot recover a stripe due to insufficient data,
don't tell "md_done_sync" that the sync failed unless we really
did fail something. If we successfully record bad blocks,
that is success.
Reported-by: "majianpeng" <majianpeng@gmail.com>
Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r-- | drivers/md/raid5.c | 55 |
1 files changed, 29 insertions, 26 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 23ac880bba9a..9799be80bf31 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -2471,39 +2471,41 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh, | |||
2471 | int abort = 0; | 2471 | int abort = 0; |
2472 | int i; | 2472 | int i; |
2473 | 2473 | ||
2474 | md_done_sync(conf->mddev, STRIPE_SECTORS, 0); | ||
2475 | clear_bit(STRIPE_SYNCING, &sh->state); | 2474 | clear_bit(STRIPE_SYNCING, &sh->state); |
2476 | s->syncing = 0; | 2475 | s->syncing = 0; |
2477 | s->replacing = 0; | 2476 | s->replacing = 0; |
2478 | /* There is nothing more to do for sync/check/repair. | 2477 | /* There is nothing more to do for sync/check/repair. |
2478 | * Don't even need to abort as that is handled elsewhere | ||
2479 | * if needed, and not always wanted e.g. if there is a known | ||
2480 | * bad block here. | ||
2479 | * For recover/replace we need to record a bad block on all | 2481 | * For recover/replace we need to record a bad block on all |
2480 | * non-sync devices, or abort the recovery | 2482 | * non-sync devices, or abort the recovery |
2481 | */ | 2483 | */ |
2482 | if (!test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery)) | 2484 | if (test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery)) { |
2483 | return; | 2485 | /* During recovery devices cannot be removed, so |
2484 | /* During recovery devices cannot be removed, so locking and | 2486 | * locking and refcounting of rdevs is not needed |
2485 | * refcounting of rdevs is not needed | 2487 | */ |
2486 | */ | 2488 | for (i = 0; i < conf->raid_disks; i++) { |
2487 | for (i = 0; i < conf->raid_disks; i++) { | 2489 | struct md_rdev *rdev = conf->disks[i].rdev; |
2488 | struct md_rdev *rdev = conf->disks[i].rdev; | 2490 | if (rdev |
2489 | if (rdev | 2491 | && !test_bit(Faulty, &rdev->flags) |
2490 | && !test_bit(Faulty, &rdev->flags) | 2492 | && !test_bit(In_sync, &rdev->flags) |
2491 | && !test_bit(In_sync, &rdev->flags) | 2493 | && !rdev_set_badblocks(rdev, sh->sector, |
2492 | && !rdev_set_badblocks(rdev, sh->sector, | 2494 | STRIPE_SECTORS, 0)) |
2493 | STRIPE_SECTORS, 0)) | 2495 | abort = 1; |
2494 | abort = 1; | 2496 | rdev = conf->disks[i].replacement; |
2495 | rdev = conf->disks[i].replacement; | 2497 | if (rdev |
2496 | if (rdev | 2498 | && !test_bit(Faulty, &rdev->flags) |
2497 | && !test_bit(Faulty, &rdev->flags) | 2499 | && !test_bit(In_sync, &rdev->flags) |
2498 | && !test_bit(In_sync, &rdev->flags) | 2500 | && !rdev_set_badblocks(rdev, sh->sector, |
2499 | && !rdev_set_badblocks(rdev, sh->sector, | 2501 | STRIPE_SECTORS, 0)) |
2500 | STRIPE_SECTORS, 0)) | 2502 | abort = 1; |
2501 | abort = 1; | 2503 | } |
2502 | } | 2504 | if (abort) |
2503 | if (abort) { | 2505 | conf->recovery_disabled = |
2504 | conf->recovery_disabled = conf->mddev->recovery_disabled; | 2506 | conf->mddev->recovery_disabled; |
2505 | set_bit(MD_RECOVERY_INTR, &conf->mddev->recovery); | ||
2506 | } | 2507 | } |
2508 | md_done_sync(conf->mddev, STRIPE_SECTORS, !abort); | ||
2507 | } | 2509 | } |
2508 | 2510 | ||
2509 | static int want_replace(struct stripe_head *sh, int disk_idx) | 2511 | static int want_replace(struct stripe_head *sh, int disk_idx) |
@@ -3203,7 +3205,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) | |||
3203 | /* Not in-sync */; | 3205 | /* Not in-sync */; |
3204 | else if (is_bad) { | 3206 | else if (is_bad) { |
3205 | /* also not in-sync */ | 3207 | /* also not in-sync */ |
3206 | if (!test_bit(WriteErrorSeen, &rdev->flags)) { | 3208 | if (!test_bit(WriteErrorSeen, &rdev->flags) && |
3209 | test_bit(R5_UPTODATE, &dev->flags)) { | ||
3207 | /* treat as in-sync, but with a read error | 3210 | /* treat as in-sync, but with a read error |
3208 | * which we can now try to correct | 3211 | * which we can now try to correct |
3209 | */ | 3212 | */ |