aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/raid5.c
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2012-04-01 09:48:38 -0400
committerNeilBrown <neilb@suse.de>2012-04-03 01:36:17 -0400
commit18b9837ea0dc3cf844c6c4196871ce91d047bddb (patch)
treede8be80b767d144405aeeaf7238c723f575b4b40 /drivers/md/raid5.c
parent5220ea1e640869e70f894837678315c878c651fd (diff)
md/raid5: fix handling of bad blocks during recovery.
1/ We can only treat a known-bad-block like a read-error if we have the data that belongs in that block. So fix that test. 2/ If we cannot recovery a stripe due to insufficient data, don't tell "md_done_sync" that the sync failed unless we really did fail something. If we successfully record bad blocks, that is success. Reported-by: "majianpeng" <majianpeng@gmail.com> Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r--drivers/md/raid5.c55
1 files changed, 29 insertions, 26 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 23ac880bba9a..9799be80bf31 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2471,39 +2471,41 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
2471 int abort = 0; 2471 int abort = 0;
2472 int i; 2472 int i;
2473 2473
2474 md_done_sync(conf->mddev, STRIPE_SECTORS, 0);
2475 clear_bit(STRIPE_SYNCING, &sh->state); 2474 clear_bit(STRIPE_SYNCING, &sh->state);
2476 s->syncing = 0; 2475 s->syncing = 0;
2477 s->replacing = 0; 2476 s->replacing = 0;
2478 /* There is nothing more to do for sync/check/repair. 2477 /* There is nothing more to do for sync/check/repair.
2478 * Don't even need to abort as that is handled elsewhere
2479 * if needed, and not always wanted e.g. if there is a known
2480 * bad block here.
2479 * For recover/replace we need to record a bad block on all 2481 * For recover/replace we need to record a bad block on all
2480 * non-sync devices, or abort the recovery 2482 * non-sync devices, or abort the recovery
2481 */ 2483 */
2482 if (!test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery)) 2484 if (test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery)) {
2483 return; 2485 /* During recovery devices cannot be removed, so
2484 /* During recovery devices cannot be removed, so locking and 2486 * locking and refcounting of rdevs is not needed
2485 * refcounting of rdevs is not needed 2487 */
2486 */ 2488 for (i = 0; i < conf->raid_disks; i++) {
2487 for (i = 0; i < conf->raid_disks; i++) { 2489 struct md_rdev *rdev = conf->disks[i].rdev;
2488 struct md_rdev *rdev = conf->disks[i].rdev; 2490 if (rdev
2489 if (rdev 2491 && !test_bit(Faulty, &rdev->flags)
2490 && !test_bit(Faulty, &rdev->flags) 2492 && !test_bit(In_sync, &rdev->flags)
2491 && !test_bit(In_sync, &rdev->flags) 2493 && !rdev_set_badblocks(rdev, sh->sector,
2492 && !rdev_set_badblocks(rdev, sh->sector, 2494 STRIPE_SECTORS, 0))
2493 STRIPE_SECTORS, 0)) 2495 abort = 1;
2494 abort = 1; 2496 rdev = conf->disks[i].replacement;
2495 rdev = conf->disks[i].replacement; 2497 if (rdev
2496 if (rdev 2498 && !test_bit(Faulty, &rdev->flags)
2497 && !test_bit(Faulty, &rdev->flags) 2499 && !test_bit(In_sync, &rdev->flags)
2498 && !test_bit(In_sync, &rdev->flags) 2500 && !rdev_set_badblocks(rdev, sh->sector,
2499 && !rdev_set_badblocks(rdev, sh->sector, 2501 STRIPE_SECTORS, 0))
2500 STRIPE_SECTORS, 0)) 2502 abort = 1;
2501 abort = 1; 2503 }
2502 } 2504 if (abort)
2503 if (abort) { 2505 conf->recovery_disabled =
2504 conf->recovery_disabled = conf->mddev->recovery_disabled; 2506 conf->mddev->recovery_disabled;
2505 set_bit(MD_RECOVERY_INTR, &conf->mddev->recovery);
2506 } 2507 }
2508 md_done_sync(conf->mddev, STRIPE_SECTORS, !abort);
2507} 2509}
2508 2510
2509static int want_replace(struct stripe_head *sh, int disk_idx) 2511static int want_replace(struct stripe_head *sh, int disk_idx)
@@ -3203,7 +3205,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
3203 /* Not in-sync */; 3205 /* Not in-sync */;
3204 else if (is_bad) { 3206 else if (is_bad) {
3205 /* also not in-sync */ 3207 /* also not in-sync */
3206 if (!test_bit(WriteErrorSeen, &rdev->flags)) { 3208 if (!test_bit(WriteErrorSeen, &rdev->flags) &&
3209 test_bit(R5_UPTODATE, &dev->flags)) {
3207 /* treat as in-sync, but with a read error 3210 /* treat as in-sync, but with a read error
3208 * which we can now try to correct 3211 * which we can now try to correct
3209 */ 3212 */