diff options
author | NeilBrown <neilb@suse.de> | 2006-01-06 03:20:18 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-01-06 11:34:03 -0500 |
commit | d69762e98456b71167865db9e33e732a28dd36ab (patch) | |
tree | a93c3872d23db70e7e6089328cafd3eebba6aadb /drivers/md | |
parent | ca65b73bd9c301d243df93780f7b26579e6c9204 (diff) |
[PATCH] md: improve handling of read errors with raid6
This is a simple port of the matching functionality across from raid5. If we get a
read error, we don't kick the drive straight away, but try to overwrite it with
good data first.
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/raid6main.c | 70 |
1 files changed, 66 insertions, 4 deletions
diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c index 52e8796bb8ac..7a51553d8be5 100644 --- a/drivers/md/raid6main.c +++ b/drivers/md/raid6main.c | |||
@@ -367,8 +367,8 @@ static void shrink_stripes(raid6_conf_t *conf) | |||
367 | conf->slab_cache = NULL; | 367 | conf->slab_cache = NULL; |
368 | } | 368 | } |
369 | 369 | ||
370 | static int raid6_end_read_request (struct bio * bi, unsigned int bytes_done, | 370 | static int raid6_end_read_request(struct bio * bi, unsigned int bytes_done, |
371 | int error) | 371 | int error) |
372 | { | 372 | { |
373 | struct stripe_head *sh = bi->bi_private; | 373 | struct stripe_head *sh = bi->bi_private; |
374 | raid6_conf_t *conf = sh->raid_conf; | 374 | raid6_conf_t *conf = sh->raid_conf; |
@@ -420,9 +420,35 @@ static int raid6_end_read_request (struct bio * bi, unsigned int bytes_done, | |||
420 | #else | 420 | #else |
421 | set_bit(R5_UPTODATE, &sh->dev[i].flags); | 421 | set_bit(R5_UPTODATE, &sh->dev[i].flags); |
422 | #endif | 422 | #endif |
423 | if (test_bit(R5_ReadError, &sh->dev[i].flags)) { | ||
424 | printk(KERN_INFO "raid6: read error corrected!!\n"); | ||
425 | clear_bit(R5_ReadError, &sh->dev[i].flags); | ||
426 | clear_bit(R5_ReWrite, &sh->dev[i].flags); | ||
427 | } | ||
428 | if (atomic_read(&conf->disks[i].rdev->read_errors)) | ||
429 | atomic_set(&conf->disks[i].rdev->read_errors, 0); | ||
423 | } else { | 430 | } else { |
424 | md_error(conf->mddev, conf->disks[i].rdev); | 431 | int retry = 0; |
425 | clear_bit(R5_UPTODATE, &sh->dev[i].flags); | 432 | clear_bit(R5_UPTODATE, &sh->dev[i].flags); |
433 | atomic_inc(&conf->disks[i].rdev->read_errors); | ||
434 | if (conf->mddev->degraded) | ||
435 | printk(KERN_WARNING "raid6: read error not correctable.\n"); | ||
436 | else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) | ||
437 | /* Oh, no!!! */ | ||
438 | printk(KERN_WARNING "raid6: read error NOT corrected!!\n"); | ||
439 | else if (atomic_read(&conf->disks[i].rdev->read_errors) | ||
440 | > conf->max_nr_stripes) | ||
441 | printk(KERN_WARNING | ||
442 | "raid6: Too many read errors, failing device.\n"); | ||
443 | else | ||
444 | retry = 1; | ||
445 | if (retry) | ||
446 | set_bit(R5_ReadError, &sh->dev[i].flags); | ||
447 | else { | ||
448 | clear_bit(R5_ReadError, &sh->dev[i].flags); | ||
449 | clear_bit(R5_ReWrite, &sh->dev[i].flags); | ||
450 | md_error(conf->mddev, conf->disks[i].rdev); | ||
451 | } | ||
426 | } | 452 | } |
427 | rdev_dec_pending(conf->disks[i].rdev, conf->mddev); | 453 | rdev_dec_pending(conf->disks[i].rdev, conf->mddev); |
428 | #if 0 | 454 | #if 0 |
@@ -1079,6 +1105,12 @@ static void handle_stripe(struct stripe_head *sh, struct page *tmp_page) | |||
1079 | if (dev->written) written++; | 1105 | if (dev->written) written++; |
1080 | rdev = conf->disks[i].rdev; /* FIXME, should I be looking rdev */ | 1106 | rdev = conf->disks[i].rdev; /* FIXME, should I be looking rdev */ |
1081 | if (!rdev || !test_bit(In_sync, &rdev->flags)) { | 1107 | if (!rdev || !test_bit(In_sync, &rdev->flags)) { |
1108 | /* The ReadError flag will just be confusing now */ | ||
1109 | clear_bit(R5_ReadError, &dev->flags); | ||
1110 | clear_bit(R5_ReWrite, &dev->flags); | ||
1111 | } | ||
1112 | if (!rdev || !test_bit(In_sync, &rdev->flags) | ||
1113 | || test_bit(R5_ReadError, &dev->flags)) { | ||
1082 | if ( failed < 2 ) | 1114 | if ( failed < 2 ) |
1083 | failed_num[failed] = i; | 1115 | failed_num[failed] = i; |
1084 | failed++; | 1116 | failed++; |
@@ -1095,6 +1127,14 @@ static void handle_stripe(struct stripe_head *sh, struct page *tmp_page) | |||
1095 | if (failed > 2 && to_read+to_write+written) { | 1127 | if (failed > 2 && to_read+to_write+written) { |
1096 | for (i=disks; i--; ) { | 1128 | for (i=disks; i--; ) { |
1097 | int bitmap_end = 0; | 1129 | int bitmap_end = 0; |
1130 | |||
1131 | if (test_bit(R5_ReadError, &sh->dev[i].flags)) { | ||
1132 | mdk_rdev_t *rdev = conf->disks[i].rdev; | ||
1133 | if (rdev && test_bit(In_sync, &rdev->flags)) | ||
1134 | /* multiple read failures in one stripe */ | ||
1135 | md_error(conf->mddev, rdev); | ||
1136 | } | ||
1137 | |||
1098 | spin_lock_irq(&conf->device_lock); | 1138 | spin_lock_irq(&conf->device_lock); |
1099 | /* fail all writes first */ | 1139 | /* fail all writes first */ |
1100 | bi = sh->dev[i].towrite; | 1140 | bi = sh->dev[i].towrite; |
@@ -1130,7 +1170,8 @@ static void handle_stripe(struct stripe_head *sh, struct page *tmp_page) | |||
1130 | } | 1170 | } |
1131 | 1171 | ||
1132 | /* fail any reads if this device is non-operational */ | 1172 | /* fail any reads if this device is non-operational */ |
1133 | if (!test_bit(R5_Insync, &sh->dev[i].flags)) { | 1173 | if (!test_bit(R5_Insync, &sh->dev[i].flags) || |
1174 | test_bit(R5_ReadError, &sh->dev[i].flags)) { | ||
1134 | bi = sh->dev[i].toread; | 1175 | bi = sh->dev[i].toread; |
1135 | sh->dev[i].toread = NULL; | 1176 | sh->dev[i].toread = NULL; |
1136 | if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) | 1177 | if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) |
@@ -1457,6 +1498,27 @@ static void handle_stripe(struct stripe_head *sh, struct page *tmp_page) | |||
1457 | clear_bit(STRIPE_SYNCING, &sh->state); | 1498 | clear_bit(STRIPE_SYNCING, &sh->state); |
1458 | } | 1499 | } |
1459 | 1500 | ||
1501 | /* If the failed drives are just a ReadError, then we might need | ||
1502 | * to progress the repair/check process | ||
1503 | */ | ||
1504 | if (failed <= 2 && ! conf->mddev->ro) | ||
1505 | for (i=0; i<failed;i++) { | ||
1506 | dev = &sh->dev[failed_num[i]]; | ||
1507 | if (test_bit(R5_ReadError, &dev->flags) | ||
1508 | && !test_bit(R5_LOCKED, &dev->flags) | ||
1509 | && test_bit(R5_UPTODATE, &dev->flags) | ||
1510 | ) { | ||
1511 | if (!test_bit(R5_ReWrite, &dev->flags)) { | ||
1512 | set_bit(R5_Wantwrite, &dev->flags); | ||
1513 | set_bit(R5_ReWrite, &dev->flags); | ||
1514 | set_bit(R5_LOCKED, &dev->flags); | ||
1515 | } else { | ||
1516 | /* let's read it back */ | ||
1517 | set_bit(R5_Wantread, &dev->flags); | ||
1518 | set_bit(R5_LOCKED, &dev->flags); | ||
1519 | } | ||
1520 | } | ||
1521 | } | ||
1460 | spin_unlock(&sh->lock); | 1522 | spin_unlock(&sh->lock); |
1461 | 1523 | ||
1462 | while ((bi=return_bi)) { | 1524 | while ((bi=return_bi)) { |