diff options
author | NeilBrown <neilb@suse.de> | 2011-07-27 21:39:22 -0400 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2011-07-27 21:39:22 -0400 |
commit | 7f0da59bdc2f65795a57009d78f7753d3aea1de3 (patch) | |
tree | 4ab3ae7188122657c12dfab61626d96360fb6240 | |
parent | 31c176ecdf3563140e6395249eda51a18130d9f6 (diff) |
md/raid5: use bad-block log to improve handling of uncorrectable read errors.
If we get an uncorrectable read error, record a bad block rather than
failing the device.
And if these errors (which may be due to known bad blocks) cause
recovery to be impossible, record a bad block on the recovering
devices or, if the bad block cannot be recorded, abort the recovery.
As we might abort a recovery without failing a device, we need to teach
RAID5 about recovery_disabled handling.
Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r-- | drivers/md/raid5.c | 59 | ||||
-rw-r--r-- | drivers/md/raid5.h | 2 |
2 files changed, 53 insertions, 8 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index a2d68389ee75..5fc621673e6c 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -2232,9 +2232,18 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh, | |||
2232 | rcu_read_lock(); | 2232 | rcu_read_lock(); |
2233 | rdev = rcu_dereference(conf->disks[i].rdev); | 2233 | rdev = rcu_dereference(conf->disks[i].rdev); |
2234 | if (rdev && test_bit(In_sync, &rdev->flags)) | 2234 | if (rdev && test_bit(In_sync, &rdev->flags)) |
2235 | /* multiple read failures in one stripe */ | 2235 | atomic_inc(&rdev->nr_pending); |
2236 | md_error(conf->mddev, rdev); | 2236 | else |
2237 | rdev = NULL; | ||
2237 | rcu_read_unlock(); | 2238 | rcu_read_unlock(); |
2239 | if (rdev) { | ||
2240 | if (!rdev_set_badblocks( | ||
2241 | rdev, | ||
2242 | sh->sector, | ||
2243 | STRIPE_SECTORS, 0)) | ||
2244 | md_error(conf->mddev, rdev); | ||
2245 | rdev_dec_pending(rdev, conf->mddev); | ||
2246 | } | ||
2238 | } | 2247 | } |
2239 | spin_lock_irq(&conf->device_lock); | 2248 | spin_lock_irq(&conf->device_lock); |
2240 | /* fail all writes first */ | 2249 | /* fail all writes first */ |
@@ -2313,6 +2322,41 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh, | |||
2313 | md_wakeup_thread(conf->mddev->thread); | 2322 | md_wakeup_thread(conf->mddev->thread); |
2314 | } | 2323 | } |
2315 | 2324 | ||
2325 | static void | ||
2326 | handle_failed_sync(raid5_conf_t *conf, struct stripe_head *sh, | ||
2327 | struct stripe_head_state *s) | ||
2328 | { | ||
2329 | int abort = 0; | ||
2330 | int i; | ||
2331 | |||
2332 | md_done_sync(conf->mddev, STRIPE_SECTORS, 0); | ||
2333 | clear_bit(STRIPE_SYNCING, &sh->state); | ||
2334 | s->syncing = 0; | ||
2335 | /* There is nothing more to do for sync/check/repair. | ||
2336 | * For recover we need to record a bad block on all | ||
2337 | * non-sync devices, or abort the recovery | ||
2338 | */ | ||
2339 | if (!test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery)) | ||
2340 | return; | ||
2341 | /* During recovery devices cannot be removed, so locking and | ||
2342 | * refcounting of rdevs is not needed | ||
2343 | */ | ||
2344 | for (i = 0; i < conf->raid_disks; i++) { | ||
2345 | mdk_rdev_t *rdev = conf->disks[i].rdev; | ||
2346 | if (!rdev | ||
2347 | || test_bit(Faulty, &rdev->flags) | ||
2348 | || test_bit(In_sync, &rdev->flags)) | ||
2349 | continue; | ||
2350 | if (!rdev_set_badblocks(rdev, sh->sector, | ||
2351 | STRIPE_SECTORS, 0)) | ||
2352 | abort = 1; | ||
2353 | } | ||
2354 | if (abort) { | ||
2355 | conf->recovery_disabled = conf->mddev->recovery_disabled; | ||
2356 | set_bit(MD_RECOVERY_INTR, &conf->mddev->recovery); | ||
2357 | } | ||
2358 | } | ||
2359 | |||
2316 | /* fetch_block - checks the given member device to see if its data needs | 2360 | /* fetch_block - checks the given member device to see if its data needs |
2317 | * to be read or computed to satisfy a request. | 2361 | * to be read or computed to satisfy a request. |
2318 | * | 2362 | * |
@@ -3067,11 +3111,8 @@ static void handle_stripe(struct stripe_head *sh) | |||
3067 | */ | 3111 | */ |
3068 | if (s.failed > conf->max_degraded && s.to_read+s.to_write+s.written) | 3112 | if (s.failed > conf->max_degraded && s.to_read+s.to_write+s.written) |
3069 | handle_failed_stripe(conf, sh, &s, disks, &s.return_bi); | 3113 | handle_failed_stripe(conf, sh, &s, disks, &s.return_bi); |
3070 | if (s.failed > conf->max_degraded && s.syncing) { | 3114 | if (s.failed > conf->max_degraded && s.syncing) |
3071 | md_done_sync(conf->mddev, STRIPE_SECTORS, 0); | 3115 | handle_failed_sync(conf, sh, &s); |
3072 | clear_bit(STRIPE_SYNCING, &sh->state); | ||
3073 | s.syncing = 0; | ||
3074 | } | ||
3075 | 3116 | ||
3076 | /* | 3117 | /* |
3077 | * might be able to return some write requests if the parity blocks | 3118 | * might be able to return some write requests if the parity blocks |
@@ -4976,6 +5017,7 @@ static int raid5_remove_disk(mddev_t *mddev, int number) | |||
4976 | * isn't possible. | 5017 | * isn't possible. |
4977 | */ | 5018 | */ |
4978 | if (!test_bit(Faulty, &rdev->flags) && | 5019 | if (!test_bit(Faulty, &rdev->flags) && |
5020 | mddev->recovery_disabled != conf->recovery_disabled && | ||
4979 | !has_failed(conf) && | 5021 | !has_failed(conf) && |
4980 | number < conf->raid_disks) { | 5022 | number < conf->raid_disks) { |
4981 | err = -EBUSY; | 5023 | err = -EBUSY; |
@@ -5004,6 +5046,9 @@ static int raid5_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
5004 | int first = 0; | 5046 | int first = 0; |
5005 | int last = conf->raid_disks - 1; | 5047 | int last = conf->raid_disks - 1; |
5006 | 5048 | ||
5049 | if (mddev->recovery_disabled == conf->recovery_disabled) | ||
5050 | return -EBUSY; | ||
5051 | |||
5007 | if (has_failed(conf)) | 5052 | if (has_failed(conf)) |
5008 | /* no point adding a device */ | 5053 | /* no point adding a device */ |
5009 | return -EINVAL; | 5054 | return -EINVAL; |
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 68c500af1108..c5429d123636 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h | |||
@@ -399,7 +399,7 @@ struct raid5_private_data { | |||
399 | * (fresh device added). | 399 | * (fresh device added). |
400 | * Cleared when a sync completes. | 400 | * Cleared when a sync completes. |
401 | */ | 401 | */ |
402 | 402 | int recovery_disabled; | |
403 | /* per cpu variables */ | 403 | /* per cpu variables */ |
404 | struct raid5_percpu { | 404 | struct raid5_percpu { |
405 | struct page *spare_page; /* Used when checking P/Q in raid6 */ | 405 | struct page *spare_page; /* Used when checking P/Q in raid6 */ |