aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: NeilBrown <neilb@suse.de> 2011-07-27 21:39:22 -0400
committer: NeilBrown <neilb@suse.de> 2011-07-27 21:39:22 -0400
commit: 7f0da59bdc2f65795a57009d78f7753d3aea1de3 (patch)
tree: 4ab3ae7188122657c12dfab61626d96360fb6240
parent: 31c176ecdf3563140e6395249eda51a18130d9f6 (diff)
md/raid5: use bad-block log to improve handling of uncorrectable read errors.
If we get an uncorrectable read error - record a bad block rather than failing the device.
And if these errors (which may be due to known bad blocks) cause recovery to be impossible, record a bad block on the recovering devices, or abort the recovery.

As we might abort a recovery without failing a device we need to teach RAID5 about recovery_disabled handling.

Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r-- drivers/md/raid5.c 59
-rw-r--r-- drivers/md/raid5.h 2
2 files changed, 53 insertions, 8 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index a2d68389ee75..5fc621673e6c 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2232,9 +2232,18 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh,
2232 rcu_read_lock(); 2232 rcu_read_lock();
2233 rdev = rcu_dereference(conf->disks[i].rdev); 2233 rdev = rcu_dereference(conf->disks[i].rdev);
2234 if (rdev && test_bit(In_sync, &rdev->flags)) 2234 if (rdev && test_bit(In_sync, &rdev->flags))
2235 /* multiple read failures in one stripe */ 2235 atomic_inc(&rdev->nr_pending);
2236 md_error(conf->mddev, rdev); 2236 else
2237 rdev = NULL;
2237 rcu_read_unlock(); 2238 rcu_read_unlock();
2239 if (rdev) {
2240 if (!rdev_set_badblocks(
2241 rdev,
2242 sh->sector,
2243 STRIPE_SECTORS, 0))
2244 md_error(conf->mddev, rdev);
2245 rdev_dec_pending(rdev, conf->mddev);
2246 }
2238 } 2247 }
2239 spin_lock_irq(&conf->device_lock); 2248 spin_lock_irq(&conf->device_lock);
2240 /* fail all writes first */ 2249 /* fail all writes first */
@@ -2313,6 +2322,41 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh,
2313 md_wakeup_thread(conf->mddev->thread); 2322 md_wakeup_thread(conf->mddev->thread);
2314} 2323}
2315 2324
2325static void
2326handle_failed_sync(raid5_conf_t *conf, struct stripe_head *sh,
2327 struct stripe_head_state *s)
2328{
2329 int abort = 0;
2330 int i;
2331
2332 md_done_sync(conf->mddev, STRIPE_SECTORS, 0);
2333 clear_bit(STRIPE_SYNCING, &sh->state);
2334 s->syncing = 0;
2335 /* There is nothing more to do for sync/check/repair.
2336 * For recover we need to record a bad block on all
2337 * non-sync devices, or abort the recovery
2338 */
2339 if (!test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery))
2340 return;
2341 /* During recovery devices cannot be removed, so locking and
2342 * refcounting of rdevs is not needed
2343 */
2344 for (i = 0; i < conf->raid_disks; i++) {
2345 mdk_rdev_t *rdev = conf->disks[i].rdev;
2346 if (!rdev
2347 || test_bit(Faulty, &rdev->flags)
2348 || test_bit(In_sync, &rdev->flags))
2349 continue;
2350 if (!rdev_set_badblocks(rdev, sh->sector,
2351 STRIPE_SECTORS, 0))
2352 abort = 1;
2353 }
2354 if (abort) {
2355 conf->recovery_disabled = conf->mddev->recovery_disabled;
2356 set_bit(MD_RECOVERY_INTR, &conf->mddev->recovery);
2357 }
2358}
2359
2316/* fetch_block - checks the given member device to see if its data needs 2360/* fetch_block - checks the given member device to see if its data needs
2317 * to be read or computed to satisfy a request. 2361 * to be read or computed to satisfy a request.
2318 * 2362 *
@@ -3067,11 +3111,8 @@ static void handle_stripe(struct stripe_head *sh)
3067 */ 3111 */
3068 if (s.failed > conf->max_degraded && s.to_read+s.to_write+s.written) 3112 if (s.failed > conf->max_degraded && s.to_read+s.to_write+s.written)
3069 handle_failed_stripe(conf, sh, &s, disks, &s.return_bi); 3113 handle_failed_stripe(conf, sh, &s, disks, &s.return_bi);
3070 if (s.failed > conf->max_degraded && s.syncing) { 3114 if (s.failed > conf->max_degraded && s.syncing)
3071 md_done_sync(conf->mddev, STRIPE_SECTORS, 0); 3115 handle_failed_sync(conf, sh, &s);
3072 clear_bit(STRIPE_SYNCING, &sh->state);
3073 s.syncing = 0;
3074 }
3075 3116
3076 /* 3117 /*
3077 * might be able to return some write requests if the parity blocks 3118 * might be able to return some write requests if the parity blocks
@@ -4976,6 +5017,7 @@ static int raid5_remove_disk(mddev_t *mddev, int number)
4976 * isn't possible. 5017 * isn't possible.
4977 */ 5018 */
4978 if (!test_bit(Faulty, &rdev->flags) && 5019 if (!test_bit(Faulty, &rdev->flags) &&
5020 mddev->recovery_disabled != conf->recovery_disabled &&
4979 !has_failed(conf) && 5021 !has_failed(conf) &&
4980 number < conf->raid_disks) { 5022 number < conf->raid_disks) {
4981 err = -EBUSY; 5023 err = -EBUSY;
@@ -5004,6 +5046,9 @@ static int raid5_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
5004 int first = 0; 5046 int first = 0;
5005 int last = conf->raid_disks - 1; 5047 int last = conf->raid_disks - 1;
5006 5048
5049 if (mddev->recovery_disabled == conf->recovery_disabled)
5050 return -EBUSY;
5051
5007 if (has_failed(conf)) 5052 if (has_failed(conf))
5008 /* no point adding a device */ 5053 /* no point adding a device */
5009 return -EINVAL; 5054 return -EINVAL;
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 68c500af1108..c5429d123636 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -399,7 +399,7 @@ struct raid5_private_data {
399 * (fresh device added). 399 * (fresh device added).
400 * Cleared when a sync completes. 400 * Cleared when a sync completes.
401 */ 401 */
402 402 int recovery_disabled;
403 /* per cpu variables */ 403 /* per cpu variables */
404 struct raid5_percpu { 404 struct raid5_percpu {
405 struct page *spare_page; /* Used when checking P/Q in raid6 */ 405 struct page *spare_page; /* Used when checking P/Q in raid6 */