aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/dm-raid.c
diff options
context:
space:
mode:
authorHeinz Mauelshagen <heinzm@redhat.com>2017-01-13 21:53:07 -0500
committerMike Snitzer <snitzer@redhat.com>2017-01-25 06:49:06 -0500
commitc63ede3b4211e1e2489eda6a2efb0eb6fa26483a (patch)
tree5f3cc66a60cbb756d9b120f7a6322bc36ad9c6f6 /drivers/md/dm-raid.c
parent7a308bb3016f57e5be11a677d15b821536419d36 (diff)
dm raid: fix transient device failure processing
This fix addresses the following 3 failure scenarios: 1) If a (transiently) inaccessible metadata device is being passed into the constructor (e.g. a device tuple '254:4 254:5'), it is processed as if '- -' was given. This erroneously results in a status table line containing '- -', which mistakenly differs from what has been passed in. As a result, userspace libdevmapper puts the device tuple separate from the RAID device, thus not processing the dependencies properly. 2) A false health status char 'A' instead of 'D' is emitted on the status info line for the meta/data device tuple in this metadata device failure case. 3) If the metadata device is accessible when passed into the constructor but the data device (partially) isn't, that leg may be set faulty by the raid personality on access to the (partially) unavailable leg. A restore attempted in a second raid device resume on such a failed leg (status char 'D') fails after the (partial) leg has returned. Fixes for the aforementioned failure scenarios: - don't release passed in devices in the constructor, thus allowing the status table line to e.g. contain '254:4 254:5' rather than '- -' - emit device status char 'D' rather than 'A' for the device tuple with the failed metadata device on the status info line - when attempting to restore faulty devices in a second resume, allow the device hot remove function to succeed by setting the device to not in-sync In case userspace intentionally passes '- -' into the constructor to avoid that device tuple (e.g. to split off a raid1 leg temporarily for later re-addition), the status table line will correctly show '- -' and the status info line will provide a '-' device health character for the non-defined device tuple. Signed-off-by: Heinz Mauelshagen <heinzm@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Diffstat (limited to 'drivers/md/dm-raid.c')
-rw-r--r--drivers/md/dm-raid.c87
1 files changed, 38 insertions, 49 deletions
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index b8f978e551d7..b40a088a2d92 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -2253,7 +2253,7 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev)
2253 struct mddev *mddev = &rs->md; 2253 struct mddev *mddev = &rs->md;
2254 struct dm_raid_superblock *sb; 2254 struct dm_raid_superblock *sb;
2255 2255
2256 if (rs_is_raid0(rs) || !rdev->sb_page) 2256 if (rs_is_raid0(rs) || !rdev->sb_page || rdev->raid_disk < 0)
2257 return 0; 2257 return 0;
2258 2258
2259 sb = page_address(rdev->sb_page); 2259 sb = page_address(rdev->sb_page);
@@ -2316,21 +2316,19 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev)
2316static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) 2316static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
2317{ 2317{
2318 int r; 2318 int r;
2319 struct raid_dev *dev; 2319 struct md_rdev *rdev, *freshest;
2320 struct md_rdev *rdev, *tmp, *freshest;
2321 struct mddev *mddev = &rs->md; 2320 struct mddev *mddev = &rs->md;
2322 2321
2323 freshest = NULL; 2322 freshest = NULL;
2324 rdev_for_each_safe(rdev, tmp, mddev) { 2323 rdev_for_each(rdev, mddev) {
2325 /* 2324 /*
2326 * Skipping super_load due to CTR_FLAG_SYNC will cause 2325 * Skipping super_load due to CTR_FLAG_SYNC will cause
2327 * the array to undergo initialization again as 2326 * the array to undergo initialization again as
2328 * though it were new. This is the intended effect 2327 * though it were new. This is the intended effect
2329 * of the "sync" directive. 2328 * of the "sync" directive.
2330 * 2329 *
2331 * When reshaping capability is added, we must ensure 2330 * With reshaping capability added, we must ensure
2332 * that the "sync" directive is disallowed during the 2331 * that the "sync" directive is disallowed during the reshape.
2333 * reshape.
2334 */ 2332 */
2335 if (test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags)) 2333 if (test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags))
2336 continue; 2334 continue;
@@ -2347,6 +2345,7 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
2347 case 0: 2345 case 0:
2348 break; 2346 break;
2349 default: 2347 default:
2348 /* This is a failure to read the superblock from the metadata device. */
2350 /* 2349 /*
2351 * We have to keep any raid0 data/metadata device pairs or 2350 * We have to keep any raid0 data/metadata device pairs or
2352 * the MD raid0 personality will fail to start the array. 2351 * the MD raid0 personality will fail to start the array.
@@ -2354,33 +2353,17 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
2354 if (rs_is_raid0(rs)) 2353 if (rs_is_raid0(rs))
2355 continue; 2354 continue;
2356 2355
2357 dev = container_of(rdev, struct raid_dev, rdev);
2358 if (dev->meta_dev)
2359 dm_put_device(ti, dev->meta_dev);
2360
2361 dev->meta_dev = NULL;
2362 rdev->meta_bdev = NULL;
2363
2364 if (rdev->sb_page)
2365 put_page(rdev->sb_page);
2366
2367 rdev->sb_page = NULL;
2368
2369 rdev->sb_loaded = 0;
2370
2371 /* 2356 /*
2372 * We might be able to salvage the data device 2357 * We keep the dm_devs to be able to emit the device tuple
2373 * even though the meta device has failed. For 2358 * properly on the table line in raid_status() (rather than
2374 * now, we behave as though '- -' had been 2359 * mistakenly acting as if '- -' got passed into the constructor).
2375 * set for this device in the table. 2360 *
2361 * The rdev has to stay on the same_set list to allow for
2362 * the attempt to restore faulty devices on second resume.
2376 */ 2363 */
2377 if (dev->data_dev) 2364 set_bit(Faulty, &rdev->flags);
2378 dm_put_device(ti, dev->data_dev); 2365 rdev->raid_disk = rdev->saved_raid_disk = -1;
2379 2366 break;
2380 dev->data_dev = NULL;
2381 rdev->bdev = NULL;
2382
2383 list_del(&rdev->same_set);
2384 } 2367 }
2385 } 2368 }
2386 2369
@@ -3078,10 +3061,13 @@ static const char *decipher_sync_action(struct mddev *mddev)
3078 * 'D' = Dead/Failed device 3061 * 'D' = Dead/Failed device
3079 * 'a' = Alive but not in-sync 3062 * 'a' = Alive but not in-sync
3080 * 'A' = Alive and in-sync 3063 * 'A' = Alive and in-sync
3064 * '-' = Non-existing device (i.e. uspace passed '- -' into the ctr)
3081 */ 3065 */
3082static const char *__raid_dev_status(struct md_rdev *rdev, bool array_in_sync) 3066static const char *__raid_dev_status(struct md_rdev *rdev, bool array_in_sync)
3083{ 3067{
3084 if (test_bit(Faulty, &rdev->flags)) 3068 if (!rdev->bdev)
3069 return "-";
3070 else if (test_bit(Faulty, &rdev->flags))
3085 return "D"; 3071 return "D";
3086 else if (!array_in_sync || !test_bit(In_sync, &rdev->flags)) 3072 else if (!array_in_sync || !test_bit(In_sync, &rdev->flags))
3087 return "a"; 3073 return "a";
@@ -3183,7 +3169,6 @@ static void raid_status(struct dm_target *ti, status_type_t type,
3183 sector_t progress, resync_max_sectors, resync_mismatches; 3169 sector_t progress, resync_max_sectors, resync_mismatches;
3184 const char *sync_action; 3170 const char *sync_action;
3185 struct raid_type *rt; 3171 struct raid_type *rt;
3186 struct md_rdev *rdev;
3187 3172
3188 switch (type) { 3173 switch (type) {
3189 case STATUSTYPE_INFO: 3174 case STATUSTYPE_INFO:
@@ -3204,9 +3189,9 @@ static void raid_status(struct dm_target *ti, status_type_t type,
3204 atomic64_read(&mddev->resync_mismatches) : 0; 3189 atomic64_read(&mddev->resync_mismatches) : 0;
3205 sync_action = decipher_sync_action(&rs->md); 3190 sync_action = decipher_sync_action(&rs->md);
3206 3191
3207 /* HM FIXME: do we want another state char for raid0? It shows 'D' or 'A' now */ 3192 /* HM FIXME: do we want another state char for raid0? It shows 'D'/'A'/'-' now */
3208 rdev_for_each(rdev, mddev) 3193 for (i = 0; i < rs->raid_disks; i++)
3209 DMEMIT(__raid_dev_status(rdev, array_in_sync)); 3194 DMEMIT(__raid_dev_status(&rs->dev[i].rdev, array_in_sync));
3210 3195
3211 /* 3196 /*
3212 * In-sync/Reshape ratio: 3197 * In-sync/Reshape ratio:
@@ -3427,7 +3412,7 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs)
3427 3412
3428 memset(cleared_failed_devices, 0, sizeof(cleared_failed_devices)); 3413 memset(cleared_failed_devices, 0, sizeof(cleared_failed_devices));
3429 3414
3430 for (i = 0; i < rs->md.raid_disks; i++) { 3415 for (i = 0; i < mddev->raid_disks; i++) {
3431 r = &rs->dev[i].rdev; 3416 r = &rs->dev[i].rdev;
3432 if (test_bit(Faulty, &r->flags) && r->sb_page && 3417 if (test_bit(Faulty, &r->flags) && r->sb_page &&
3433 sync_page_io(r, 0, r->sb_size, r->sb_page, 3418 sync_page_io(r, 0, r->sb_size, r->sb_page,
@@ -3445,22 +3430,26 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs)
3445 * '>= 0' - meaning we must call this function 3430 * '>= 0' - meaning we must call this function
3446 * ourselves. 3431 * ourselves.
3447 */ 3432 */
3448 if ((r->raid_disk >= 0) &&
3449 (mddev->pers->hot_remove_disk(mddev, r) != 0))
3450 /* Failed to revive this device, try next */
3451 continue;
3452
3453 r->raid_disk = i;
3454 r->saved_raid_disk = i;
3455 flags = r->flags; 3433 flags = r->flags;
3434 clear_bit(In_sync, &r->flags); /* Mandatory for hot remove. */
3435 if (r->raid_disk >= 0) {
3436 if (mddev->pers->hot_remove_disk(mddev, r)) {
3437 /* Failed to revive this device, try next */
3438 r->flags = flags;
3439 continue;
3440 }
3441 } else
3442 r->raid_disk = r->saved_raid_disk = i;
3443
3456 clear_bit(Faulty, &r->flags); 3444 clear_bit(Faulty, &r->flags);
3457 clear_bit(WriteErrorSeen, &r->flags); 3445 clear_bit(WriteErrorSeen, &r->flags);
3458 clear_bit(In_sync, &r->flags); 3446
3459 if (mddev->pers->hot_add_disk(mddev, r)) { 3447 if (mddev->pers->hot_add_disk(mddev, r)) {
3460 r->raid_disk = -1; 3448 /* Failed to revive this device, try next */
3461 r->saved_raid_disk = -1; 3449 r->raid_disk = r->saved_raid_disk = -1;
3462 r->flags = flags; 3450 r->flags = flags;
3463 } else { 3451 } else {
3452 clear_bit(In_sync, &r->flags);
3464 r->recovery_offset = 0; 3453 r->recovery_offset = 0;
3465 set_bit(i, (void *) cleared_failed_devices); 3454 set_bit(i, (void *) cleared_failed_devices);
3466 cleared = true; 3455 cleared = true;
@@ -3651,7 +3640,7 @@ static void raid_resume(struct dm_target *ti)
3651 3640
3652static struct target_type raid_target = { 3641static struct target_type raid_target = {
3653 .name = "raid", 3642 .name = "raid",
3654 .version = {1, 9, 1}, 3643 .version = {1, 9, 2},
3655 .module = THIS_MODULE, 3644 .module = THIS_MODULE,
3656 .ctr = raid_ctr, 3645 .ctr = raid_ctr,
3657 .dtr = raid_dtr, 3646 .dtr = raid_dtr,