about summary refs log tree commit diff stats
path: root/drivers/md/dm-raid.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/dm-raid.c')
-rw-r--r-- drivers/md/dm-raid.c 82
1 files changed, 47 insertions, 35 deletions
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 1b9795d75ef8..8abde6b8cedc 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -191,7 +191,6 @@ struct raid_dev {
191#define RT_FLAG_RS_BITMAP_LOADED 2 191#define RT_FLAG_RS_BITMAP_LOADED 2
192#define RT_FLAG_UPDATE_SBS 3 192#define RT_FLAG_UPDATE_SBS 3
193#define RT_FLAG_RESHAPE_RS 4 193#define RT_FLAG_RESHAPE_RS 4
194#define RT_FLAG_KEEP_RS_FROZEN 5
195 194
196/* Array elements of 64 bit needed for rebuild/failed disk bits */ 195/* Array elements of 64 bit needed for rebuild/failed disk bits */
197#define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8) 196#define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8)
@@ -861,6 +860,9 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size)
861{ 860{
862 unsigned long min_region_size = rs->ti->len / (1 << 21); 861 unsigned long min_region_size = rs->ti->len / (1 << 21);
863 862
863 if (rs_is_raid0(rs))
864 return 0;
865
864 if (!region_size) { 866 if (!region_size) {
865 /* 867 /*
866 * Choose a reasonable default. All figures in sectors. 868 * Choose a reasonable default. All figures in sectors.
@@ -930,6 +932,8 @@ static int validate_raid_redundancy(struct raid_set *rs)
930 rebuild_cnt++; 932 rebuild_cnt++;
931 933
932 switch (rs->raid_type->level) { 934 switch (rs->raid_type->level) {
935 case 0:
936 break;
933 case 1: 937 case 1:
934 if (rebuild_cnt >= rs->md.raid_disks) 938 if (rebuild_cnt >= rs->md.raid_disks)
935 goto too_many; 939 goto too_many;
@@ -2335,6 +2339,13 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
2335 case 0: 2339 case 0:
2336 break; 2340 break;
2337 default: 2341 default:
2342 /*
2343 * We have to keep any raid0 data/metadata device pairs or
2344 * the MD raid0 personality will fail to start the array.
2345 */
2346 if (rs_is_raid0(rs))
2347 continue;
2348
2338 dev = container_of(rdev, struct raid_dev, rdev); 2349 dev = container_of(rdev, struct raid_dev, rdev);
2339 if (dev->meta_dev) 2350 if (dev->meta_dev)
2340 dm_put_device(ti, dev->meta_dev); 2351 dm_put_device(ti, dev->meta_dev);
@@ -2579,7 +2590,6 @@ static int rs_prepare_reshape(struct raid_set *rs)
2579 } else { 2590 } else {
2580 /* Process raid1 without delta_disks */ 2591 /* Process raid1 without delta_disks */
2581 mddev->raid_disks = rs->raid_disks; 2592 mddev->raid_disks = rs->raid_disks;
2582 set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags);
2583 reshape = false; 2593 reshape = false;
2584 } 2594 }
2585 } else { 2595 } else {
@@ -2590,7 +2600,6 @@ static int rs_prepare_reshape(struct raid_set *rs)
2590 if (reshape) { 2600 if (reshape) {
2591 set_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags); 2601 set_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags);
2592 set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); 2602 set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
2593 set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags);
2594 } else if (mddev->raid_disks < rs->raid_disks) 2603 } else if (mddev->raid_disks < rs->raid_disks)
2595 /* Create new superblocks and bitmaps, if any new disks */ 2604 /* Create new superblocks and bitmaps, if any new disks */
2596 set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); 2605 set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
@@ -2902,7 +2911,6 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
2902 goto bad; 2911 goto bad;
2903 2912
2904 set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); 2913 set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
2905 set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags);
2906 /* Takeover ain't recovery, so disable recovery */ 2914 /* Takeover ain't recovery, so disable recovery */
2907 rs_setup_recovery(rs, MaxSector); 2915 rs_setup_recovery(rs, MaxSector);
2908 rs_set_new(rs); 2916 rs_set_new(rs);
@@ -3386,21 +3394,28 @@ static void raid_postsuspend(struct dm_target *ti)
3386{ 3394{
3387 struct raid_set *rs = ti->private; 3395 struct raid_set *rs = ti->private;
3388 3396
3389 if (test_and_clear_bit(RT_FLAG_RS_RESUMED, &rs->runtime_flags)) { 3397 if (!rs->md.suspended)
3390 if (!rs->md.suspended) 3398 mddev_suspend(&rs->md);
3391 mddev_suspend(&rs->md); 3399
3392 rs->md.ro = 1; 3400 rs->md.ro = 1;
3393 }
3394} 3401}
3395 3402
3396static void attempt_restore_of_faulty_devices(struct raid_set *rs) 3403static void attempt_restore_of_faulty_devices(struct raid_set *rs)
3397{ 3404{
3398 int i; 3405 int i;
3399 uint64_t failed_devices, cleared_failed_devices = 0; 3406 uint64_t cleared_failed_devices[DISKS_ARRAY_ELEMS];
3400 unsigned long flags; 3407 unsigned long flags;
3408 bool cleared = false;
3401 struct dm_raid_superblock *sb; 3409 struct dm_raid_superblock *sb;
3410 struct mddev *mddev = &rs->md;
3402 struct md_rdev *r; 3411 struct md_rdev *r;
3403 3412
3413 /* RAID personalities have to provide hot add/remove methods or we need to bail out. */
3414 if (!mddev->pers || !mddev->pers->hot_add_disk || !mddev->pers->hot_remove_disk)
3415 return;
3416
3417 memset(cleared_failed_devices, 0, sizeof(cleared_failed_devices));
3418
3404 for (i = 0; i < rs->md.raid_disks; i++) { 3419 for (i = 0; i < rs->md.raid_disks; i++) {
3405 r = &rs->dev[i].rdev; 3420 r = &rs->dev[i].rdev;
3406 if (test_bit(Faulty, &r->flags) && r->sb_page && 3421 if (test_bit(Faulty, &r->flags) && r->sb_page &&
@@ -3420,7 +3435,7 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs)
3420 * ourselves. 3435 * ourselves.
3421 */ 3436 */
3422 if ((r->raid_disk >= 0) && 3437 if ((r->raid_disk >= 0) &&
3423 (r->mddev->pers->hot_remove_disk(r->mddev, r) != 0)) 3438 (mddev->pers->hot_remove_disk(mddev, r) != 0))
3424 /* Failed to revive this device, try next */ 3439 /* Failed to revive this device, try next */
3425 continue; 3440 continue;
3426 3441
@@ -3430,22 +3445,30 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs)
3430 clear_bit(Faulty, &r->flags); 3445 clear_bit(Faulty, &r->flags);
3431 clear_bit(WriteErrorSeen, &r->flags); 3446 clear_bit(WriteErrorSeen, &r->flags);
3432 clear_bit(In_sync, &r->flags); 3447 clear_bit(In_sync, &r->flags);
3433 if (r->mddev->pers->hot_add_disk(r->mddev, r)) { 3448 if (mddev->pers->hot_add_disk(mddev, r)) {
3434 r->raid_disk = -1; 3449 r->raid_disk = -1;
3435 r->saved_raid_disk = -1; 3450 r->saved_raid_disk = -1;
3436 r->flags = flags; 3451 r->flags = flags;
3437 } else { 3452 } else {
3438 r->recovery_offset = 0; 3453 r->recovery_offset = 0;
3439 cleared_failed_devices |= 1 << i; 3454 set_bit(i, (void *) cleared_failed_devices);
3455 cleared = true;
3440 } 3456 }
3441 } 3457 }
3442 } 3458 }
3443 if (cleared_failed_devices) { 3459
3460 /* If any failed devices could be cleared, update all sbs failed_devices bits */
3461 if (cleared) {
3462 uint64_t failed_devices[DISKS_ARRAY_ELEMS];
3463
3444 rdev_for_each(r, &rs->md) { 3464 rdev_for_each(r, &rs->md) {
3445 sb = page_address(r->sb_page); 3465 sb = page_address(r->sb_page);
3446 failed_devices = le64_to_cpu(sb->failed_devices); 3466 sb_retrieve_failed_devices(sb, failed_devices);
3447 failed_devices &= ~cleared_failed_devices; 3467
3448 sb->failed_devices = cpu_to_le64(failed_devices); 3468 for (i = 0; i < DISKS_ARRAY_ELEMS; i++)
3469 failed_devices[i] &= ~cleared_failed_devices[i];
3470
3471 sb_update_failed_devices(sb, failed_devices);
3449 } 3472 }
3450 } 3473 }
3451} 3474}
@@ -3610,26 +3633,15 @@ static void raid_resume(struct dm_target *ti)
3610 * devices are reachable again. 3633 * devices are reachable again.
3611 */ 3634 */
3612 attempt_restore_of_faulty_devices(rs); 3635 attempt_restore_of_faulty_devices(rs);
3613 } else { 3636 }
3614 mddev->ro = 0;
3615 mddev->in_sync = 0;
3616 3637
3617 /* 3638 mddev->ro = 0;
3618 * When passing in flags to the ctr, we expect userspace 3639 mddev->in_sync = 0;
3619 * to reset them because they made it to the superblocks
3620 * and reload the mapping anyway.
3621 *
3622 * -> only unfreeze recovery in case of a table reload or
3623 * we'll have a bogus recovery/reshape position
3624 * retrieved from the superblock by the ctr because
3625 * the ongoing recovery/reshape will change it after read.
3626 */
3627 if (!test_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags))
3628 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
3629 3640
3630 if (mddev->suspended) 3641 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
3631 mddev_resume(mddev); 3642
3632 } 3643 if (mddev->suspended)
3644 mddev_resume(mddev);
3633} 3645}
3634 3646
3635static struct target_type raid_target = { 3647static struct target_type raid_target = {