diff options
Diffstat (limited to 'drivers/md/dm-raid.c')
-rw-r--r-- | drivers/md/dm-raid.c | 82 |
1 files changed, 47 insertions, 35 deletions
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 1b9795d75ef8..8abde6b8cedc 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c | |||
@@ -191,7 +191,6 @@ struct raid_dev { | |||
191 | #define RT_FLAG_RS_BITMAP_LOADED 2 | 191 | #define RT_FLAG_RS_BITMAP_LOADED 2 |
192 | #define RT_FLAG_UPDATE_SBS 3 | 192 | #define RT_FLAG_UPDATE_SBS 3 |
193 | #define RT_FLAG_RESHAPE_RS 4 | 193 | #define RT_FLAG_RESHAPE_RS 4 |
194 | #define RT_FLAG_KEEP_RS_FROZEN 5 | ||
195 | 194 | ||
196 | /* Array elements of 64 bit needed for rebuild/failed disk bits */ | 195 | /* Array elements of 64 bit needed for rebuild/failed disk bits */ |
197 | #define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8) | 196 | #define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8) |
@@ -861,6 +860,9 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size) | |||
861 | { | 860 | { |
862 | unsigned long min_region_size = rs->ti->len / (1 << 21); | 861 | unsigned long min_region_size = rs->ti->len / (1 << 21); |
863 | 862 | ||
863 | if (rs_is_raid0(rs)) | ||
864 | return 0; | ||
865 | |||
864 | if (!region_size) { | 866 | if (!region_size) { |
865 | /* | 867 | /* |
866 | * Choose a reasonable default. All figures in sectors. | 868 | * Choose a reasonable default. All figures in sectors. |
@@ -930,6 +932,8 @@ static int validate_raid_redundancy(struct raid_set *rs) | |||
930 | rebuild_cnt++; | 932 | rebuild_cnt++; |
931 | 933 | ||
932 | switch (rs->raid_type->level) { | 934 | switch (rs->raid_type->level) { |
935 | case 0: | ||
936 | break; | ||
933 | case 1: | 937 | case 1: |
934 | if (rebuild_cnt >= rs->md.raid_disks) | 938 | if (rebuild_cnt >= rs->md.raid_disks) |
935 | goto too_many; | 939 | goto too_many; |
@@ -2335,6 +2339,13 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) | |||
2335 | case 0: | 2339 | case 0: |
2336 | break; | 2340 | break; |
2337 | default: | 2341 | default: |
2342 | /* | ||
2343 | * We have to keep any raid0 data/metadata device pairs or | ||
2344 | * the MD raid0 personality will fail to start the array. | ||
2345 | */ | ||
2346 | if (rs_is_raid0(rs)) | ||
2347 | continue; | ||
2348 | |||
2338 | dev = container_of(rdev, struct raid_dev, rdev); | 2349 | dev = container_of(rdev, struct raid_dev, rdev); |
2339 | if (dev->meta_dev) | 2350 | if (dev->meta_dev) |
2340 | dm_put_device(ti, dev->meta_dev); | 2351 | dm_put_device(ti, dev->meta_dev); |
@@ -2579,7 +2590,6 @@ static int rs_prepare_reshape(struct raid_set *rs) | |||
2579 | } else { | 2590 | } else { |
2580 | /* Process raid1 without delta_disks */ | 2591 | /* Process raid1 without delta_disks */ |
2581 | mddev->raid_disks = rs->raid_disks; | 2592 | mddev->raid_disks = rs->raid_disks; |
2582 | set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags); | ||
2583 | reshape = false; | 2593 | reshape = false; |
2584 | } | 2594 | } |
2585 | } else { | 2595 | } else { |
@@ -2590,7 +2600,6 @@ static int rs_prepare_reshape(struct raid_set *rs) | |||
2590 | if (reshape) { | 2600 | if (reshape) { |
2591 | set_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags); | 2601 | set_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags); |
2592 | set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); | 2602 | set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); |
2593 | set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags); | ||
2594 | } else if (mddev->raid_disks < rs->raid_disks) | 2603 | } else if (mddev->raid_disks < rs->raid_disks) |
2595 | /* Create new superblocks and bitmaps, if any new disks */ | 2604 | /* Create new superblocks and bitmaps, if any new disks */ |
2596 | set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); | 2605 | set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); |
@@ -2902,7 +2911,6 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |||
2902 | goto bad; | 2911 | goto bad; |
2903 | 2912 | ||
2904 | set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); | 2913 | set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); |
2905 | set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags); | ||
2906 | /* Takeover ain't recovery, so disable recovery */ | 2914 | /* Takeover ain't recovery, so disable recovery */ |
2907 | rs_setup_recovery(rs, MaxSector); | 2915 | rs_setup_recovery(rs, MaxSector); |
2908 | rs_set_new(rs); | 2916 | rs_set_new(rs); |
@@ -3386,21 +3394,28 @@ static void raid_postsuspend(struct dm_target *ti) | |||
3386 | { | 3394 | { |
3387 | struct raid_set *rs = ti->private; | 3395 | struct raid_set *rs = ti->private; |
3388 | 3396 | ||
3389 | if (test_and_clear_bit(RT_FLAG_RS_RESUMED, &rs->runtime_flags)) { | 3397 | if (!rs->md.suspended) |
3390 | if (!rs->md.suspended) | 3398 | mddev_suspend(&rs->md); |
3391 | mddev_suspend(&rs->md); | 3399 | |
3392 | rs->md.ro = 1; | 3400 | rs->md.ro = 1; |
3393 | } | ||
3394 | } | 3401 | } |
3395 | 3402 | ||
3396 | static void attempt_restore_of_faulty_devices(struct raid_set *rs) | 3403 | static void attempt_restore_of_faulty_devices(struct raid_set *rs) |
3397 | { | 3404 | { |
3398 | int i; | 3405 | int i; |
3399 | uint64_t failed_devices, cleared_failed_devices = 0; | 3406 | uint64_t cleared_failed_devices[DISKS_ARRAY_ELEMS]; |
3400 | unsigned long flags; | 3407 | unsigned long flags; |
3408 | bool cleared = false; | ||
3401 | struct dm_raid_superblock *sb; | 3409 | struct dm_raid_superblock *sb; |
3410 | struct mddev *mddev = &rs->md; | ||
3402 | struct md_rdev *r; | 3411 | struct md_rdev *r; |
3403 | 3412 | ||
3413 | /* RAID personalities have to provide hot add/remove methods or we need to bail out. */ | ||
3414 | if (!mddev->pers || !mddev->pers->hot_add_disk || !mddev->pers->hot_remove_disk) | ||
3415 | return; | ||
3416 | |||
3417 | memset(cleared_failed_devices, 0, sizeof(cleared_failed_devices)); | ||
3418 | |||
3404 | for (i = 0; i < rs->md.raid_disks; i++) { | 3419 | for (i = 0; i < rs->md.raid_disks; i++) { |
3405 | r = &rs->dev[i].rdev; | 3420 | r = &rs->dev[i].rdev; |
3406 | if (test_bit(Faulty, &r->flags) && r->sb_page && | 3421 | if (test_bit(Faulty, &r->flags) && r->sb_page && |
@@ -3420,7 +3435,7 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs) | |||
3420 | * ourselves. | 3435 | * ourselves. |
3421 | */ | 3436 | */ |
3422 | if ((r->raid_disk >= 0) && | 3437 | if ((r->raid_disk >= 0) && |
3423 | (r->mddev->pers->hot_remove_disk(r->mddev, r) != 0)) | 3438 | (mddev->pers->hot_remove_disk(mddev, r) != 0)) |
3424 | /* Failed to revive this device, try next */ | 3439 | /* Failed to revive this device, try next */ |
3425 | continue; | 3440 | continue; |
3426 | 3441 | ||
@@ -3430,22 +3445,30 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs) | |||
3430 | clear_bit(Faulty, &r->flags); | 3445 | clear_bit(Faulty, &r->flags); |
3431 | clear_bit(WriteErrorSeen, &r->flags); | 3446 | clear_bit(WriteErrorSeen, &r->flags); |
3432 | clear_bit(In_sync, &r->flags); | 3447 | clear_bit(In_sync, &r->flags); |
3433 | if (r->mddev->pers->hot_add_disk(r->mddev, r)) { | 3448 | if (mddev->pers->hot_add_disk(mddev, r)) { |
3434 | r->raid_disk = -1; | 3449 | r->raid_disk = -1; |
3435 | r->saved_raid_disk = -1; | 3450 | r->saved_raid_disk = -1; |
3436 | r->flags = flags; | 3451 | r->flags = flags; |
3437 | } else { | 3452 | } else { |
3438 | r->recovery_offset = 0; | 3453 | r->recovery_offset = 0; |
3439 | cleared_failed_devices |= 1 << i; | 3454 | set_bit(i, (void *) cleared_failed_devices); |
3455 | cleared = true; | ||
3440 | } | 3456 | } |
3441 | } | 3457 | } |
3442 | } | 3458 | } |
3443 | if (cleared_failed_devices) { | 3459 | |
3460 | /* If any failed devices could be cleared, update all sbs failed_devices bits */ | ||
3461 | if (cleared) { | ||
3462 | uint64_t failed_devices[DISKS_ARRAY_ELEMS]; | ||
3463 | |||
3444 | rdev_for_each(r, &rs->md) { | 3464 | rdev_for_each(r, &rs->md) { |
3445 | sb = page_address(r->sb_page); | 3465 | sb = page_address(r->sb_page); |
3446 | failed_devices = le64_to_cpu(sb->failed_devices); | 3466 | sb_retrieve_failed_devices(sb, failed_devices); |
3447 | failed_devices &= ~cleared_failed_devices; | 3467 | |
3448 | sb->failed_devices = cpu_to_le64(failed_devices); | 3468 | for (i = 0; i < DISKS_ARRAY_ELEMS; i++) |
3469 | failed_devices[i] &= ~cleared_failed_devices[i]; | ||
3470 | |||
3471 | sb_update_failed_devices(sb, failed_devices); | ||
3449 | } | 3472 | } |
3450 | } | 3473 | } |
3451 | } | 3474 | } |
@@ -3610,26 +3633,15 @@ static void raid_resume(struct dm_target *ti) | |||
3610 | * devices are reachable again. | 3633 | * devices are reachable again. |
3611 | */ | 3634 | */ |
3612 | attempt_restore_of_faulty_devices(rs); | 3635 | attempt_restore_of_faulty_devices(rs); |
3613 | } else { | 3636 | } |
3614 | mddev->ro = 0; | ||
3615 | mddev->in_sync = 0; | ||
3616 | 3637 | ||
3617 | /* | 3638 | mddev->ro = 0; |
3618 | * When passing in flags to the ctr, we expect userspace | 3639 | mddev->in_sync = 0; |
3619 | * to reset them because they made it to the superblocks | ||
3620 | * and reload the mapping anyway. | ||
3621 | * | ||
3622 | * -> only unfreeze recovery in case of a table reload or | ||
3623 | * we'll have a bogus recovery/reshape position | ||
3624 | * retrieved from the superblock by the ctr because | ||
3625 | * the ongoing recovery/reshape will change it after read. | ||
3626 | */ | ||
3627 | if (!test_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags)) | ||
3628 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | ||
3629 | 3640 | ||
3630 | if (mddev->suspended) | 3641 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); |
3631 | mddev_resume(mddev); | 3642 | |
3632 | } | 3643 | if (mddev->suspended) |
3644 | mddev_resume(mddev); | ||
3633 | } | 3645 | } |
3634 | 3646 | ||
3635 | static struct target_type raid_target = { | 3647 | static struct target_type raid_target = { |