diff options
author | Heinz Mauelshagen <heinzm@redhat.com> | 2016-06-14 15:23:13 -0400 |
---|---|---|
committer | Mike Snitzer <snitzer@redhat.com> | 2016-06-14 18:52:14 -0400 |
commit | 6e20902e8f9e1551afa75bd499be853a95745b9f (patch) | |
tree | 7aab7c01aae2cf81f3da0fb987673a0b8d4d8bf0 /drivers/md/dm-raid.c | |
parent | 4257e085e26edaba0bf516ea231bd5122e3f3e6f (diff) |
dm raid: fix failed takeover/reshapes by keeping raid set frozen
Superblock updates were bogus, causing some takeovers/reshapes to fail.
Introduce new runtime flag (RT_FLAG_KEEP_RS_FROZEN) to keep a raid set
frozen when a layout change was requested. Userspace will immediately
reload the table w/o the flags requesting such change once they made it
to the superblocks and any change of recovery/reshape offsets has to be
avoided until after read.
Signed-off-by: Heinz Mauelshagen <heinzm@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Diffstat (limited to 'drivers/md/dm-raid.c')
-rw-r--r-- | drivers/md/dm-raid.c | 85 |
1 files changed, 56 insertions, 29 deletions
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 7df450877423..8d4865184b96 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c | |||
@@ -190,6 +190,7 @@ struct raid_dev { | |||
190 | #define RT_FLAG_RS_BITMAP_LOADED 2 | 190 | #define RT_FLAG_RS_BITMAP_LOADED 2 |
191 | #define RT_FLAG_UPDATE_SBS 3 | 191 | #define RT_FLAG_UPDATE_SBS 3 |
192 | #define RT_FLAG_RESHAPE_RS 4 | 192 | #define RT_FLAG_RESHAPE_RS 4 |
193 | #define RT_FLAG_KEEP_RS_FROZEN 5 | ||
193 | 194 | ||
194 | /* Array elements of 64 bit needed for rebuild/write_mostly bits */ | 195 | /* Array elements of 64 bit needed for rebuild/write_mostly bits */ |
195 | #define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8) | 196 | #define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8) |
@@ -2727,6 +2728,7 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
2727 | return r; | 2728 | return r; |
2728 | 2729 | ||
2729 | set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); | 2730 | set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); |
2731 | set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags); | ||
2730 | rs_set_new(rs); | 2732 | rs_set_new(rs); |
2731 | } else if (rs_reshape_requested(rs)) { | 2733 | } else if (rs_reshape_requested(rs)) { |
2732 | if (rs_is_reshaping(rs)) { | 2734 | if (rs_is_reshaping(rs)) { |
@@ -2767,13 +2769,19 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
2767 | * Would cause allocations in raid1->check_reshape | 2769 | * Would cause allocations in raid1->check_reshape |
2768 | * though, thus more issues with potential failures | 2770 | * though, thus more issues with potential failures |
2769 | */ | 2771 | */ |
2770 | else if (rs_is_raid1(rs)) | 2772 | else if (rs_is_raid1(rs)) { |
2773 | set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags); | ||
2771 | rs->md.raid_disks = rs->raid_disks; | 2774 | rs->md.raid_disks = rs->raid_disks; |
2775 | } | ||
2776 | |||
2777 | if (test_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags)) { | ||
2778 | set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); | ||
2779 | set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags); | ||
2780 | } | ||
2772 | 2781 | ||
2773 | if (rs->md.raid_disks < rs->raid_disks) | 2782 | if (rs->md.raid_disks < rs->raid_disks) |
2774 | set_bit(MD_ARRAY_FIRST_USE, &rs->md.flags); | 2783 | set_bit(MD_ARRAY_FIRST_USE, &rs->md.flags); |
2775 | 2784 | ||
2776 | set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); | ||
2777 | rs_set_cur(rs); | 2785 | rs_set_cur(rs); |
2778 | } else | 2786 | } else |
2779 | rs_set_cur(rs); | 2787 | rs_set_cur(rs); |
@@ -3231,9 +3239,11 @@ static void raid_postsuspend(struct dm_target *ti) | |||
3231 | { | 3239 | { |
3232 | struct raid_set *rs = ti->private; | 3240 | struct raid_set *rs = ti->private; |
3233 | 3241 | ||
3234 | mddev_suspend(&rs->md); | 3242 | if (test_and_clear_bit(RT_FLAG_RS_RESUMED, &rs->runtime_flags)) { |
3235 | rs->md.ro = 1; | 3243 | if (!rs->md.suspended) |
3236 | clear_bit(RT_FLAG_RS_RESUMED, &rs->runtime_flags); | 3244 | mddev_suspend(&rs->md); |
3245 | rs->md.ro = 1; | ||
3246 | } | ||
3237 | } | 3247 | } |
3238 | 3248 | ||
3239 | static void attempt_restore_of_faulty_devices(struct raid_set *rs) | 3249 | static void attempt_restore_of_faulty_devices(struct raid_set *rs) |
@@ -3308,6 +3318,18 @@ static int __load_dirty_region_bitmap(struct raid_set *rs) | |||
3308 | return r; | 3318 | return r; |
3309 | } | 3319 | } |
3310 | 3320 | ||
3321 | /* Enforce updating all superblocks */ | ||
3322 | static void rs_update_sbs(struct raid_set *rs) | ||
3323 | { | ||
3324 | struct mddev *mddev = &rs->md; | ||
3325 | int ro = mddev->ro; | ||
3326 | |||
3327 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | ||
3328 | mddev->ro = 0; | ||
3329 | md_update_sb(mddev, 1); | ||
3330 | mddev->ro = ro; | ||
3331 | } | ||
3332 | |||
3311 | /* | 3333 | /* |
3312 | * Reshape changes raid algorithm of @rs to new one within personality | 3334 | * Reshape changes raid algorithm of @rs to new one within personality |
3313 | * (e.g. raid6_zr -> raid6_nc), changes stripe size, adds/removes | 3335 | * (e.g. raid6_zr -> raid6_nc), changes stripe size, adds/removes |
@@ -3356,9 +3378,12 @@ static int rs_start_reshape(struct raid_set *rs) | |||
3356 | if (!mddev->suspended) | 3378 | if (!mddev->suspended) |
3357 | mddev_suspend(mddev); | 3379 | mddev_suspend(mddev); |
3358 | 3380 | ||
3359 | mddev->ro = 0; | 3381 | /* |
3360 | md_update_sb(mddev, 1); | 3382 | * Now reshape got set up, update superblocks to |
3361 | mddev->ro = 1; | 3383 | * reflect the fact so that a table reload will |
3384 | * access proper superblock content in the ctr. | ||
3385 | */ | ||
3386 | rs_update_sbs(rs); | ||
3362 | 3387 | ||
3363 | return 0; | 3388 | return 0; |
3364 | } | 3389 | } |
@@ -3375,22 +3400,12 @@ static int raid_preresume(struct dm_target *ti) | |||
3375 | 3400 | ||
3376 | /* | 3401 | /* |
3377 | * The superblocks need to be updated on disk if the | 3402 | * The superblocks need to be updated on disk if the |
3378 | * array is new or __load_dirty_region_bitmap will overwrite them | 3403 | * array is new or new devices got added (thus zeroed |
3379 | * in core with old data. | 3404 | * out by userspace) or __load_dirty_region_bitmap |
3380 | * | 3405 | * will overwrite them in core with old data or fail. |
3381 | * In case the array got modified (takeover/reshape/resize) | ||
3382 | * or the data offsets on the component devices changed, they | ||
3383 | * have to be updated as well. | ||
3384 | * | ||
3385 | * Have to switch to readwrite and back in order to | ||
3386 | * allow for the superblock updates. | ||
3387 | */ | 3406 | */ |
3388 | if (test_and_clear_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags)) { | 3407 | if (test_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags)) |
3389 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 3408 | rs_update_sbs(rs); |
3390 | mddev->ro = 0; | ||
3391 | md_update_sb(mddev, 1); | ||
3392 | mddev->ro = 1; | ||
3393 | } | ||
3394 | 3409 | ||
3395 | /* | 3410 | /* |
3396 | * Disable/enable discard support on raid set after any | 3411 | * Disable/enable discard support on raid set after any |
@@ -3449,14 +3464,26 @@ static void raid_resume(struct dm_target *ti) | |||
3449 | * devices are reachable again. | 3464 | * devices are reachable again. |
3450 | */ | 3465 | */ |
3451 | attempt_restore_of_faulty_devices(rs); | 3466 | attempt_restore_of_faulty_devices(rs); |
3452 | } | 3467 | } else { |
3468 | mddev->ro = 0; | ||
3469 | mddev->in_sync = 0; | ||
3453 | 3470 | ||
3454 | mddev->ro = 0; | 3471 | /* |
3455 | mddev->in_sync = 0; | 3472 | * When passing in flags to the ctr, we expect userspace |
3456 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | 3473 | * to reset them because they made it to the superblocks |
3474 | * and reload the mapping anyway. | ||
3475 | * | ||
3476 | * -> only unfreeze recovery in case of a table reload or | ||
3477 | * we'll have a bogus recovery/reshape position | ||
3478 | * retrieved from the superblock by the ctr because | ||
3479 | * the ongoing recovery/reshape will change it after read. | ||
3480 | */ | ||
3481 | if (!test_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags)) | ||
3482 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | ||
3457 | 3483 | ||
3458 | if (mddev->suspended) | 3484 | if (mddev->suspended) |
3459 | mddev_resume(mddev); | 3485 | mddev_resume(mddev); |
3486 | } | ||
3460 | } | 3487 | } |
3461 | 3488 | ||
3462 | static struct target_type raid_target = { | 3489 | static struct target_type raid_target = { |