aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/dm-raid.c
diff options
context:
space:
mode:
authorHeinz Mauelshagen <heinzm@redhat.com>2016-06-14 15:23:13 -0400
committerMike Snitzer <snitzer@redhat.com>2016-06-14 18:52:14 -0400
commit6e20902e8f9e1551afa75bd499be853a95745b9f (patch)
tree7aab7c01aae2cf81f3da0fb987673a0b8d4d8bf0 /drivers/md/dm-raid.c
parent4257e085e26edaba0bf516ea231bd5122e3f3e6f (diff)
dm raid: fix failed takeover/reshapes by keeping raid set frozen
Superblock updates were bogus, causing some takeovers/reshapes to fail. Introduce a new runtime flag (RT_FLAG_KEEP_RS_FROZEN) to keep a raid set frozen when a layout change was requested. Userspace will immediately reload the table without the flags requesting such a change once they have made it to the superblocks, and any change of recovery/reshape offsets has to be avoided until after the reload. Signed-off-by: Heinz Mauelshagen <heinzm@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Diffstat (limited to 'drivers/md/dm-raid.c')
-rw-r--r--drivers/md/dm-raid.c85
1 file changed, 56 insertions, 29 deletions
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 7df450877423..8d4865184b96 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -190,6 +190,7 @@ struct raid_dev {
190#define RT_FLAG_RS_BITMAP_LOADED 2 190#define RT_FLAG_RS_BITMAP_LOADED 2
191#define RT_FLAG_UPDATE_SBS 3 191#define RT_FLAG_UPDATE_SBS 3
192#define RT_FLAG_RESHAPE_RS 4 192#define RT_FLAG_RESHAPE_RS 4
193#define RT_FLAG_KEEP_RS_FROZEN 5
193 194
194/* Array elements of 64 bit needed for rebuild/write_mostly bits */ 195/* Array elements of 64 bit needed for rebuild/write_mostly bits */
195#define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8) 196#define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8)
@@ -2727,6 +2728,7 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
2727 return r; 2728 return r;
2728 2729
2729 set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); 2730 set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
2731 set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags);
2730 rs_set_new(rs); 2732 rs_set_new(rs);
2731 } else if (rs_reshape_requested(rs)) { 2733 } else if (rs_reshape_requested(rs)) {
2732 if (rs_is_reshaping(rs)) { 2734 if (rs_is_reshaping(rs)) {
@@ -2767,13 +2769,19 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
2767 * Would cause allocations in raid1->check_reshape 2769 * Would cause allocations in raid1->check_reshape
2768 * though, thus more issues with potential failures 2770 * though, thus more issues with potential failures
2769 */ 2771 */
2770 else if (rs_is_raid1(rs)) 2772 else if (rs_is_raid1(rs)) {
2773 set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags);
2771 rs->md.raid_disks = rs->raid_disks; 2774 rs->md.raid_disks = rs->raid_disks;
2775 }
2776
2777 if (test_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags)) {
2778 set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
2779 set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags);
2780 }
2772 2781
2773 if (rs->md.raid_disks < rs->raid_disks) 2782 if (rs->md.raid_disks < rs->raid_disks)
2774 set_bit(MD_ARRAY_FIRST_USE, &rs->md.flags); 2783 set_bit(MD_ARRAY_FIRST_USE, &rs->md.flags);
2775 2784
2776 set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
2777 rs_set_cur(rs); 2785 rs_set_cur(rs);
2778 } else 2786 } else
2779 rs_set_cur(rs); 2787 rs_set_cur(rs);
@@ -3231,9 +3239,11 @@ static void raid_postsuspend(struct dm_target *ti)
3231{ 3239{
3232 struct raid_set *rs = ti->private; 3240 struct raid_set *rs = ti->private;
3233 3241
3234 mddev_suspend(&rs->md); 3242 if (test_and_clear_bit(RT_FLAG_RS_RESUMED, &rs->runtime_flags)) {
3235 rs->md.ro = 1; 3243 if (!rs->md.suspended)
3236 clear_bit(RT_FLAG_RS_RESUMED, &rs->runtime_flags); 3244 mddev_suspend(&rs->md);
3245 rs->md.ro = 1;
3246 }
3237} 3247}
3238 3248
3239static void attempt_restore_of_faulty_devices(struct raid_set *rs) 3249static void attempt_restore_of_faulty_devices(struct raid_set *rs)
@@ -3308,6 +3318,18 @@ static int __load_dirty_region_bitmap(struct raid_set *rs)
3308 return r; 3318 return r;
3309} 3319}
3310 3320
3321/* Enforce updating all superblocks */
3322static void rs_update_sbs(struct raid_set *rs)
3323{
3324 struct mddev *mddev = &rs->md;
3325 int ro = mddev->ro;
3326
3327 set_bit(MD_CHANGE_DEVS, &mddev->flags);
3328 mddev->ro = 0;
3329 md_update_sb(mddev, 1);
3330 mddev->ro = ro;
3331}
3332
3311/* 3333/*
3312 * Reshape changes raid algorithm of @rs to new one within personality 3334 * Reshape changes raid algorithm of @rs to new one within personality
3313 * (e.g. raid6_zr -> raid6_nc), changes stripe size, adds/removes 3335 * (e.g. raid6_zr -> raid6_nc), changes stripe size, adds/removes
@@ -3356,9 +3378,12 @@ static int rs_start_reshape(struct raid_set *rs)
3356 if (!mddev->suspended) 3378 if (!mddev->suspended)
3357 mddev_suspend(mddev); 3379 mddev_suspend(mddev);
3358 3380
3359 mddev->ro = 0; 3381 /*
3360 md_update_sb(mddev, 1); 3382 * Now reshape got set up, update superblocks to
3361 mddev->ro = 1; 3383 * reflect the fact so that a table reload will
3384 * access proper superblock content in the ctr.
3385 */
3386 rs_update_sbs(rs);
3362 3387
3363 return 0; 3388 return 0;
3364} 3389}
@@ -3375,22 +3400,12 @@ static int raid_preresume(struct dm_target *ti)
3375 3400
3376 /* 3401 /*
3377 * The superblocks need to be updated on disk if the 3402 * The superblocks need to be updated on disk if the
3378 * array is new or __load_dirty_region_bitmap will overwrite them 3403 * array is new or new devices got added (thus zeroed
3379 * in core with old data. 3404 * out by userspace) or __load_dirty_region_bitmap
3380 * 3405 * will overwrite them in core with old data or fail.
3381 * In case the array got modified (takeover/reshape/resize)
3382 * or the data offsets on the component devices changed, they
3383 * have to be updated as well.
3384 *
3385 * Have to switch to readwrite and back in order to
3386 * allow for the superblock updates.
3387 */ 3406 */
3388 if (test_and_clear_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags)) { 3407 if (test_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags))
3389 set_bit(MD_CHANGE_DEVS, &mddev->flags); 3408 rs_update_sbs(rs);
3390 mddev->ro = 0;
3391 md_update_sb(mddev, 1);
3392 mddev->ro = 1;
3393 }
3394 3409
3395 /* 3410 /*
3396 * Disable/enable discard support on raid set after any 3411 * Disable/enable discard support on raid set after any
@@ -3449,14 +3464,26 @@ static void raid_resume(struct dm_target *ti)
3449 * devices are reachable again. 3464 * devices are reachable again.
3450 */ 3465 */
3451 attempt_restore_of_faulty_devices(rs); 3466 attempt_restore_of_faulty_devices(rs);
3452 } 3467 } else {
3468 mddev->ro = 0;
3469 mddev->in_sync = 0;
3453 3470
3454 mddev->ro = 0; 3471 /*
3455 mddev->in_sync = 0; 3472 * When passing in flags to the ctr, we expect userspace
3456 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); 3473 * to reset them because they made it to the superblocks
3474 * and reload the mapping anyway.
3475 *
3476 * -> only unfreeze recovery in case of a table reload or
3477 * we'll have a bogus recovery/reshape position
3478 * retrieved from the superblock by the ctr because
3479 * the ongoing recovery/reshape will change it after read.
3480 */
3481 if (!test_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags))
3482 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
3457 3483
3458 if (mddev->suspended) 3484 if (mddev->suspended)
3459 mddev_resume(mddev); 3485 mddev_resume(mddev);
3486 }
3460} 3487}
3461 3488
3462static struct target_type raid_target = { 3489static struct target_type raid_target = {