about | summary | refs | log | tree | commit | diff | stats
path: root/drivers/md/dm-raid.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/dm-raid.c')
-rw-r--r-- drivers/md/dm-raid.c 154
1 files changed, 61 insertions, 93 deletions
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index cae689de75fd..5ba067fa0c72 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) 2010-2011 Neil Brown 2 * Copyright (C) 2010-2011 Neil Brown
3 * Copyright (C) 2010-2017 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2010-2018 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This file is released under the GPL. 5 * This file is released under the GPL.
6 */ 6 */
@@ -29,9 +29,6 @@
29 */ 29 */
30#define MIN_RAID456_JOURNAL_SPACE (4*2048) 30#define MIN_RAID456_JOURNAL_SPACE (4*2048)
31 31
32/* Global list of all raid sets */
33static LIST_HEAD(raid_sets);
34
35static bool devices_handle_discard_safely = false; 32static bool devices_handle_discard_safely = false;
36 33
37/* 34/*
@@ -227,7 +224,6 @@ struct rs_layout {
227 224
228struct raid_set { 225struct raid_set {
229 struct dm_target *ti; 226 struct dm_target *ti;
230 struct list_head list;
231 227
232 uint32_t stripe_cache_entries; 228 uint32_t stripe_cache_entries;
233 unsigned long ctr_flags; 229 unsigned long ctr_flags;
@@ -273,19 +269,6 @@ static void rs_config_restore(struct raid_set *rs, struct rs_layout *l)
273 mddev->new_chunk_sectors = l->new_chunk_sectors; 269 mddev->new_chunk_sectors = l->new_chunk_sectors;
274} 270}
275 271
276/* Find any raid_set in active slot for @rs on global list */
277static struct raid_set *rs_find_active(struct raid_set *rs)
278{
279 struct raid_set *r;
280 struct mapped_device *md = dm_table_get_md(rs->ti->table);
281
282 list_for_each_entry(r, &raid_sets, list)
283 if (r != rs && dm_table_get_md(r->ti->table) == md)
284 return r;
285
286 return NULL;
287}
288
289/* raid10 algorithms (i.e. formats) */ 272/* raid10 algorithms (i.e. formats) */
290#define ALGORITHM_RAID10_DEFAULT 0 273#define ALGORITHM_RAID10_DEFAULT 0
291#define ALGORITHM_RAID10_NEAR 1 274#define ALGORITHM_RAID10_NEAR 1
@@ -764,7 +747,6 @@ static struct raid_set *raid_set_alloc(struct dm_target *ti, struct raid_type *r
764 747
765 mddev_init(&rs->md); 748 mddev_init(&rs->md);
766 749
767 INIT_LIST_HEAD(&rs->list);
768 rs->raid_disks = raid_devs; 750 rs->raid_disks = raid_devs;
769 rs->delta_disks = 0; 751 rs->delta_disks = 0;
770 752
@@ -782,9 +764,6 @@ static struct raid_set *raid_set_alloc(struct dm_target *ti, struct raid_type *r
782 for (i = 0; i < raid_devs; i++) 764 for (i = 0; i < raid_devs; i++)
783 md_rdev_init(&rs->dev[i].rdev); 765 md_rdev_init(&rs->dev[i].rdev);
784 766
785 /* Add @rs to global list. */
786 list_add(&rs->list, &raid_sets);
787
788 /* 767 /*
789 * Remaining items to be initialized by further RAID params: 768 * Remaining items to be initialized by further RAID params:
790 * rs->md.persistent 769 * rs->md.persistent
@@ -797,7 +776,7 @@ static struct raid_set *raid_set_alloc(struct dm_target *ti, struct raid_type *r
797 return rs; 776 return rs;
798} 777}
799 778
800/* Free all @rs allocations and remove it from global list. */ 779/* Free all @rs allocations */
801static void raid_set_free(struct raid_set *rs) 780static void raid_set_free(struct raid_set *rs)
802{ 781{
803 int i; 782 int i;
@@ -815,8 +794,6 @@ static void raid_set_free(struct raid_set *rs)
815 dm_put_device(rs->ti, rs->dev[i].data_dev); 794 dm_put_device(rs->ti, rs->dev[i].data_dev);
816 } 795 }
817 796
818 list_del(&rs->list);
819
820 kfree(rs); 797 kfree(rs);
821} 798}
822 799
@@ -2649,7 +2626,7 @@ static int rs_adjust_data_offsets(struct raid_set *rs)
2649 return 0; 2626 return 0;
2650 } 2627 }
2651 2628
2652 /* HM FIXME: get InSync raid_dev? */ 2629 /* HM FIXME: get In_Sync raid_dev? */
2653 rdev = &rs->dev[0].rdev; 2630 rdev = &rs->dev[0].rdev;
2654 2631
2655 if (rs->delta_disks < 0) { 2632 if (rs->delta_disks < 0) {
@@ -3149,6 +3126,11 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
3149 set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); 3126 set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
3150 rs_set_new(rs); 3127 rs_set_new(rs);
3151 } else if (rs_is_recovering(rs)) { 3128 } else if (rs_is_recovering(rs)) {
3129 /* Rebuild particular devices */
3130 if (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags)) {
3131 set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
3132 rs_setup_recovery(rs, MaxSector);
3133 }
3152 /* A recovering raid set may be resized */ 3134 /* A recovering raid set may be resized */
3153 ; /* skip setup rs */ 3135 ; /* skip setup rs */
3154 } else if (rs_is_reshaping(rs)) { 3136 } else if (rs_is_reshaping(rs)) {
@@ -3242,6 +3224,8 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
3242 /* Start raid set read-only and assumed clean to change in raid_resume() */ 3224 /* Start raid set read-only and assumed clean to change in raid_resume() */
3243 rs->md.ro = 1; 3225 rs->md.ro = 1;
3244 rs->md.in_sync = 1; 3226 rs->md.in_sync = 1;
3227
3228 /* Keep array frozen */
3245 set_bit(MD_RECOVERY_FROZEN, &rs->md.recovery); 3229 set_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
3246 3230
3247 /* Has to be held on running the array */ 3231 /* Has to be held on running the array */
@@ -3265,7 +3249,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
3265 rs->callbacks.congested_fn = raid_is_congested; 3249 rs->callbacks.congested_fn = raid_is_congested;
3266 dm_table_add_target_callbacks(ti->table, &rs->callbacks); 3250 dm_table_add_target_callbacks(ti->table, &rs->callbacks);
3267 3251
3268 /* If raid4/5/6 journal mode explictely requested (only possible with journal dev) -> set it */ 3252 /* If raid4/5/6 journal mode explicitly requested (only possible with journal dev) -> set it */
3269 if (test_bit(__CTR_FLAG_JOURNAL_MODE, &rs->ctr_flags)) { 3253 if (test_bit(__CTR_FLAG_JOURNAL_MODE, &rs->ctr_flags)) {
3270 r = r5c_journal_mode_set(&rs->md, rs->journal_dev.mode); 3254 r = r5c_journal_mode_set(&rs->md, rs->journal_dev.mode);
3271 if (r) { 3255 if (r) {
@@ -3350,32 +3334,53 @@ static int raid_map(struct dm_target *ti, struct bio *bio)
3350 return DM_MAPIO_SUBMITTED; 3334 return DM_MAPIO_SUBMITTED;
3351} 3335}
3352 3336
3353/* Return string describing the current sync action of @mddev */ 3337/* Return sync state string for @state */
3354static const char *decipher_sync_action(struct mddev *mddev, unsigned long recovery) 3338enum sync_state { st_frozen, st_reshape, st_resync, st_check, st_repair, st_recover, st_idle };
3339static const char *sync_str(enum sync_state state)
3340{
3341 /* Has to be in above sync_state order! */
3342 static const char *sync_strs[] = {
3343 "frozen",
3344 "reshape",
3345 "resync",
3346 "check",
3347 "repair",
3348 "recover",
3349 "idle"
3350 };
3351
3352 return __within_range(state, 0, ARRAY_SIZE(sync_strs) - 1) ? sync_strs[state] : "undef";
3353};
3354
3355/* Return enum sync_state for @mddev derived from @recovery flags */
3356static const enum sync_state decipher_sync_action(struct mddev *mddev, unsigned long recovery)
3355{ 3357{
3356 if (test_bit(MD_RECOVERY_FROZEN, &recovery)) 3358 if (test_bit(MD_RECOVERY_FROZEN, &recovery))
3357 return "frozen"; 3359 return st_frozen;
3358 3360
3359 /* The MD sync thread can be done with io but still be running */ 3361 /* The MD sync thread can be done with io or be interrupted but still be running */
3360 if (!test_bit(MD_RECOVERY_DONE, &recovery) && 3362 if (!test_bit(MD_RECOVERY_DONE, &recovery) &&
3361 (test_bit(MD_RECOVERY_RUNNING, &recovery) || 3363 (test_bit(MD_RECOVERY_RUNNING, &recovery) ||
3362 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery)))) { 3364 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery)))) {
3363 if (test_bit(MD_RECOVERY_RESHAPE, &recovery)) 3365 if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
3364 return "reshape"; 3366 return st_reshape;
3365 3367
3366 if (test_bit(MD_RECOVERY_SYNC, &recovery)) { 3368 if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
3367 if (!test_bit(MD_RECOVERY_REQUESTED, &recovery)) 3369 if (!test_bit(MD_RECOVERY_REQUESTED, &recovery))
3368 return "resync"; 3370 return st_resync;
3369 else if (test_bit(MD_RECOVERY_CHECK, &recovery)) 3371 if (test_bit(MD_RECOVERY_CHECK, &recovery))
3370 return "check"; 3372 return st_check;
3371 return "repair"; 3373 return st_repair;
3372 } 3374 }
3373 3375
3374 if (test_bit(MD_RECOVERY_RECOVER, &recovery)) 3376 if (test_bit(MD_RECOVERY_RECOVER, &recovery))
3375 return "recover"; 3377 return st_recover;
3378
3379 if (mddev->reshape_position != MaxSector)
3380 return st_reshape;
3376 } 3381 }
3377 3382
3378 return "idle"; 3383 return st_idle;
3379} 3384}
3380 3385
3381/* 3386/*
@@ -3409,6 +3414,7 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
3409 sector_t resync_max_sectors) 3414 sector_t resync_max_sectors)
3410{ 3415{
3411 sector_t r; 3416 sector_t r;
3417 enum sync_state state;
3412 struct mddev *mddev = &rs->md; 3418 struct mddev *mddev = &rs->md;
3413 3419
3414 clear_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags); 3420 clear_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
@@ -3419,20 +3425,14 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
3419 set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags); 3425 set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
3420 3426
3421 } else { 3427 } else {
3422 if (!test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags) && 3428 state = decipher_sync_action(mddev, recovery);
3423 !test_bit(MD_RECOVERY_INTR, &recovery) && 3429
3424 (test_bit(MD_RECOVERY_NEEDED, &recovery) || 3430 if (state == st_idle && !test_bit(MD_RECOVERY_INTR, &recovery))
3425 test_bit(MD_RECOVERY_RESHAPE, &recovery) ||
3426 test_bit(MD_RECOVERY_RUNNING, &recovery)))
3427 r = mddev->curr_resync_completed;
3428 else
3429 r = mddev->recovery_cp; 3431 r = mddev->recovery_cp;
3432 else
3433 r = mddev->curr_resync_completed;
3430 3434
3431 if (r >= resync_max_sectors && 3435 if (state == st_idle && r >= resync_max_sectors) {
3432 (!test_bit(MD_RECOVERY_REQUESTED, &recovery) ||
3433 (!test_bit(MD_RECOVERY_FROZEN, &recovery) &&
3434 !test_bit(MD_RECOVERY_NEEDED, &recovery) &&
3435 !test_bit(MD_RECOVERY_RUNNING, &recovery)))) {
3436 /* 3436 /*
3437 * Sync complete. 3437 * Sync complete.
3438 */ 3438 */
@@ -3440,24 +3440,20 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
3440 if (test_bit(MD_RECOVERY_RECOVER, &recovery)) 3440 if (test_bit(MD_RECOVERY_RECOVER, &recovery))
3441 set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags); 3441 set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
3442 3442
3443 } else if (test_bit(MD_RECOVERY_RECOVER, &recovery)) { 3443 } else if (state == st_recover)
3444 /* 3444 /*
3445 * In case we are recovering, the array is not in sync 3445 * In case we are recovering, the array is not in sync
3446 * and health chars should show the recovering legs. 3446 * and health chars should show the recovering legs.
3447 */ 3447 */
3448 ; 3448 ;
3449 3449 else if (state == st_resync)
3450 } else if (test_bit(MD_RECOVERY_SYNC, &recovery) &&
3451 !test_bit(MD_RECOVERY_REQUESTED, &recovery)) {
3452 /* 3450 /*
3453 * If "resync" is occurring, the raid set 3451 * If "resync" is occurring, the raid set
3454 * is or may be out of sync hence the health 3452 * is or may be out of sync hence the health
3455 * characters shall be 'a'. 3453 * characters shall be 'a'.
3456 */ 3454 */
3457 set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags); 3455 set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
3458 3456 else if (state == st_reshape)
3459 } else if (test_bit(MD_RECOVERY_RESHAPE, &recovery) &&
3460 !test_bit(MD_RECOVERY_REQUESTED, &recovery)) {
3461 /* 3457 /*
3462 * If "reshape" is occurring, the raid set 3458 * If "reshape" is occurring, the raid set
3463 * is or may be out of sync hence the health 3459 * is or may be out of sync hence the health
@@ -3465,7 +3461,7 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
3465 */ 3461 */
3466 set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags); 3462 set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
3467 3463
3468 } else if (test_bit(MD_RECOVERY_REQUESTED, &recovery)) { 3464 else if (state == st_check || state == st_repair)
3469 /* 3465 /*
3470 * If "check" or "repair" is occurring, the raid set has 3466 * If "check" or "repair" is occurring, the raid set has
3471 * undergone an initial sync and the health characters 3467 * undergone an initial sync and the health characters
@@ -3473,12 +3469,12 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
3473 */ 3469 */
3474 set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags); 3470 set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
3475 3471
3476 } else { 3472 else {
3477 struct md_rdev *rdev; 3473 struct md_rdev *rdev;
3478 3474
3479 /* 3475 /*
3480 * We are idle and recovery is needed, prevent 'A' chars race 3476 * We are idle and recovery is needed, prevent 'A' chars race
3481 * caused by components still set to in-sync by constrcuctor. 3477 * caused by components still set to in-sync by constructor.
3482 */ 3478 */
3483 if (test_bit(MD_RECOVERY_NEEDED, &recovery)) 3479 if (test_bit(MD_RECOVERY_NEEDED, &recovery))
3484 set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags); 3480 set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
@@ -3542,7 +3538,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
3542 progress = rs_get_progress(rs, recovery, resync_max_sectors); 3538 progress = rs_get_progress(rs, recovery, resync_max_sectors);
3543 resync_mismatches = (mddev->last_sync_action && !strcasecmp(mddev->last_sync_action, "check")) ? 3539 resync_mismatches = (mddev->last_sync_action && !strcasecmp(mddev->last_sync_action, "check")) ?
3544 atomic64_read(&mddev->resync_mismatches) : 0; 3540 atomic64_read(&mddev->resync_mismatches) : 0;
3545 sync_action = decipher_sync_action(&rs->md, recovery); 3541 sync_action = sync_str(decipher_sync_action(&rs->md, recovery));
3546 3542
3547 /* HM FIXME: do we want another state char for raid0? It shows 'D'/'A'/'-' now */ 3543 /* HM FIXME: do we want another state char for raid0? It shows 'D'/'A'/'-' now */
3548 for (i = 0; i < rs->raid_disks; i++) 3544 for (i = 0; i < rs->raid_disks; i++)
@@ -3892,14 +3888,13 @@ static int rs_start_reshape(struct raid_set *rs)
3892 struct mddev *mddev = &rs->md; 3888 struct mddev *mddev = &rs->md;
3893 struct md_personality *pers = mddev->pers; 3889 struct md_personality *pers = mddev->pers;
3894 3890
3891 /* Don't allow the sync thread to work until the table gets reloaded. */
3892 set_bit(MD_RECOVERY_WAIT, &mddev->recovery);
3893
3895 r = rs_setup_reshape(rs); 3894 r = rs_setup_reshape(rs);
3896 if (r) 3895 if (r)
3897 return r; 3896 return r;
3898 3897
3899 /* Need to be resumed to be able to start reshape, recovery is frozen until raid_resume() though */
3900 if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags))
3901 mddev_resume(mddev);
3902
3903 /* 3898 /*
3904 * Check any reshape constraints enforced by the personalility 3899 * Check any reshape constraints enforced by the personalility
3905 * 3900 *
@@ -3923,10 +3918,6 @@ static int rs_start_reshape(struct raid_set *rs)
3923 } 3918 }
3924 } 3919 }
3925 3920
3926 /* Suspend because a resume will happen in raid_resume() */
3927 set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags);
3928 mddev_suspend(mddev);
3929
3930 /* 3921 /*
3931 * Now reshape got set up, update superblocks to 3922 * Now reshape got set up, update superblocks to
3932 * reflect the fact so that a table reload will 3923 * reflect the fact so that a table reload will
@@ -3947,29 +3938,6 @@ static int raid_preresume(struct dm_target *ti)
3947 if (test_and_set_bit(RT_FLAG_RS_PRERESUMED, &rs->runtime_flags)) 3938 if (test_and_set_bit(RT_FLAG_RS_PRERESUMED, &rs->runtime_flags))
3948 return 0; 3939 return 0;
3949 3940
3950 if (!test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags)) {
3951 struct raid_set *rs_active = rs_find_active(rs);
3952
3953 if (rs_active) {
3954 /*
3955 * In case no rebuilds have been requested
3956 * and an active table slot exists, copy
3957 * current resynchonization completed and
3958 * reshape position pointers across from
3959 * suspended raid set in the active slot.
3960 *
3961 * This resumes the new mapping at current
3962 * offsets to continue recover/reshape without
3963 * necessarily redoing a raid set partially or
3964 * causing data corruption in case of a reshape.
3965 */
3966 if (rs_active->md.curr_resync_completed != MaxSector)
3967 mddev->curr_resync_completed = rs_active->md.curr_resync_completed;
3968 if (rs_active->md.reshape_position != MaxSector)
3969 mddev->reshape_position = rs_active->md.reshape_position;
3970 }
3971 }
3972
3973 /* 3941 /*
3974 * The superblocks need to be updated on disk if the 3942 * The superblocks need to be updated on disk if the
3975 * array is new or new devices got added (thus zeroed 3943 * array is new or new devices got added (thus zeroed
@@ -4046,7 +4014,7 @@ static void raid_resume(struct dm_target *ti)
4046 4014
4047static struct target_type raid_target = { 4015static struct target_type raid_target = {
4048 .name = "raid", 4016 .name = "raid",
4049 .version = {1, 13, 2}, 4017 .version = {1, 14, 0},
4050 .module = THIS_MODULE, 4018 .module = THIS_MODULE,
4051 .ctr = raid_ctr, 4019 .ctr = raid_ctr,
4052 .dtr = raid_dtr, 4020 .dtr = raid_dtr,