path: root/drivers/md/dm-raid.c
author    Heinz Mauelshagen <heinzm@redhat.com>  2016-11-30 16:31:05 -0500
committer Mike Snitzer <snitzer@redhat.com>      2017-01-25 06:49:06 -0500
commit    63c32ed4afc2afd6b5551a8fcdea5b546dcaca4f (patch)
tree      8cdb6bf0a8080912d2590a3d5513b23b8c51ddd1 /drivers/md/dm-raid.c
parent    50c4feb9a3e3df9574d952a4ed2f009f8135e4c7 (diff)
dm raid: add raid4/5/6 journaling support
Add md raid4/5/6 journaling support (upstream commit bac624f3f86a started the implementation), which closes the write hole (i.e. non-atomic updates to stripes) by using a dedicated journal device.

Background: raid4/5/6 stripes hold N data payloads per stripe plus one parity payload (raid4/5) or two P/Q syndrome payloads (raid6) in an in-memory stripe cache. The parity or P/Q syndromes used to recover data payloads after a disk failure are calculated from the N data payloads and need to be updated on the different component devices of the raid device. Those are non-atomic, persistent updates, so a crash can leave some stripe payloads updated and others not, causing data loss during stripe recovery. This problem is addressed by writing whole stripe cache entries (together with journal metadata) as a persistent journal entry to a dedicated journal device. Only after that journal entry has been written successfully is the stripe cache entry written to the component devices of the raid device (i.e. writethrough type). After a crash, the entry can be recovered from the journal and written again, ensuring stripe payloads consistent enough for data recovery.

Future dependencies: once the writeback caching currently being worked on upstream (to compensate for the throughput implications of the writethrough overhead) is supported together with journaling, an additional patch based on this one will support it in dm-raid.

Journal resilience related remarks: because stripes are recovered from the journal after a crash, the journal device itself had better be resilient. Resilience becomes mandatory with future writeback support, because losing the working set in the log means data loss, as opposed to writethrough, where the loss of the journal device 'only' reintroduces the write hole.

Also fix the comment on data offsets in parse_dev_params() and initialize new_data_offset as well.

Signed-off-by: Heinz Mauelshagen <heinzm@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
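
Usage sketch (not part of this commit; device names and sector counts are hypothetical): a journaled set is created by adding the new 'journal_dev <dev>' argument pair to the dm-raid constructor table, e.g.:

    # Hypothetical 3-leg raid5 with 64-sector (32 KiB) chunks and a dedicated
    # journal device; <#raid_params> is 3: chunk size plus "journal_dev <dev>".
    dmsetup create r5_journaled --table "0 41943040 raid raid5_ls \
        3 64 journal_dev /dev/sdj1 \
        3 /dev/meta0 /dev/data0 /dev/meta1 /dev/data1 /dev/meta2 /dev/data2"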
Diffstat (limited to 'drivers/md/dm-raid.c')
-rw-r--r--    drivers/md/dm-raid.c    161
1 file changed, 140 insertions(+), 21 deletions(-)
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index d7e652a22a66..e52c493212d0 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -24,6 +24,11 @@
  */
 #define MIN_FREE_RESHAPE_SPACE to_sector(4*4096)
 
+/*
+ * Minimum journal space 4 MiB in sectors.
+ */
+#define MIN_RAID456_JOURNAL_SPACE (4*2048)
+
 static bool devices_handle_discard_safely = false;
 
 /*
@@ -73,6 +78,9 @@ struct raid_dev {
 #define __CTR_FLAG_DATA_OFFSET		13 /* 2 */ /* Only with reshapable raid4/5/6/10! */
 #define __CTR_FLAG_RAID10_USE_NEAR_SETS 14 /* 2 */ /* Only with raid10! */
 
+/* New for v1.10.0 */
+#define __CTR_FLAG_JOURNAL_DEV		15 /* 2 */ /* Only with raid4/5/6! */
+
 /*
  * Flags for rs->ctr_flags field.
  */
@@ -91,6 +99,7 @@ struct raid_dev {
 #define CTR_FLAG_DELTA_DISKS		(1 << __CTR_FLAG_DELTA_DISKS)
 #define CTR_FLAG_DATA_OFFSET		(1 << __CTR_FLAG_DATA_OFFSET)
 #define CTR_FLAG_RAID10_USE_NEAR_SETS	(1 << __CTR_FLAG_RAID10_USE_NEAR_SETS)
+#define CTR_FLAG_JOURNAL_DEV		(1 << __CTR_FLAG_JOURNAL_DEV)
 
 /*
  * Definitions of various constructor flags to
@@ -163,7 +172,8 @@ struct raid_dev {
				 CTR_FLAG_STRIPE_CACHE | \
				 CTR_FLAG_REGION_SIZE | \
				 CTR_FLAG_DELTA_DISKS | \
-				 CTR_FLAG_DATA_OFFSET)
+				 CTR_FLAG_DATA_OFFSET | \
+				 CTR_FLAG_JOURNAL_DEV)
 
 #define RAID6_VALID_FLAGS	(CTR_FLAG_SYNC | \
				 CTR_FLAG_REBUILD | \
@@ -173,7 +183,8 @@ struct raid_dev {
				 CTR_FLAG_STRIPE_CACHE | \
				 CTR_FLAG_REGION_SIZE | \
				 CTR_FLAG_DELTA_DISKS | \
-				 CTR_FLAG_DATA_OFFSET)
+				 CTR_FLAG_DATA_OFFSET | \
+				 CTR_FLAG_JOURNAL_DEV)
 /* ...valid options definitions per raid level */
 
 /*
@@ -222,6 +233,12 @@ struct raid_set {
	struct raid_type *raid_type;
	struct dm_target_callbacks callbacks;
 
+	/* Optional raid4/5/6 journal device */
+	struct journal_dev {
+		struct dm_dev *dev;
+		struct md_rdev rdev;
+	} journal_dev;
+
	struct raid_dev dev[0];
 };
 
@@ -306,6 +323,7 @@ static struct arg_name_flag {
	{ CTR_FLAG_DATA_OFFSET, "data_offset"},
	{ CTR_FLAG_DELTA_DISKS, "delta_disks"},
	{ CTR_FLAG_RAID10_USE_NEAR_SETS, "raid10_use_near_sets"},
+	{ CTR_FLAG_JOURNAL_DEV, "journal_dev" },
 };
 
 /* Return argument name string for given @flag */
@@ -627,7 +645,8 @@ static void rs_set_capacity(struct raid_set *rs)
	 * is unintended in case of out-of-place reshaping
	 */
	rdev_for_each(rdev, mddev)
-		rdev->sectors = mddev->dev_sectors;
+		if (!test_bit(Journal, &rdev->flags))
+			rdev->sectors = mddev->dev_sectors;
 
	set_capacity(gendisk, mddev->array_sectors);
	revalidate_disk(gendisk);
@@ -713,6 +732,11 @@ static void raid_set_free(struct raid_set *rs)
 {
	int i;
 
+	if (rs->journal_dev.dev) {
+		md_rdev_clear(&rs->journal_dev.rdev);
+		dm_put_device(rs->ti, rs->journal_dev.dev);
+	}
+
	for (i = 0; i < rs->raid_disks; i++) {
		if (rs->dev[i].meta_dev)
			dm_put_device(rs->ti, rs->dev[i].meta_dev);
@@ -760,10 +784,11 @@ static int parse_dev_params(struct raid_set *rs, struct dm_arg_set *as)
		rs->dev[i].data_dev = NULL;
 
		/*
-		 * There are no offsets, since there is a separate device
-		 * for data and metadata.
+		 * There are no offsets initially.
+		 * Out of place reshape will set them accordingly.
		 */
		rs->dev[i].rdev.data_offset = 0;
+		rs->dev[i].rdev.new_data_offset = 0;
		rs->dev[i].rdev.mddev = &rs->md;
 
		arg = dm_shift_arg(as);
@@ -821,6 +846,9 @@ static int parse_dev_params(struct raid_set *rs, struct dm_arg_set *as)
		rebuild++;
	}
 
+	if (rs->journal_dev.dev)
+		list_add_tail(&rs->journal_dev.rdev.same_set, &rs->md.disks);
+
	if (metadata_available) {
		rs->md.external = 0;
		rs->md.persistent = 1;
@@ -1026,6 +1054,8 @@ too_many:
  *	[max_write_behind <sectors>]	See '-write-behind=' (man mdadm)
  *	[stripe_cache <sectors>]	Stripe cache size for higher RAIDs
  *	[region_size <sectors>]		Defines granularity of bitmap
+ *	[journal_dev <dev>]		raid4/5/6 journaling device
+ *					(i.e. write hole closing log)
  *
  * RAID10-only options:
  *	[raid10_copies <# copies>]	Number of copies.  (Default: 2)
@@ -1133,7 +1163,7 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as,
		/*
		 * Parameters that take a string value are checked here.
		 */
-
+		/* "raid10_format {near|offset|far} */
		if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_RAID10_FORMAT))) {
			if (test_and_set_bit(__CTR_FLAG_RAID10_FORMAT, &rs->ctr_flags)) {
				rs->ti->error = "Only one 'raid10_format' argument pair allowed";
@@ -1151,6 +1181,41 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as,
			continue;
		}
 
+		/* "journal_dev dev" */
+		if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_JOURNAL_DEV))) {
+			int r;
+			struct md_rdev *jdev;
+
+			if (test_and_set_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags)) {
+				rs->ti->error = "Only one raid4/5/6 set journaling device allowed";
+				return -EINVAL;
+			}
+			if (!rt_is_raid456(rt)) {
+				rs->ti->error = "'journal_dev' is an invalid parameter for this RAID type";
+				return -EINVAL;
+			}
+			r = dm_get_device(rs->ti, arg, dm_table_get_mode(rs->ti->table),
+					  &rs->journal_dev.dev);
+			if (r) {
+				rs->ti->error = "raid4/5/6 journal device lookup failure";
+				return r;
+			}
+			jdev = &rs->journal_dev.rdev;
+			md_rdev_init(jdev);
+			jdev->mddev = &rs->md;
+			jdev->bdev = rs->journal_dev.dev->bdev;
+			jdev->sectors = to_sector(i_size_read(jdev->bdev->bd_inode));
+			if (jdev->sectors < MIN_RAID456_JOURNAL_SPACE) {
+				rs->ti->error = "No space for raid4/5/6 journal";
+				return -ENOSPC;
+			}
+			set_bit(Journal, &jdev->flags);
+			continue;
+		}
+
+		/*
+		 * Parameters with number values from here on.
+		 */
		if (kstrtoint(arg, 10, &value) < 0) {
			rs->ti->error = "Bad numerical argument given in raid params";
			return -EINVAL;
@@ -1436,7 +1501,8 @@ static sector_t __rdev_sectors(struct raid_set *rs)
	for (i = 0; i < rs->md.raid_disks; i++) {
		struct md_rdev *rdev = &rs->dev[i].rdev;
 
-		if (rdev->bdev && rdev->sectors)
+		if (!test_bit(Journal, &rdev->flags) &&
+		    rdev->bdev && rdev->sectors)
			return rdev->sectors;
	}
 
@@ -1486,7 +1552,8 @@ static int rs_set_dev_and_array_sectors(struct raid_set *rs, bool use_mddev)
		array_sectors = (data_stripes + delta_disks) * dev_sectors;
 
	rdev_for_each(rdev, mddev)
-		rdev->sectors = dev_sectors;
+		if (!test_bit(Journal, &rdev->flags))
+			rdev->sectors = dev_sectors;
 
	mddev->array_sectors = array_sectors;
	mddev->dev_sectors = dev_sectors;
@@ -2164,6 +2231,9 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev)
	 */
	d = 0;
	rdev_for_each(r, mddev) {
+		if (test_bit(Journal, &rdev->flags))
+			continue;
+
		if (test_bit(FirstUse, &r->flags))
			new_devs++;
 
@@ -2219,7 +2289,8 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev)
	 */
	sb_retrieve_failed_devices(sb, failed_devices);
	rdev_for_each(r, mddev) {
-		if (!r->sb_page)
+		if (test_bit(Journal, &rdev->flags) ||
+		    !r->sb_page)
			continue;
		sb2 = page_address(r->sb_page);
		sb2->failed_devices = 0;
@@ -2339,6 +2410,9 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
 
	freshest = NULL;
	rdev_for_each(rdev, mddev) {
+		if (test_bit(Journal, &rdev->flags))
+			continue;
+
		/*
		 * Skipping super_load due to CTR_FLAG_SYNC will cause
		 * the array to undergo initialization again as
@@ -2402,7 +2476,9 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
		return -EINVAL;
 
	rdev_for_each(rdev, mddev)
-		if ((rdev != freshest) && super_validate(rs, rdev))
+		if (!test_bit(Journal, &rdev->flags) &&
+		    rdev != freshest &&
+		    super_validate(rs, rdev))
			return -EINVAL;
	return 0;
 }
@@ -2489,10 +2565,12 @@ static int rs_adjust_data_offsets(struct raid_set *rs)
		return -ENOSPC;
	}
 out:
-	/* Adjust data offsets on all rdevs */
+	/* Adjust data offsets on all rdevs but on any raid4/5/6 journal device */
	rdev_for_each(rdev, &rs->md) {
-		rdev->data_offset = data_offset;
-		rdev->new_data_offset = new_data_offset;
+		if (!test_bit(Journal, &rdev->flags)) {
+			rdev->data_offset = data_offset;
+			rdev->new_data_offset = new_data_offset;
+		}
	}
 
	return 0;
@@ -2505,8 +2583,10 @@ static void __reorder_raid_disk_indexes(struct raid_set *rs)
	struct md_rdev *rdev;
 
	rdev_for_each(rdev, &rs->md) {
-		rdev->raid_disk = i++;
-		rdev->saved_raid_disk = rdev->new_raid_disk = -1;
+		if (!test_bit(Journal, &rdev->flags)) {
+			rdev->raid_disk = i++;
+			rdev->saved_raid_disk = rdev->new_raid_disk = -1;
+		}
	}
 }
 
@@ -2903,6 +2983,13 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
			goto bad;
		}
 
+		/* We can't takeover a journaled raid4/5/6 */
+		if (test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags)) {
+			ti->error = "Can't takeover a journaled raid4/5/6 set";
+			r = -EPERM;
+			goto bad;
+		}
+
		/*
		 * If a takeover is needed, userspace sets any additional
		 * devices to rebuild and we can check for a valid request here.
@@ -2925,6 +3012,18 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
		rs_set_new(rs);
	} else if (rs_reshape_requested(rs)) {
		/*
+		 * No need to check for 'ongoing' takeover here, because takeover
+		 * is an instant operation as oposed to an ongoing reshape.
+		 */
+
+		/* We can't reshape a journaled raid4/5/6 */
+		if (test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags)) {
+			ti->error = "Can't reshape a journaled raid4/5/6 set";
+			r = -EPERM;
+			goto bad;
+		}
+
+		/*
		 * We can only prepare for a reshape here, because the
		 * raid set needs to run to provide the repective reshape
		 * check functions via its MD personality instance.
@@ -3072,13 +3171,13 @@ static const char *decipher_sync_action(struct mddev *mddev)
 }
 
 /*
- * Return status string @rdev
+ * Return status string for @rdev
  *
  * Status characters:
  *
- *  'D' = Dead/Failed device
+ *  'D' = Dead/Failed raid set component or raid4/5/6 journal device
  *  'a' = Alive but not in-sync
- *  'A' = Alive and in-sync
+ *  'A' = Alive and in-sync raid set component or alive raid4/5/6 journal device
  *  '-' = Non-existing device (i.e. uspace passed '- -' into the ctr)
  */
 static const char *__raid_dev_status(struct md_rdev *rdev, bool array_in_sync)
@@ -3087,6 +3186,8 @@ static const char *__raid_dev_status(struct md_rdev *rdev, bool array_in_sync)
		return "-";
	else if (test_bit(Faulty, &rdev->flags))
		return "D";
+	else if (test_bit(Journal, &rdev->flags))
+		return "A";
	else if (!array_in_sync || !test_bit(In_sync, &rdev->flags))
		return "a";
	else
@@ -3155,7 +3256,8 @@ static sector_t rs_get_progress(struct raid_set *rs,
		 * being initialized.
		 */
		rdev_for_each(rdev, mddev)
-			if (!test_bit(In_sync, &rdev->flags))
+			if (!test_bit(Journal, &rdev->flags) &&
+			    !test_bit(In_sync, &rdev->flags))
				*array_in_sync = true;
 #if 0
		r = 0; /* HM FIXME: TESTME: https://bugzilla.redhat.com/show_bug.cgi?id=1210637 ? */
@@ -3255,6 +3357,12 @@ static void raid_status(struct dm_target *ti, status_type_t type,
		 * so retrieving it from the first raid disk is sufficient.
		 */
		DMEMIT(" %llu", (unsigned long long) rs->dev[0].rdev.data_offset);
+
+		/*
+		 * v1.10.0+:
+		 */
+		DMEMIT(" %s", test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags) ?
+			      __raid_dev_status(&rs->journal_dev.rdev, 0) : "-");
		break;
 
	case STATUSTYPE_TABLE:
@@ -3268,7 +3376,8 @@ static void raid_status(struct dm_target *ti, status_type_t type,
		raid_param_cnt += rebuild_disks * 2 +
				  write_mostly_params +
				  hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_NO_ARGS) +
-				  hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_ONE_ARG) * 2;
+				  hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_ONE_ARG) * 2 +
+				  (test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags) ? 2 : 0);
		/* Emit table line */
		DMEMIT("%s %u %u", rs->raid_type->name, raid_param_cnt, mddev->new_chunk_sectors);
		if (test_bit(__CTR_FLAG_RAID10_FORMAT, &rs->ctr_flags))
@@ -3315,6 +3424,9 @@ static void raid_status(struct dm_target *ti, status_type_t type,
		if (test_bit(__CTR_FLAG_MIN_RECOVERY_RATE, &rs->ctr_flags))
			DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_MIN_RECOVERY_RATE),
				 mddev->sync_speed_min);
+		if (test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags))
+			DMEMIT(" %s %s", dm_raid_arg_name_by_flag(CTR_FLAG_JOURNAL_DEV),
+				__get_dev_name(rs->journal_dev.dev));
		DMEMIT(" %d", rs->raid_disks);
		for (i = 0; i < rs->raid_disks; i++)
			DMEMIT(" %s %s", __get_dev_name(rs->dev[i].meta_dev),
@@ -3432,6 +3544,10 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs)
 
	for (i = 0; i < mddev->raid_disks; i++) {
		r = &rs->dev[i].rdev;
+		/* HM FIXME: enhance journal device recovery processing */
+		if (test_bit(Journal, &r->flags))
+			continue;
+
		if (test_bit(Faulty, &r->flags) && r->sb_page &&
		    sync_page_io(r, 0, r->sb_size, r->sb_page,
				 REQ_OP_READ, 0, true)) {
@@ -3480,6 +3596,9 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs)
		uint64_t failed_devices[DISKS_ARRAY_ELEMS];
 
		rdev_for_each(r, &rs->md) {
+			if (test_bit(Journal, &r->flags))
+				continue;
+
			sb = page_address(r->sb_page);
			sb_retrieve_failed_devices(sb, failed_devices);
 
@@ -3658,7 +3777,7 @@ static void raid_resume(struct dm_target *ti)
 
 static struct target_type raid_target = {
	.name = "raid",
-	.version = {1, 9, 2},
+	.version = {1, 10, 0},
	.module = THIS_MODULE,
	.ctr = raid_ctr,
	.dtr = raid_dtr,
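
Usage note (not from the patch itself; values are hypothetical): with target version 1.10.0 the STATUSTYPE_INFO status line gains one trailing health character for the journal device, 'A' (alive), 'D' (dead/failed), or '-' when no journal_dev was configured. A journaled raid5 set would therefore report roughly:

    # dmsetup status r5_journaled
    0 41943040 raid raid5_ls 3 AAA 41943040/41943040 idle 0 0 A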