diff options
author | Jonathan Brassow <jbrassow@redhat.com> | 2013-02-20 21:28:10 -0500 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2013-02-25 19:55:36 -0500 |
commit | fe5d2f4a15967bbe907e7b3e31e49dae7af7cc6b (patch) | |
tree | 480fe9fd2e9cd0884b375351c4db98a9dfb21aac /drivers/md | |
parent | 9a3152ab024867100f2f50d124b998d05fb1c3f6 (diff) |
DM RAID: Add support for MD's RAID10 "far" and "offset" algorithms
DM RAID: Add support for MD's RAID10 "far" and "offset" algorithms
Until now, dm-raid.c only supported the "near" algorithm of MD's RAID10
implementation. This patch adds support for the "far" and "offset"
algorithms, but only with the improved redundancy that is brought with
the introduction of the 'use_far_sets' bit, which shifts copied stripes
according to smaller sets vs the entire array. That is, bit 17 (0x20000)
of the 'layout' variable that defines the RAID10 implementation will
always be set. (More information on how the 'layout' variable selects
the RAID10 algorithm can be found in the opening comments of
drivers/md/raid10.c.)
Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/dm-raid.c | 123 |
1 files changed, 103 insertions, 20 deletions
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 9e58dbd8d8cb..22fd55993723 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c | |||
@@ -91,15 +91,44 @@ static struct raid_type { | |||
91 | {"raid6_nc", "RAID6 (N continue)", 2, 4, 6, ALGORITHM_ROTATING_N_CONTINUE} | 91 | {"raid6_nc", "RAID6 (N continue)", 2, 4, 6, ALGORITHM_ROTATING_N_CONTINUE} |
92 | }; | 92 | }; |
93 | 93 | ||
94 | static char *raid10_md_layout_to_format(int layout) | ||
95 | { | ||
96 | /* | ||
97 | * Bit 16 and 17 stand for "offset" and "use_far_sets" | ||
98 | * Refer to MD's raid10.c for details | ||
99 | */ | ||
100 | if ((layout & 0x10000) && (layout & 0x20000)) | ||
101 | return "offset"; | ||
102 | |||
103 | if ((layout & 0xFF) > 1) | ||
104 | return "near"; | ||
105 | |||
106 | return "far"; | ||
107 | } | ||
108 | |||
94 | static unsigned raid10_md_layout_to_copies(int layout) | 109 | static unsigned raid10_md_layout_to_copies(int layout) |
95 | { | 110 | { |
96 | return layout & 0xFF; | 111 | if ((layout & 0xFF) > 1) |
112 | return layout & 0xFF; | ||
113 | return (layout >> 8) & 0xFF; | ||
97 | } | 114 | } |
98 | 115 | ||
99 | static int raid10_format_to_md_layout(char *format, unsigned copies) | 116 | static int raid10_format_to_md_layout(char *format, unsigned copies) |
100 | { | 117 | { |
101 | /* 1 "far" copy, and 'copies' "near" copies */ | 118 | unsigned n = 1, f = 1; |
102 | return (1 << 8) | (copies & 0xFF); | 119 | |
120 | if (!strcmp("near", format)) | ||
121 | n = copies; | ||
122 | else | ||
123 | f = copies; | ||
124 | |||
125 | if (!strcmp("offset", format)) | ||
126 | return 0x30000 | (f << 8) | n; | ||
127 | |||
128 | if (!strcmp("far", format)) | ||
129 | return 0x20000 | (f << 8) | n; | ||
130 | |||
131 | return (f << 8) | n; | ||
103 | } | 132 | } |
104 | 133 | ||
105 | static struct raid_type *get_raid_type(char *name) | 134 | static struct raid_type *get_raid_type(char *name) |
@@ -352,6 +381,7 @@ static int validate_raid_redundancy(struct raid_set *rs) | |||
352 | { | 381 | { |
353 | unsigned i, rebuild_cnt = 0; | 382 | unsigned i, rebuild_cnt = 0; |
354 | unsigned rebuilds_per_group, copies, d; | 383 | unsigned rebuilds_per_group, copies, d; |
384 | unsigned group_size, last_group_start; | ||
355 | 385 | ||
356 | for (i = 0; i < rs->md.raid_disks; i++) | 386 | for (i = 0; i < rs->md.raid_disks; i++) |
357 | if (!test_bit(In_sync, &rs->dev[i].rdev.flags) || | 387 | if (!test_bit(In_sync, &rs->dev[i].rdev.flags) || |
@@ -379,9 +409,6 @@ static int validate_raid_redundancy(struct raid_set *rs) | |||
379 | * as long as the failed devices occur in different mirror | 409 | * as long as the failed devices occur in different mirror |
380 | * groups (i.e. different stripes). | 410 | * groups (i.e. different stripes). |
381 | * | 411 | * |
382 | * Right now, we only allow for "near" copies. When other | ||
383 | * formats are added, we will have to check those too. | ||
384 | * | ||
385 | * When checking "near" format, make sure no adjacent devices | 412 | * When checking "near" format, make sure no adjacent devices |
386 | * have failed beyond what can be handled. In addition to the | 413 | * have failed beyond what can be handled. In addition to the |
387 | * simple case where the number of devices is a multiple of the | 414 | * simple case where the number of devices is a multiple of the |
@@ -391,14 +418,41 @@ static int validate_raid_redundancy(struct raid_set *rs) | |||
391 | * A A B B C | 418 | * A A B B C |
392 | * C D D E E | 419 | * C D D E E |
393 | */ | 420 | */ |
394 | for (i = 0; i < rs->md.raid_disks * copies; i++) { | 421 | if (!strcmp("near", raid10_md_layout_to_format(rs->md.layout))) { |
395 | if (!(i % copies)) | 422 | for (i = 0; i < rs->md.raid_disks * copies; i++) { |
423 | if (!(i % copies)) | ||
424 | rebuilds_per_group = 0; | ||
425 | d = i % rs->md.raid_disks; | ||
426 | if ((!rs->dev[d].rdev.sb_page || | ||
427 | !test_bit(In_sync, &rs->dev[d].rdev.flags)) && | ||
428 | (++rebuilds_per_group >= copies)) | ||
429 | goto too_many; | ||
430 | } | ||
431 | break; | ||
432 | } | ||
433 | |||
434 | /* | ||
435 | * When checking "far" and "offset" formats, we need to ensure | ||
436 | * that the device that holds its copy is not also dead or | ||
437 | * being rebuilt. (Note that "far" and "offset" formats only | ||
438 | * support two copies right now. These formats also only ever | ||
439 | * use the 'use_far_sets' variant.) | ||
440 | * | ||
441 | * This check is somewhat complicated by the need to account | ||
442 | * for arrays that are not a multiple of (far) copies. This | ||
443 | * results in the need to treat the last (potentially larger) | ||
444 | * set differently. | ||
445 | */ | ||
446 | group_size = (rs->md.raid_disks / copies); | ||
447 | last_group_start = (rs->md.raid_disks / group_size) - 1; | ||
448 | last_group_start *= group_size; | ||
449 | for (i = 0; i < rs->md.raid_disks; i++) { | ||
450 | if (!(i % copies) && !(i > last_group_start)) | ||
396 | rebuilds_per_group = 0; | 451 | rebuilds_per_group = 0; |
397 | d = i % rs->md.raid_disks; | 452 | if ((!rs->dev[i].rdev.sb_page || |
398 | if ((!rs->dev[d].rdev.sb_page || | 453 | !test_bit(In_sync, &rs->dev[i].rdev.flags)) && |
399 | !test_bit(In_sync, &rs->dev[d].rdev.flags)) && | ||
400 | (++rebuilds_per_group >= copies)) | 454 | (++rebuilds_per_group >= copies)) |
401 | goto too_many; | 455 | goto too_many; |
402 | } | 456 | } |
403 | break; | 457 | break; |
404 | default: | 458 | default: |
@@ -433,7 +487,7 @@ too_many: | |||
433 | * | 487 | * |
434 | * RAID10-only options: | 488 | * RAID10-only options: |
435 | * [raid10_copies <# copies>] Number of copies. (Default: 2) | 489 | * [raid10_copies <# copies>] Number of copies. (Default: 2) |
436 | * [raid10_format <near>] Layout algorithm. (Default: near) | 490 | * [raid10_format <near|far|offset>] Layout algorithm. (Default: near) |
437 | */ | 491 | */ |
438 | static int parse_raid_params(struct raid_set *rs, char **argv, | 492 | static int parse_raid_params(struct raid_set *rs, char **argv, |
439 | unsigned num_raid_params) | 493 | unsigned num_raid_params) |
@@ -520,7 +574,9 @@ static int parse_raid_params(struct raid_set *rs, char **argv, | |||
520 | rs->ti->error = "'raid10_format' is an invalid parameter for this RAID type"; | 574 | rs->ti->error = "'raid10_format' is an invalid parameter for this RAID type"; |
521 | return -EINVAL; | 575 | return -EINVAL; |
522 | } | 576 | } |
523 | if (strcmp("near", argv[i])) { | 577 | if (strcmp("near", argv[i]) && |
578 | strcmp("far", argv[i]) && | ||
579 | strcmp("offset", argv[i])) { | ||
524 | rs->ti->error = "Invalid 'raid10_format' value given"; | 580 | rs->ti->error = "Invalid 'raid10_format' value given"; |
525 | return -EINVAL; | 581 | return -EINVAL; |
526 | } | 582 | } |
@@ -644,6 +700,15 @@ static int parse_raid_params(struct raid_set *rs, char **argv, | |||
644 | return -EINVAL; | 700 | return -EINVAL; |
645 | } | 701 | } |
646 | 702 | ||
703 | /* | ||
704 | * If the format is not "near", we only support | ||
705 | * two copies at the moment. | ||
706 | */ | ||
707 | if (strcmp("near", raid10_format) && (raid10_copies > 2)) { | ||
708 | rs->ti->error = "Too many copies for given RAID10 format."; | ||
709 | return -EINVAL; | ||
710 | } | ||
711 | |||
647 | /* (Len * #mirrors) / #devices */ | 712 | /* (Len * #mirrors) / #devices */ |
648 | sectors_per_dev = rs->ti->len * raid10_copies; | 713 | sectors_per_dev = rs->ti->len * raid10_copies; |
649 | sector_div(sectors_per_dev, rs->md.raid_disks); | 714 | sector_div(sectors_per_dev, rs->md.raid_disks); |
@@ -854,17 +919,30 @@ static int super_init_validation(struct mddev *mddev, struct md_rdev *rdev) | |||
854 | /* | 919 | /* |
855 | * Reshaping is not currently allowed | 920 | * Reshaping is not currently allowed |
856 | */ | 921 | */ |
857 | if ((le32_to_cpu(sb->level) != mddev->level) || | 922 | if (le32_to_cpu(sb->level) != mddev->level) { |
858 | (le32_to_cpu(sb->layout) != mddev->layout) || | 923 | DMERR("Reshaping arrays not yet supported. (RAID level change)"); |
859 | (le32_to_cpu(sb->stripe_sectors) != mddev->chunk_sectors)) { | 924 | return -EINVAL; |
860 | DMERR("Reshaping arrays not yet supported."); | 925 | } |
926 | if (le32_to_cpu(sb->layout) != mddev->layout) { | ||
927 | DMERR("Reshaping arrays not yet supported. (RAID layout change)"); | ||
928 | DMERR(" 0x%X vs 0x%X", le32_to_cpu(sb->layout), mddev->layout); | ||
929 | DMERR(" Old layout: %s w/ %d copies", | ||
930 | raid10_md_layout_to_format(le32_to_cpu(sb->layout)), | ||
931 | raid10_md_layout_to_copies(le32_to_cpu(sb->layout))); | ||
932 | DMERR(" New layout: %s w/ %d copies", | ||
933 | raid10_md_layout_to_format(mddev->layout), | ||
934 | raid10_md_layout_to_copies(mddev->layout)); | ||
935 | return -EINVAL; | ||
936 | } | ||
937 | if (le32_to_cpu(sb->stripe_sectors) != mddev->chunk_sectors) { | ||
938 | DMERR("Reshaping arrays not yet supported. (stripe sectors change)"); | ||
861 | return -EINVAL; | 939 | return -EINVAL; |
862 | } | 940 | } |
863 | 941 | ||
864 | /* We can only change the number of devices in RAID1 right now */ | 942 | /* We can only change the number of devices in RAID1 right now */ |
865 | if ((rs->raid_type->level != 1) && | 943 | if ((rs->raid_type->level != 1) && |
866 | (le32_to_cpu(sb->num_devices) != mddev->raid_disks)) { | 944 | (le32_to_cpu(sb->num_devices) != mddev->raid_disks)) { |
867 | DMERR("Reshaping arrays not yet supported."); | 945 | DMERR("Reshaping arrays not yet supported. (device count change)"); |
868 | return -EINVAL; | 946 | return -EINVAL; |
869 | } | 947 | } |
870 | 948 | ||
@@ -1329,7 +1407,8 @@ static int raid_status(struct dm_target *ti, status_type_t type, | |||
1329 | raid10_md_layout_to_copies(rs->md.layout)); | 1407 | raid10_md_layout_to_copies(rs->md.layout)); |
1330 | 1408 | ||
1331 | if (rs->print_flags & DMPF_RAID10_FORMAT) | 1409 | if (rs->print_flags & DMPF_RAID10_FORMAT) |
1332 | DMEMIT(" raid10_format near"); | 1410 | DMEMIT(" raid10_format %s", |
1411 | raid10_md_layout_to_format(rs->md.layout)); | ||
1333 | 1412 | ||
1334 | DMEMIT(" %d", rs->md.raid_disks); | 1413 | DMEMIT(" %d", rs->md.raid_disks); |
1335 | for (i = 0; i < rs->md.raid_disks; i++) { | 1414 | for (i = 0; i < rs->md.raid_disks; i++) { |
@@ -1420,6 +1499,10 @@ static struct target_type raid_target = { | |||
1420 | 1499 | ||
1421 | static int __init dm_raid_init(void) | 1500 | static int __init dm_raid_init(void) |
1422 | { | 1501 | { |
1502 | DMINFO("Loading target version %u.%u.%u", | ||
1503 | raid_target.version[0], | ||
1504 | raid_target.version[1], | ||
1505 | raid_target.version[2]); | ||
1423 | return dm_register_target(&raid_target); | 1506 | return dm_register_target(&raid_target); |
1424 | } | 1507 | } |
1425 | 1508 | ||