diff options
-rw-r--r-- | Documentation/device-mapper/dm-raid.txt | 26 | ||||
-rw-r--r-- | drivers/md/dm-raid.c | 95 |
2 files changed, 116 insertions, 5 deletions
diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt index 946c73342cde..1c1844957166 100644 --- a/Documentation/device-mapper/dm-raid.txt +++ b/Documentation/device-mapper/dm-raid.txt | |||
@@ -27,6 +27,10 @@ The target is named "raid" and it accepts the following parameters: | |||
27 | - rotating parity N (right-to-left) with data restart | 27 | - rotating parity N (right-to-left) with data restart |
28 | raid6_nc RAID6 N continue | 28 | raid6_nc RAID6 N continue |
29 | - rotating parity N (right-to-left) with data continuation | 29 | - rotating parity N (right-to-left) with data continuation |
30 | raid10 Various RAID10 inspired algorithms chosen by additional params | ||
31 | - RAID10: Striped Mirrors (aka 'Striping on top of mirrors') | ||
32 | - RAID1E: Integrated Adjacent Stripe Mirroring | ||
33 | - and other similar RAID10 variants | ||
30 | 34 | ||
31 | Reference: Chapter 4 of | 35 | Reference: Chapter 4 of |
32 | http://www.snia.org/sites/default/files/SNIA_DDF_Technical_Position_v2.0.pdf | 36 | http://www.snia.org/sites/default/files/SNIA_DDF_Technical_Position_v2.0.pdf |
@@ -59,6 +63,28 @@ The target is named "raid" and it accepts the following parameters: | |||
59 | logical size of the array. The bitmap records the device | 63 | logical size of the array. The bitmap records the device |
60 | synchronisation state for each region. | 64 | synchronisation state for each region. |
61 | 65 | ||
66 | [raid10_copies <# copies>] | ||
67 | [raid10_format near] | ||
68 | These two options are used to alter the default layout of | ||
69 | a RAID10 configuration. The number of copies can be | ||
70 | specified, but the default is 2. There are other variations | ||
71 | to how the copies are laid down - the default and only current | ||
72 | option is "near". Near copies are what most people think of | ||
73 | with respect to mirroring. If these options are left | ||
74 | unspecified, or 'raid10_copies 2' and/or 'raid10_format near' | ||
75 | are given, then the layouts for 2, 3 and 4 devices are: | ||
76 | 2 drives 3 drives 4 drives | ||
77 | -------- ---------- -------------- | ||
78 | A1 A1 A1 A1 A2 A1 A1 A2 A2 | ||
79 | A2 A2 A2 A3 A3 A3 A3 A4 A4 | ||
80 | A3 A3 A4 A4 A5 A5 A5 A6 A6 | ||
81 | A4 A4 A5 A6 A6 A7 A7 A8 A8 | ||
82 | .. .. .. .. .. .. .. .. .. | ||
83 | The 2-device layout is equivalent to 2-way RAID1. The 4-device | ||
84 | layout is what a traditional RAID10 would look like. The | ||
85 | 3-device layout is what might be called a 'RAID1E - Integrated | ||
86 | Adjacent Stripe Mirroring'. | ||
87 | |||
62 | <#raid_devs>: The number of devices composing the array. | 88 | <#raid_devs>: The number of devices composing the array. |
63 | Each device consists of two entries. The first is the device | 89 | Each device consists of two entries. The first is the device |
64 | containing the metadata (if any); the second is the one containing the | 90 | containing the metadata (if any); the second is the one containing the |
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index f4275a8e860c..691b3c59088e 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include "md.h" | 11 | #include "md.h" |
12 | #include "raid1.h" | 12 | #include "raid1.h" |
13 | #include "raid5.h" | 13 | #include "raid5.h" |
14 | #include "raid10.h" | ||
14 | #include "bitmap.h" | 15 | #include "bitmap.h" |
15 | 16 | ||
16 | #include <linux/device-mapper.h> | 17 | #include <linux/device-mapper.h> |
@@ -52,7 +53,10 @@ struct raid_dev { | |||
52 | #define DMPF_MAX_RECOVERY_RATE 0x20 | 53 | #define DMPF_MAX_RECOVERY_RATE 0x20 |
53 | #define DMPF_MAX_WRITE_BEHIND 0x40 | 54 | #define DMPF_MAX_WRITE_BEHIND 0x40 |
54 | #define DMPF_STRIPE_CACHE 0x80 | 55 | #define DMPF_STRIPE_CACHE 0x80 |
55 | #define DMPF_REGION_SIZE 0X100 | 56 | #define DMPF_REGION_SIZE 0x100 |
57 | #define DMPF_RAID10_COPIES 0x200 | ||
58 | #define DMPF_RAID10_FORMAT 0x400 | ||
59 | |||
56 | struct raid_set { | 60 | struct raid_set { |
57 | struct dm_target *ti; | 61 | struct dm_target *ti; |
58 | 62 | ||
@@ -76,6 +80,7 @@ static struct raid_type { | |||
76 | const unsigned algorithm; /* RAID algorithm. */ | 80 | const unsigned algorithm; /* RAID algorithm. */ |
77 | } raid_types[] = { | 81 | } raid_types[] = { |
78 | {"raid1", "RAID1 (mirroring)", 0, 2, 1, 0 /* NONE */}, | 82 | {"raid1", "RAID1 (mirroring)", 0, 2, 1, 0 /* NONE */}, |
83 | {"raid10", "RAID10 (striped mirrors)", 0, 2, 10, UINT_MAX /* Varies */}, | ||
79 | {"raid4", "RAID4 (dedicated parity disk)", 1, 2, 5, ALGORITHM_PARITY_0}, | 84 | {"raid4", "RAID4 (dedicated parity disk)", 1, 2, 5, ALGORITHM_PARITY_0}, |
80 | {"raid5_la", "RAID5 (left asymmetric)", 1, 2, 5, ALGORITHM_LEFT_ASYMMETRIC}, | 85 | {"raid5_la", "RAID5 (left asymmetric)", 1, 2, 5, ALGORITHM_LEFT_ASYMMETRIC}, |
81 | {"raid5_ra", "RAID5 (right asymmetric)", 1, 2, 5, ALGORITHM_RIGHT_ASYMMETRIC}, | 86 | {"raid5_ra", "RAID5 (right asymmetric)", 1, 2, 5, ALGORITHM_RIGHT_ASYMMETRIC}, |
@@ -86,6 +91,17 @@ static struct raid_type { | |||
86 | {"raid6_nc", "RAID6 (N continue)", 2, 4, 6, ALGORITHM_ROTATING_N_CONTINUE} | 91 | {"raid6_nc", "RAID6 (N continue)", 2, 4, 6, ALGORITHM_ROTATING_N_CONTINUE} |
87 | }; | 92 | }; |
88 | 93 | ||
94 | static unsigned raid10_md_layout_to_copies(int layout) | ||
95 | { | ||
96 | return layout & 0xFF; | ||
97 | } | ||
98 | |||
99 | static int raid10_format_to_md_layout(char *format, unsigned copies) | ||
100 | { | ||
101 | /* 1 "far" copy, and 'copies' "near" copies */ | ||
102 | return (1 << 8) | (copies & 0xFF); | ||
103 | } | ||
104 | |||
89 | static struct raid_type *get_raid_type(char *name) | 105 | static struct raid_type *get_raid_type(char *name) |
90 | { | 106 | { |
91 | int i; | 107 | int i; |
@@ -339,10 +355,16 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size) | |||
339 | * [max_write_behind <sectors>] See '-write-behind=' (man mdadm) | 355 | * [max_write_behind <sectors>] See '-write-behind=' (man mdadm) |
340 | * [stripe_cache <sectors>] Stripe cache size for higher RAIDs | 356 | * [stripe_cache <sectors>] Stripe cache size for higher RAIDs |
341 | * [region_size <sectors>] Defines granularity of bitmap | 357 | * [region_size <sectors>] Defines granularity of bitmap |
358 | * | ||
359 | * RAID10-only options: | ||
360 | * [raid10_copies <# copies>] Number of copies. (Default: 2) | ||
361 | * [raid10_format <near>] Layout algorithm. (Default: near) | ||
342 | */ | 362 | */ |
343 | static int parse_raid_params(struct raid_set *rs, char **argv, | 363 | static int parse_raid_params(struct raid_set *rs, char **argv, |
344 | unsigned num_raid_params) | 364 | unsigned num_raid_params) |
345 | { | 365 | { |
366 | char *raid10_format = "near"; | ||
367 | unsigned raid10_copies = 2; | ||
346 | unsigned i, rebuild_cnt = 0; | 368 | unsigned i, rebuild_cnt = 0; |
347 | unsigned long value, region_size = 0; | 369 | unsigned long value, region_size = 0; |
348 | sector_t sectors_per_dev = rs->ti->len; | 370 | sector_t sectors_per_dev = rs->ti->len; |
@@ -416,11 +438,28 @@ static int parse_raid_params(struct raid_set *rs, char **argv, | |||
416 | } | 438 | } |
417 | 439 | ||
418 | key = argv[i++]; | 440 | key = argv[i++]; |
441 | |||
442 | /* Parameters that take a string value are checked here. */ | ||
443 | if (!strcasecmp(key, "raid10_format")) { | ||
444 | if (rs->raid_type->level != 10) { | ||
445 | rs->ti->error = "'raid10_format' is an invalid parameter for this RAID type"; | ||
446 | return -EINVAL; | ||
447 | } | ||
448 | if (strcmp("near", argv[i])) { | ||
449 | rs->ti->error = "Invalid 'raid10_format' value given"; | ||
450 | return -EINVAL; | ||
451 | } | ||
452 | raid10_format = argv[i]; | ||
453 | rs->print_flags |= DMPF_RAID10_FORMAT; | ||
454 | continue; | ||
455 | } | ||
456 | |||
419 | if (strict_strtoul(argv[i], 10, &value) < 0) { | 457 | if (strict_strtoul(argv[i], 10, &value) < 0) { |
420 | rs->ti->error = "Bad numerical argument given in raid params"; | 458 | rs->ti->error = "Bad numerical argument given in raid params"; |
421 | return -EINVAL; | 459 | return -EINVAL; |
422 | } | 460 | } |
423 | 461 | ||
462 | /* Parameters that take a numeric value are checked here */ | ||
424 | if (!strcasecmp(key, "rebuild")) { | 463 | if (!strcasecmp(key, "rebuild")) { |
425 | rebuild_cnt++; | 464 | rebuild_cnt++; |
426 | 465 | ||
@@ -439,6 +478,7 @@ static int parse_raid_params(struct raid_set *rs, char **argv, | |||
439 | return -EINVAL; | 478 | return -EINVAL; |
440 | } | 479 | } |
441 | break; | 480 | break; |
481 | case 10: | ||
442 | default: | 482 | default: |
443 | DMERR("The rebuild parameter is not supported for %s", rs->raid_type->name); | 483 | DMERR("The rebuild parameter is not supported for %s", rs->raid_type->name); |
444 | rs->ti->error = "Rebuild not supported for this RAID type"; | 484 | rs->ti->error = "Rebuild not supported for this RAID type"; |
@@ -495,7 +535,8 @@ static int parse_raid_params(struct raid_set *rs, char **argv, | |||
495 | */ | 535 | */ |
496 | value /= 2; | 536 | value /= 2; |
497 | 537 | ||
498 | if (rs->raid_type->level < 5) { | 538 | if ((rs->raid_type->level != 5) && |
539 | (rs->raid_type->level != 6)) { | ||
499 | rs->ti->error = "Inappropriate argument: stripe_cache"; | 540 | rs->ti->error = "Inappropriate argument: stripe_cache"; |
500 | return -EINVAL; | 541 | return -EINVAL; |
501 | } | 542 | } |
@@ -520,6 +561,14 @@ static int parse_raid_params(struct raid_set *rs, char **argv, | |||
520 | } else if (!strcasecmp(key, "region_size")) { | 561 | } else if (!strcasecmp(key, "region_size")) { |
521 | rs->print_flags |= DMPF_REGION_SIZE; | 562 | rs->print_flags |= DMPF_REGION_SIZE; |
522 | region_size = value; | 563 | region_size = value; |
564 | } else if (!strcasecmp(key, "raid10_copies") && | ||
565 | (rs->raid_type->level == 10)) { | ||
566 | if ((value < 2) || (value > 0xFF)) { | ||
567 | rs->ti->error = "Bad value for 'raid10_copies'"; | ||
568 | return -EINVAL; | ||
569 | } | ||
570 | rs->print_flags |= DMPF_RAID10_COPIES; | ||
571 | raid10_copies = value; | ||
523 | } else { | 572 | } else { |
524 | DMERR("Unable to parse RAID parameter: %s", key); | 573 | DMERR("Unable to parse RAID parameter: %s", key); |
525 | rs->ti->error = "Unable to parse RAID parameters"; | 574 | rs->ti->error = "Unable to parse RAID parameters"; |
@@ -538,8 +587,22 @@ static int parse_raid_params(struct raid_set *rs, char **argv, | |||
538 | if (dm_set_target_max_io_len(rs->ti, max_io_len)) | 587 | if (dm_set_target_max_io_len(rs->ti, max_io_len)) |
539 | return -EINVAL; | 588 | return -EINVAL; |
540 | 589 | ||
541 | if ((rs->raid_type->level > 1) && | 590 | if (rs->raid_type->level == 10) { |
542 | sector_div(sectors_per_dev, (rs->md.raid_disks - rs->raid_type->parity_devs))) { | 591 | if (raid10_copies > rs->md.raid_disks) { |
592 | rs->ti->error = "Not enough devices to satisfy specification"; | ||
593 | return -EINVAL; | ||
594 | } | ||
595 | |||
596 | /* (Len * #mirrors) / #devices */ | ||
597 | sectors_per_dev = rs->ti->len * raid10_copies; | ||
598 | sector_div(sectors_per_dev, rs->md.raid_disks); | ||
599 | |||
600 | rs->md.layout = raid10_format_to_md_layout(raid10_format, | ||
601 | raid10_copies); | ||
602 | rs->md.new_layout = rs->md.layout; | ||
603 | } else if ((rs->raid_type->level > 1) && | ||
604 | sector_div(sectors_per_dev, | ||
605 | (rs->md.raid_disks - rs->raid_type->parity_devs))) { | ||
543 | rs->ti->error = "Target length not divisible by number of data devices"; | 606 | rs->ti->error = "Target length not divisible by number of data devices"; |
544 | return -EINVAL; | 607 | return -EINVAL; |
545 | } | 608 | } |
@@ -566,6 +629,9 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits) | |||
566 | if (rs->raid_type->level == 1) | 629 | if (rs->raid_type->level == 1) |
567 | return md_raid1_congested(&rs->md, bits); | 630 | return md_raid1_congested(&rs->md, bits); |
568 | 631 | ||
632 | if (rs->raid_type->level == 10) | ||
633 | return md_raid10_congested(&rs->md, bits); | ||
634 | |||
569 | return md_raid5_congested(&rs->md, bits); | 635 | return md_raid5_congested(&rs->md, bits); |
570 | } | 636 | } |
571 | 637 | ||
@@ -884,6 +950,9 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) | |||
884 | case 6: | 950 | case 6: |
885 | redundancy = rs->raid_type->parity_devs; | 951 | redundancy = rs->raid_type->parity_devs; |
886 | break; | 952 | break; |
953 | case 10: | ||
954 | redundancy = raid10_md_layout_to_copies(mddev->layout) - 1; | ||
955 | break; | ||
887 | default: | 956 | default: |
888 | ti->error = "Unknown RAID type"; | 957 | ti->error = "Unknown RAID type"; |
889 | return -EINVAL; | 958 | return -EINVAL; |
@@ -1049,12 +1118,19 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
1049 | goto bad; | 1118 | goto bad; |
1050 | } | 1119 | } |
1051 | 1120 | ||
1121 | if (ti->len != rs->md.array_sectors) { | ||
1122 | ti->error = "Array size does not match requested target length"; | ||
1123 | ret = -EINVAL; | ||
1124 | goto size_mismatch; | ||
1125 | } | ||
1052 | rs->callbacks.congested_fn = raid_is_congested; | 1126 | rs->callbacks.congested_fn = raid_is_congested; |
1053 | dm_table_add_target_callbacks(ti->table, &rs->callbacks); | 1127 | dm_table_add_target_callbacks(ti->table, &rs->callbacks); |
1054 | 1128 | ||
1055 | mddev_suspend(&rs->md); | 1129 | mddev_suspend(&rs->md); |
1056 | return 0; | 1130 | return 0; |
1057 | 1131 | ||
1132 | size_mismatch: | ||
1133 | md_stop(&rs->md); | ||
1058 | bad: | 1134 | bad: |
1059 | context_free(rs); | 1135 | context_free(rs); |
1060 | 1136 | ||
@@ -1203,6 +1279,13 @@ static int raid_status(struct dm_target *ti, status_type_t type, | |||
1203 | DMEMIT(" region_size %lu", | 1279 | DMEMIT(" region_size %lu", |
1204 | rs->md.bitmap_info.chunksize >> 9); | 1280 | rs->md.bitmap_info.chunksize >> 9); |
1205 | 1281 | ||
1282 | if (rs->print_flags & DMPF_RAID10_COPIES) | ||
1283 | DMEMIT(" raid10_copies %u", | ||
1284 | raid10_md_layout_to_copies(rs->md.layout)); | ||
1285 | |||
1286 | if (rs->print_flags & DMPF_RAID10_FORMAT) | ||
1287 | DMEMIT(" raid10_format near"); | ||
1288 | |||
1206 | DMEMIT(" %d", rs->md.raid_disks); | 1289 | DMEMIT(" %d", rs->md.raid_disks); |
1207 | for (i = 0; i < rs->md.raid_disks; i++) { | 1290 | for (i = 0; i < rs->md.raid_disks; i++) { |
1208 | if (rs->dev[i].meta_dev) | 1291 | if (rs->dev[i].meta_dev) |
@@ -1277,7 +1360,7 @@ static void raid_resume(struct dm_target *ti) | |||
1277 | 1360 | ||
1278 | static struct target_type raid_target = { | 1361 | static struct target_type raid_target = { |
1279 | .name = "raid", | 1362 | .name = "raid", |
1280 | .version = {1, 2, 0}, | 1363 | .version = {1, 3, 0}, |
1281 | .module = THIS_MODULE, | 1364 | .module = THIS_MODULE, |
1282 | .ctr = raid_ctr, | 1365 | .ctr = raid_ctr, |
1283 | .dtr = raid_dtr, | 1366 | .dtr = raid_dtr, |
@@ -1304,6 +1387,8 @@ module_init(dm_raid_init); | |||
1304 | module_exit(dm_raid_exit); | 1387 | module_exit(dm_raid_exit); |
1305 | 1388 | ||
1306 | MODULE_DESCRIPTION(DM_NAME " raid4/5/6 target"); | 1389 | MODULE_DESCRIPTION(DM_NAME " raid4/5/6 target"); |
1390 | MODULE_ALIAS("dm-raid1"); | ||
1391 | MODULE_ALIAS("dm-raid10"); | ||
1307 | MODULE_ALIAS("dm-raid4"); | 1392 | MODULE_ALIAS("dm-raid4"); |
1308 | MODULE_ALIAS("dm-raid5"); | 1393 | MODULE_ALIAS("dm-raid5"); |
1309 | MODULE_ALIAS("dm-raid6"); | 1394 | MODULE_ALIAS("dm-raid6"); |