 Documentation/device-mapper/dm-raid.txt | 26 +++++++++++
 drivers/md/dm-raid.c                    | 95 ++++++++++++++++++++++++++++++---
 2 files changed, 116 insertions(+), 5 deletions(-)
diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt
index 946c73342cde..1c1844957166 100644
--- a/Documentation/device-mapper/dm-raid.txt
+++ b/Documentation/device-mapper/dm-raid.txt
@@ -27,6 +27,10 @@ The target is named "raid" and it accepts the following parameters:
 		- rotating parity N (right-to-left) with data restart
   raid6_nc	RAID6 N continue
 		- rotating parity N (right-to-left) with data continuation
+  raid10	Various RAID10 inspired algorithms chosen by additional params
+		- RAID10: Striped Mirrors (aka 'Striping on top of mirrors')
+		- RAID1E: Integrated Adjacent Stripe Mirroring
+		- and other similar RAID10 variants
 
 Reference: Chapter 4 of
 http://www.snia.org/sites/default/files/SNIA_DDF_Technical_Position_v2.0.pdf
@@ -59,6 +63,28 @@ The target is named "raid" and it accepts the following parameters:
 	logical size of the array.  The bitmap records the device
 	synchronisation state for each region.
 
+	[raid10_copies   <# copies>]
+	[raid10_format   near]
+		These two options are used to alter the default layout of
+		a RAID10 configuration.  The number of copies can be
+		specified, but the default is 2.  There are other variations
+		to how the copies are laid down - the default and only current
+		option is "near".  Near copies are what most people think of
+		with respect to mirroring.  If these options are left
+		unspecified, or 'raid10_copies 2' and/or 'raid10_format near'
+		are given, then the layouts for 2, 3 and 4 devices are:
+		2 drives         3 drives          4 drives
+		--------         ----------        --------------
+		A1  A1           A1  A1  A2        A1  A1  A2  A2
+		A2  A2           A2  A3  A3        A3  A3  A4  A4
+		A3  A3           A4  A4  A5        A5  A5  A6  A6
+		A4  A4           A5  A6  A6        A7  A7  A8  A8
+		..  ..           ..  ..  ..        ..  ..  ..  ..
+		The 2-device layout is equivalent to 2-way RAID1.  The
+		4-device layout is what a traditional RAID10 would look
+		like.  The 3-device layout is what might be called a
+		'RAID1E - Integrated Adjacent Stripe Mirroring'.
+
 <#raid_devs>: The number of devices composing the array.
 	Each device consists of two entries.  The first is the device
 	containing the metadata (if any); the second is the one containing the
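
For context, written in the "Example tables" style dm-raid.txt already uses, a
table exercising the new options might look like this.  This is a sketch only:
the start/length, chunk size, and 8:* device numbers are hypothetical, and '-'
denotes "no metadata device" as documented above.

  # RAID10 - 4 data drives, 2 "near" copies (no metadata devices)
  # 1 GiB target (2097152 sectors), chunk size of 64KiB (128 sectors)
  # (Lines separated for easy reading)

  0 2097152 raid \
          raid10 5 128 raid10_copies 2 raid10_format near \
          4 - 8:16 - 8:32 - 8:48 - 8:64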
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index f4275a8e860c..691b3c59088e 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -11,6 +11,7 @@
11#include "md.h" 11#include "md.h"
12#include "raid1.h" 12#include "raid1.h"
13#include "raid5.h" 13#include "raid5.h"
14#include "raid10.h"
14#include "bitmap.h" 15#include "bitmap.h"
15 16
16#include <linux/device-mapper.h> 17#include <linux/device-mapper.h>
@@ -52,7 +53,10 @@ struct raid_dev {
 #define DMPF_MAX_RECOVERY_RATE 0x20
 #define DMPF_MAX_WRITE_BEHIND  0x40
 #define DMPF_STRIPE_CACHE      0x80
-#define DMPF_REGION_SIZE       0X100
+#define DMPF_REGION_SIZE       0x100
+#define DMPF_RAID10_COPIES     0x200
+#define DMPF_RAID10_FORMAT     0x400
+
 struct raid_set {
 	struct dm_target *ti;
 
@@ -76,6 +80,7 @@ static struct raid_type {
 	const unsigned algorithm;	/* RAID algorithm. */
 } raid_types[] = {
 	{"raid1",    "RAID1 (mirroring)",             0, 2, 1,  0 /* NONE */},
+	{"raid10",   "RAID10 (striped mirrors)",      0, 2, 10, UINT_MAX /* Varies */},
 	{"raid4",    "RAID4 (dedicated parity disk)", 1, 2, 5,  ALGORITHM_PARITY_0},
 	{"raid5_la", "RAID5 (left asymmetric)",       1, 2, 5,  ALGORITHM_LEFT_ASYMMETRIC},
 	{"raid5_ra", "RAID5 (right asymmetric)",      1, 2, 5,  ALGORITHM_RIGHT_ASYMMETRIC},
@@ -86,6 +91,17 @@ static struct raid_type {
86 {"raid6_nc", "RAID6 (N continue)", 2, 4, 6, ALGORITHM_ROTATING_N_CONTINUE} 91 {"raid6_nc", "RAID6 (N continue)", 2, 4, 6, ALGORITHM_ROTATING_N_CONTINUE}
87}; 92};
88 93
94static unsigned raid10_md_layout_to_copies(int layout)
95{
96 return layout & 0xFF;
97}
98
99static int raid10_format_to_md_layout(char *format, unsigned copies)
100{
101 /* 1 "far" copy, and 'copies' "near" copies */
102 return (1 << 8) | (copies & 0xFF);
103}
104
89static struct raid_type *get_raid_type(char *name) 105static struct raid_type *get_raid_type(char *name)
90{ 106{
91 int i; 107 int i;
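
The two helpers added above pack md's RAID10 layout word: the "near" copy
count lives in bits 0-7 and the "far" copy count in bits 8-15, with the "near"
format pinning far copies at 1.  A minimal user-space sketch of the same
decode, handy for eyeballing a layout value (the helper names here are ours,
not kernel API):

	#include <stdio.h>

	static unsigned near_copies(int layout) { return layout & 0xFF; }
	static unsigned far_copies(int layout)  { return (layout >> 8) & 0xFF; }

	int main(void)
	{
		int layout = (1 << 8) | 2;	/* "near" format, 2 copies */

		/* Prints: layout=0x102 near=2 far=1 */
		printf("layout=0x%x near=%u far=%u\n",
		       layout, near_copies(layout), far_copies(layout));
		return 0;
	}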
@@ -339,10 +355,16 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size)
  *    [max_write_behind <sectors>]	See '-write-behind=' (man mdadm)
  *    [stripe_cache <sectors>]	Stripe cache size for higher RAIDs
  *    [region_size <sectors>]	Defines granularity of bitmap
+ *
+ * RAID10-only options:
+ *    [raid10_copies <# copies>]	Number of copies.  (Default: 2)
+ *    [raid10_format <near>]	Layout algorithm.  (Default: near)
  */
 static int parse_raid_params(struct raid_set *rs, char **argv,
 			     unsigned num_raid_params)
 {
+	char *raid10_format = "near";
+	unsigned raid10_copies = 2;
 	unsigned i, rebuild_cnt = 0;
 	unsigned long value, region_size = 0;
 	sector_t sectors_per_dev = rs->ti->len;
@@ -416,11 +438,28 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
 		}
 
 		key = argv[i++];
+
+		/* Parameters that take a string value are checked here. */
+		if (!strcasecmp(key, "raid10_format")) {
+			if (rs->raid_type->level != 10) {
+				rs->ti->error = "'raid10_format' is an invalid parameter for this RAID type";
+				return -EINVAL;
+			}
+			if (strcmp("near", argv[i])) {
+				rs->ti->error = "Invalid 'raid10_format' value given";
+				return -EINVAL;
+			}
+			raid10_format = argv[i];
+			rs->print_flags |= DMPF_RAID10_FORMAT;
+			continue;
+		}
+
 		if (strict_strtoul(argv[i], 10, &value) < 0) {
 			rs->ti->error = "Bad numerical argument given in raid params";
 			return -EINVAL;
 		}
 
+		/* Parameters that take a numeric value are checked here */
 		if (!strcasecmp(key, "rebuild")) {
 			rebuild_cnt++;
 
@@ -439,6 +478,7 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
 					return -EINVAL;
 				}
 				break;
+			case 10:
 			default:
 				DMERR("The rebuild parameter is not supported for %s", rs->raid_type->name);
 				rs->ti->error = "Rebuild not supported for this RAID type";
@@ -495,7 +535,8 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
 			 */
 			value /= 2;
 
-			if (rs->raid_type->level < 5) {
+			if ((rs->raid_type->level != 5) &&
+			    (rs->raid_type->level != 6)) {
 				rs->ti->error = "Inappropriate argument: stripe_cache";
 				return -EINVAL;
 			}
@@ -520,6 +561,14 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
 		} else if (!strcasecmp(key, "region_size")) {
 			rs->print_flags |= DMPF_REGION_SIZE;
 			region_size = value;
+		} else if (!strcasecmp(key, "raid10_copies") &&
+			   (rs->raid_type->level == 10)) {
+			if ((value < 2) || (value > 0xFF)) {
+				rs->ti->error = "Bad value for 'raid10_copies'";
+				return -EINVAL;
+			}
+			rs->print_flags |= DMPF_RAID10_COPIES;
+			raid10_copies = value;
 		} else {
 			DMERR("Unable to parse RAID parameter: %s", key);
 			rs->ti->error = "Unable to parse RAID parameters";
@@ -538,8 +587,22 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
 	if (dm_set_target_max_io_len(rs->ti, max_io_len))
 		return -EINVAL;
 
-	if ((rs->raid_type->level > 1) &&
-	    sector_div(sectors_per_dev, (rs->md.raid_disks - rs->raid_type->parity_devs))) {
+	if (rs->raid_type->level == 10) {
+		if (raid10_copies > rs->md.raid_disks) {
+			rs->ti->error = "Not enough devices to satisfy specification";
+			return -EINVAL;
+		}
+
+		/* (Len * #mirrors) / #devices */
+		sectors_per_dev = rs->ti->len * raid10_copies;
+		sector_div(sectors_per_dev, rs->md.raid_disks);
+
+		rs->md.layout = raid10_format_to_md_layout(raid10_format,
+							   raid10_copies);
+		rs->md.new_layout = rs->md.layout;
+	} else if ((rs->raid_type->level > 1) &&
+		   sector_div(sectors_per_dev,
+			      (rs->md.raid_disks - rs->raid_type->parity_devs))) {
 		rs->ti->error = "Target length not divisible by number of data devices";
 		return -EINVAL;
 	}
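
The "(Len * #mirrors) / #devices" comment above is easiest to see with
concrete numbers.  A user-space sketch with hypothetical values (a 1 GiB
target, 2 copies, 4 devices) shows each member device contributing half the
target length:

	#include <stdio.h>

	int main(void)
	{
		unsigned long long len = 2097152; /* ti->len in 512-byte sectors (1 GiB) */
		unsigned copies = 2;              /* raid10_copies */
		unsigned disks = 4;               /* rs->md.raid_disks */

		/* Prints: sectors_per_dev = 1048576  (512 MiB per device) */
		printf("sectors_per_dev = %llu\n", len * copies / disks);
		return 0;
	}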
@@ -566,6 +629,9 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits)
 	if (rs->raid_type->level == 1)
 		return md_raid1_congested(&rs->md, bits);
 
+	if (rs->raid_type->level == 10)
+		return md_raid10_congested(&rs->md, bits);
+
 	return md_raid5_congested(&rs->md, bits);
 }
 
@@ -884,6 +950,9 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
 	case 6:
 		redundancy = rs->raid_type->parity_devs;
 		break;
+	case 10:
+		redundancy = raid10_md_layout_to_copies(mddev->layout) - 1;
+		break;
 	default:
 		ti->error = "Unknown RAID type";
 		return -EINVAL;
@@ -1049,12 +1118,19 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
 		goto bad;
 	}
 
+	if (ti->len != rs->md.array_sectors) {
+		ti->error = "Array size does not match requested target length";
+		ret = -EINVAL;
+		goto size_mismatch;
+	}
+
 	rs->callbacks.congested_fn = raid_is_congested;
 	dm_table_add_target_callbacks(ti->table, &rs->callbacks);
 
 	mddev_suspend(&rs->md);
 	return 0;
 
+size_mismatch:
+	md_stop(&rs->md);
 bad:
 	context_free(rs);
 
@@ -1203,6 +1279,13 @@ static int raid_status(struct dm_target *ti, status_type_t type,
1203 DMEMIT(" region_size %lu", 1279 DMEMIT(" region_size %lu",
1204 rs->md.bitmap_info.chunksize >> 9); 1280 rs->md.bitmap_info.chunksize >> 9);
1205 1281
1282 if (rs->print_flags & DMPF_RAID10_COPIES)
1283 DMEMIT(" raid10_copies %u",
1284 raid10_md_layout_to_copies(rs->md.layout));
1285
1286 if (rs->print_flags & DMPF_RAID10_FORMAT)
1287 DMEMIT(" raid10_format near");
1288
1206 DMEMIT(" %d", rs->md.raid_disks); 1289 DMEMIT(" %d", rs->md.raid_disks);
1207 for (i = 0; i < rs->md.raid_disks; i++) { 1290 for (i = 0; i < rs->md.raid_disks; i++) {
1208 if (rs->dev[i].meta_dev) 1291 if (rs->dev[i].meta_dev)
@@ -1277,7 +1360,7 @@ static void raid_resume(struct dm_target *ti)
 
 static struct target_type raid_target = {
 	.name = "raid",
-	.version = {1, 2, 0},
+	.version = {1, 3, 0},
 	.module = THIS_MODULE,
 	.ctr = raid_ctr,
 	.dtr = raid_dtr,
@@ -1304,6 +1387,8 @@ module_init(dm_raid_init);
 module_exit(dm_raid_exit);
 
 MODULE_DESCRIPTION(DM_NAME " raid4/5/6 target");
+MODULE_ALIAS("dm-raid1");
+MODULE_ALIAS("dm-raid10");
 MODULE_ALIAS("dm-raid4");
 MODULE_ALIAS("dm-raid5");
 MODULE_ALIAS("dm-raid6");