diff options
| -rw-r--r-- | Documentation/device-mapper/dm-raid.txt | 26 | ||||
| -rw-r--r-- | drivers/md/dm-raid.c | 95 |
2 files changed, 116 insertions, 5 deletions
diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt index 946c73342cde..1c1844957166 100644 --- a/Documentation/device-mapper/dm-raid.txt +++ b/Documentation/device-mapper/dm-raid.txt | |||
| @@ -27,6 +27,10 @@ The target is named "raid" and it accepts the following parameters: | |||
| 27 | - rotating parity N (right-to-left) with data restart | 27 | - rotating parity N (right-to-left) with data restart |
| 28 | raid6_nc RAID6 N continue | 28 | raid6_nc RAID6 N continue |
| 29 | - rotating parity N (right-to-left) with data continuation | 29 | - rotating parity N (right-to-left) with data continuation |
| 30 | raid10 Various RAID10 inspired algorithms chosen by additional params | ||
| 31 | - RAID10: Striped Mirrors (aka 'Striping on top of mirrors') | ||
| 32 | - RAID1E: Integrated Adjacent Stripe Mirroring | ||
| 33 | - and other similar RAID10 variants | ||
| 30 | 34 | ||
| 31 | Reference: Chapter 4 of | 35 | Reference: Chapter 4 of |
| 32 | http://www.snia.org/sites/default/files/SNIA_DDF_Technical_Position_v2.0.pdf | 36 | http://www.snia.org/sites/default/files/SNIA_DDF_Technical_Position_v2.0.pdf |
| @@ -59,6 +63,28 @@ The target is named "raid" and it accepts the following parameters: | |||
| 59 | logical size of the array. The bitmap records the device | 63 | logical size of the array. The bitmap records the device |
| 60 | synchronisation state for each region. | 64 | synchronisation state for each region. |
| 61 | 65 | ||
| 66 | [raid10_copies <# copies>] | ||
| 67 | [raid10_format near] | ||
| 68 | These two options are used to alter the default layout of | ||
| 69 | a RAID10 configuration. The number of copies can be | ||
| 70 | specified, but the default is 2. There are other variations | ||
| 71 | to how the copies are laid down - the default and only current | ||
| 72 | option is "near". Near copies are what most people think of | ||
| 73 | with respect to mirroring. If these options are left | ||
| 74 | unspecified, or 'raid10_copies 2' and/or 'raid10_format near' | ||
| 75 | are given, then the layouts for 2, 3 and 4 devices are: | ||
| 76 | 2 drives 3 drives 4 drives | ||
| 77 | -------- ---------- -------------- | ||
| 78 | A1 A1 A1 A1 A2 A1 A1 A2 A2 | ||
| 79 | A2 A2 A2 A3 A3 A3 A3 A4 A4 | ||
| 80 | A3 A3 A4 A4 A5 A5 A5 A6 A6 | ||
| 81 | A4 A4 A5 A6 A6 A7 A7 A8 A8 | ||
| 82 | .. .. .. .. .. .. .. .. .. | ||
| 83 | The 2-device layout is equivalent to 2-way RAID1. The | ||
| 84 | layout is what a traditional RAID10 would look like. The | ||
| 85 | 3-device layout is what might be called a 'RAID1E - Integrated | ||
| 86 | Adjacent Stripe Mirroring'. | ||
| 87 | |||
| 62 | <#raid_devs>: The number of devices composing the array. | 88 | <#raid_devs>: The number of devices composing the array. |
| 63 | Each device consists of two entries. The first is the device | 89 | Each device consists of two entries. The first is the device |
| 64 | containing the metadata (if any); the second is the one containing the | 90 | containing the metadata (if any); the second is the one containing the |
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index f4275a8e860c..691b3c59088e 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include "md.h" | 11 | #include "md.h" |
| 12 | #include "raid1.h" | 12 | #include "raid1.h" |
| 13 | #include "raid5.h" | 13 | #include "raid5.h" |
| 14 | #include "raid10.h" | ||
| 14 | #include "bitmap.h" | 15 | #include "bitmap.h" |
| 15 | 16 | ||
| 16 | #include <linux/device-mapper.h> | 17 | #include <linux/device-mapper.h> |
| @@ -52,7 +53,10 @@ struct raid_dev { | |||
| 52 | #define DMPF_MAX_RECOVERY_RATE 0x20 | 53 | #define DMPF_MAX_RECOVERY_RATE 0x20 |
| 53 | #define DMPF_MAX_WRITE_BEHIND 0x40 | 54 | #define DMPF_MAX_WRITE_BEHIND 0x40 |
| 54 | #define DMPF_STRIPE_CACHE 0x80 | 55 | #define DMPF_STRIPE_CACHE 0x80 |
| 55 | #define DMPF_REGION_SIZE 0X100 | 56 | #define DMPF_REGION_SIZE 0x100 |
| 57 | #define DMPF_RAID10_COPIES 0x200 | ||
| 58 | #define DMPF_RAID10_FORMAT 0x400 | ||
| 59 | |||
| 56 | struct raid_set { | 60 | struct raid_set { |
| 57 | struct dm_target *ti; | 61 | struct dm_target *ti; |
| 58 | 62 | ||
| @@ -76,6 +80,7 @@ static struct raid_type { | |||
| 76 | const unsigned algorithm; /* RAID algorithm. */ | 80 | const unsigned algorithm; /* RAID algorithm. */ |
| 77 | } raid_types[] = { | 81 | } raid_types[] = { |
| 78 | {"raid1", "RAID1 (mirroring)", 0, 2, 1, 0 /* NONE */}, | 82 | {"raid1", "RAID1 (mirroring)", 0, 2, 1, 0 /* NONE */}, |
| 83 | {"raid10", "RAID10 (striped mirrors)", 0, 2, 10, UINT_MAX /* Varies */}, | ||
| 79 | {"raid4", "RAID4 (dedicated parity disk)", 1, 2, 5, ALGORITHM_PARITY_0}, | 84 | {"raid4", "RAID4 (dedicated parity disk)", 1, 2, 5, ALGORITHM_PARITY_0}, |
| 80 | {"raid5_la", "RAID5 (left asymmetric)", 1, 2, 5, ALGORITHM_LEFT_ASYMMETRIC}, | 85 | {"raid5_la", "RAID5 (left asymmetric)", 1, 2, 5, ALGORITHM_LEFT_ASYMMETRIC}, |
| 81 | {"raid5_ra", "RAID5 (right asymmetric)", 1, 2, 5, ALGORITHM_RIGHT_ASYMMETRIC}, | 86 | {"raid5_ra", "RAID5 (right asymmetric)", 1, 2, 5, ALGORITHM_RIGHT_ASYMMETRIC}, |
| @@ -86,6 +91,17 @@ static struct raid_type { | |||
| 86 | {"raid6_nc", "RAID6 (N continue)", 2, 4, 6, ALGORITHM_ROTATING_N_CONTINUE} | 91 | {"raid6_nc", "RAID6 (N continue)", 2, 4, 6, ALGORITHM_ROTATING_N_CONTINUE} |
| 87 | }; | 92 | }; |
| 88 | 93 | ||
| 94 | static unsigned raid10_md_layout_to_copies(int layout) | ||
| 95 | { | ||
| 96 | return layout & 0xFF; | ||
| 97 | } | ||
| 98 | |||
| 99 | static int raid10_format_to_md_layout(char *format, unsigned copies) | ||
| 100 | { | ||
| 101 | /* 1 "far" copy, and 'copies' "near" copies */ | ||
| 102 | return (1 << 8) | (copies & 0xFF); | ||
| 103 | } | ||
| 104 | |||
| 89 | static struct raid_type *get_raid_type(char *name) | 105 | static struct raid_type *get_raid_type(char *name) |
| 90 | { | 106 | { |
| 91 | int i; | 107 | int i; |
| @@ -339,10 +355,16 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size) | |||
| 339 | * [max_write_behind <sectors>] See '-write-behind=' (man mdadm) | 355 | * [max_write_behind <sectors>] See '-write-behind=' (man mdadm) |
| 340 | * [stripe_cache <sectors>] Stripe cache size for higher RAIDs | 356 | * [stripe_cache <sectors>] Stripe cache size for higher RAIDs |
| 341 | * [region_size <sectors>] Defines granularity of bitmap | 357 | * [region_size <sectors>] Defines granularity of bitmap |
| 358 | * | ||
| 359 | * RAID10-only options: | ||
| 360 | * [raid10_copies <# copies>] Number of copies. (Default: 2) | ||
| 361 | * [raid10_format <near>] Layout algorithm. (Default: near) | ||
| 342 | */ | 362 | */ |
| 343 | static int parse_raid_params(struct raid_set *rs, char **argv, | 363 | static int parse_raid_params(struct raid_set *rs, char **argv, |
| 344 | unsigned num_raid_params) | 364 | unsigned num_raid_params) |
| 345 | { | 365 | { |
| 366 | char *raid10_format = "near"; | ||
| 367 | unsigned raid10_copies = 2; | ||
| 346 | unsigned i, rebuild_cnt = 0; | 368 | unsigned i, rebuild_cnt = 0; |
| 347 | unsigned long value, region_size = 0; | 369 | unsigned long value, region_size = 0; |
| 348 | sector_t sectors_per_dev = rs->ti->len; | 370 | sector_t sectors_per_dev = rs->ti->len; |
| @@ -416,11 +438,28 @@ static int parse_raid_params(struct raid_set *rs, char **argv, | |||
| 416 | } | 438 | } |
| 417 | 439 | ||
| 418 | key = argv[i++]; | 440 | key = argv[i++]; |
| 441 | |||
| 442 | /* Parameters that take a string value are checked here. */ | ||
| 443 | if (!strcasecmp(key, "raid10_format")) { | ||
| 444 | if (rs->raid_type->level != 10) { | ||
| 445 | rs->ti->error = "'raid10_format' is an invalid parameter for this RAID type"; | ||
| 446 | return -EINVAL; | ||
| 447 | } | ||
| 448 | if (strcmp("near", argv[i])) { | ||
| 449 | rs->ti->error = "Invalid 'raid10_format' value given"; | ||
| 450 | return -EINVAL; | ||
| 451 | } | ||
| 452 | raid10_format = argv[i]; | ||
| 453 | rs->print_flags |= DMPF_RAID10_FORMAT; | ||
| 454 | continue; | ||
| 455 | } | ||
| 456 | |||
| 419 | if (strict_strtoul(argv[i], 10, &value) < 0) { | 457 | if (strict_strtoul(argv[i], 10, &value) < 0) { |
| 420 | rs->ti->error = "Bad numerical argument given in raid params"; | 458 | rs->ti->error = "Bad numerical argument given in raid params"; |
| 421 | return -EINVAL; | 459 | return -EINVAL; |
| 422 | } | 460 | } |
| 423 | 461 | ||
| 462 | /* Parameters that take a numeric value are checked here */ | ||
| 424 | if (!strcasecmp(key, "rebuild")) { | 463 | if (!strcasecmp(key, "rebuild")) { |
| 425 | rebuild_cnt++; | 464 | rebuild_cnt++; |
| 426 | 465 | ||
| @@ -439,6 +478,7 @@ static int parse_raid_params(struct raid_set *rs, char **argv, | |||
| 439 | return -EINVAL; | 478 | return -EINVAL; |
| 440 | } | 479 | } |
| 441 | break; | 480 | break; |
| 481 | case 10: | ||
| 442 | default: | 482 | default: |
| 443 | DMERR("The rebuild parameter is not supported for %s", rs->raid_type->name); | 483 | DMERR("The rebuild parameter is not supported for %s", rs->raid_type->name); |
| 444 | rs->ti->error = "Rebuild not supported for this RAID type"; | 484 | rs->ti->error = "Rebuild not supported for this RAID type"; |
| @@ -495,7 +535,8 @@ static int parse_raid_params(struct raid_set *rs, char **argv, | |||
| 495 | */ | 535 | */ |
| 496 | value /= 2; | 536 | value /= 2; |
| 497 | 537 | ||
| 498 | if (rs->raid_type->level < 5) { | 538 | if ((rs->raid_type->level != 5) && |
| 539 | (rs->raid_type->level != 6)) { | ||
| 499 | rs->ti->error = "Inappropriate argument: stripe_cache"; | 540 | rs->ti->error = "Inappropriate argument: stripe_cache"; |
| 500 | return -EINVAL; | 541 | return -EINVAL; |
| 501 | } | 542 | } |
| @@ -520,6 +561,14 @@ static int parse_raid_params(struct raid_set *rs, char **argv, | |||
| 520 | } else if (!strcasecmp(key, "region_size")) { | 561 | } else if (!strcasecmp(key, "region_size")) { |
| 521 | rs->print_flags |= DMPF_REGION_SIZE; | 562 | rs->print_flags |= DMPF_REGION_SIZE; |
| 522 | region_size = value; | 563 | region_size = value; |
| 564 | } else if (!strcasecmp(key, "raid10_copies") && | ||
| 565 | (rs->raid_type->level == 10)) { | ||
| 566 | if ((value < 2) || (value > 0xFF)) { | ||
| 567 | rs->ti->error = "Bad value for 'raid10_copies'"; | ||
| 568 | return -EINVAL; | ||
| 569 | } | ||
| 570 | rs->print_flags |= DMPF_RAID10_COPIES; | ||
| 571 | raid10_copies = value; | ||
| 523 | } else { | 572 | } else { |
| 524 | DMERR("Unable to parse RAID parameter: %s", key); | 573 | DMERR("Unable to parse RAID parameter: %s", key); |
| 525 | rs->ti->error = "Unable to parse RAID parameters"; | 574 | rs->ti->error = "Unable to parse RAID parameters"; |
| @@ -538,8 +587,22 @@ static int parse_raid_params(struct raid_set *rs, char **argv, | |||
| 538 | if (dm_set_target_max_io_len(rs->ti, max_io_len)) | 587 | if (dm_set_target_max_io_len(rs->ti, max_io_len)) |
| 539 | return -EINVAL; | 588 | return -EINVAL; |
| 540 | 589 | ||
| 541 | if ((rs->raid_type->level > 1) && | 590 | if (rs->raid_type->level == 10) { |
| 542 | sector_div(sectors_per_dev, (rs->md.raid_disks - rs->raid_type->parity_devs))) { | 591 | if (raid10_copies > rs->md.raid_disks) { |
| 592 | rs->ti->error = "Not enough devices to satisfy specification"; | ||
| 593 | return -EINVAL; | ||
| 594 | } | ||
| 595 | |||
| 596 | /* (Len * #mirrors) / #devices */ | ||
| 597 | sectors_per_dev = rs->ti->len * raid10_copies; | ||
| 598 | sector_div(sectors_per_dev, rs->md.raid_disks); | ||
| 599 | |||
| 600 | rs->md.layout = raid10_format_to_md_layout(raid10_format, | ||
| 601 | raid10_copies); | ||
| 602 | rs->md.new_layout = rs->md.layout; | ||
| 603 | } else if ((rs->raid_type->level > 1) && | ||
| 604 | sector_div(sectors_per_dev, | ||
| 605 | (rs->md.raid_disks - rs->raid_type->parity_devs))) { | ||
| 543 | rs->ti->error = "Target length not divisible by number of data devices"; | 606 | rs->ti->error = "Target length not divisible by number of data devices"; |
| 544 | return -EINVAL; | 607 | return -EINVAL; |
| 545 | } | 608 | } |
| @@ -566,6 +629,9 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits) | |||
| 566 | if (rs->raid_type->level == 1) | 629 | if (rs->raid_type->level == 1) |
| 567 | return md_raid1_congested(&rs->md, bits); | 630 | return md_raid1_congested(&rs->md, bits); |
| 568 | 631 | ||
| 632 | if (rs->raid_type->level == 10) | ||
| 633 | return md_raid10_congested(&rs->md, bits); | ||
| 634 | |||
| 569 | return md_raid5_congested(&rs->md, bits); | 635 | return md_raid5_congested(&rs->md, bits); |
| 570 | } | 636 | } |
| 571 | 637 | ||
| @@ -884,6 +950,9 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) | |||
| 884 | case 6: | 950 | case 6: |
| 885 | redundancy = rs->raid_type->parity_devs; | 951 | redundancy = rs->raid_type->parity_devs; |
| 886 | break; | 952 | break; |
| 953 | case 10: | ||
| 954 | redundancy = raid10_md_layout_to_copies(mddev->layout) - 1; | ||
| 955 | break; | ||
| 887 | default: | 956 | default: |
| 888 | ti->error = "Unknown RAID type"; | 957 | ti->error = "Unknown RAID type"; |
| 889 | return -EINVAL; | 958 | return -EINVAL; |
| @@ -1049,12 +1118,19 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
| 1049 | goto bad; | 1118 | goto bad; |
| 1050 | } | 1119 | } |
| 1051 | 1120 | ||
| 1121 | if (ti->len != rs->md.array_sectors) { | ||
| 1122 | ti->error = "Array size does not match requested target length"; | ||
| 1123 | ret = -EINVAL; | ||
| 1124 | goto size_mismatch; | ||
| 1125 | } | ||
| 1052 | rs->callbacks.congested_fn = raid_is_congested; | 1126 | rs->callbacks.congested_fn = raid_is_congested; |
| 1053 | dm_table_add_target_callbacks(ti->table, &rs->callbacks); | 1127 | dm_table_add_target_callbacks(ti->table, &rs->callbacks); |
| 1054 | 1128 | ||
| 1055 | mddev_suspend(&rs->md); | 1129 | mddev_suspend(&rs->md); |
| 1056 | return 0; | 1130 | return 0; |
| 1057 | 1131 | ||
| 1132 | size_mismatch: | ||
| 1133 | md_stop(&rs->md); | ||
| 1058 | bad: | 1134 | bad: |
| 1059 | context_free(rs); | 1135 | context_free(rs); |
| 1060 | 1136 | ||
| @@ -1203,6 +1279,13 @@ static int raid_status(struct dm_target *ti, status_type_t type, | |||
| 1203 | DMEMIT(" region_size %lu", | 1279 | DMEMIT(" region_size %lu", |
| 1204 | rs->md.bitmap_info.chunksize >> 9); | 1280 | rs->md.bitmap_info.chunksize >> 9); |
| 1205 | 1281 | ||
| 1282 | if (rs->print_flags & DMPF_RAID10_COPIES) | ||
| 1283 | DMEMIT(" raid10_copies %u", | ||
| 1284 | raid10_md_layout_to_copies(rs->md.layout)); | ||
| 1285 | |||
| 1286 | if (rs->print_flags & DMPF_RAID10_FORMAT) | ||
| 1287 | DMEMIT(" raid10_format near"); | ||
| 1288 | |||
| 1206 | DMEMIT(" %d", rs->md.raid_disks); | 1289 | DMEMIT(" %d", rs->md.raid_disks); |
| 1207 | for (i = 0; i < rs->md.raid_disks; i++) { | 1290 | for (i = 0; i < rs->md.raid_disks; i++) { |
| 1208 | if (rs->dev[i].meta_dev) | 1291 | if (rs->dev[i].meta_dev) |
| @@ -1277,7 +1360,7 @@ static void raid_resume(struct dm_target *ti) | |||
| 1277 | 1360 | ||
| 1278 | static struct target_type raid_target = { | 1361 | static struct target_type raid_target = { |
| 1279 | .name = "raid", | 1362 | .name = "raid", |
| 1280 | .version = {1, 2, 0}, | 1363 | .version = {1, 3, 0}, |
| 1281 | .module = THIS_MODULE, | 1364 | .module = THIS_MODULE, |
| 1282 | .ctr = raid_ctr, | 1365 | .ctr = raid_ctr, |
| 1283 | .dtr = raid_dtr, | 1366 | .dtr = raid_dtr, |
| @@ -1304,6 +1387,8 @@ module_init(dm_raid_init); | |||
| 1304 | module_exit(dm_raid_exit); | 1387 | module_exit(dm_raid_exit); |
| 1305 | 1388 | ||
| 1306 | MODULE_DESCRIPTION(DM_NAME " raid4/5/6 target"); | 1389 | MODULE_DESCRIPTION(DM_NAME " raid4/5/6 target"); |
| 1390 | MODULE_ALIAS("dm-raid1"); | ||
| 1391 | MODULE_ALIAS("dm-raid10"); | ||
| 1307 | MODULE_ALIAS("dm-raid4"); | 1392 | MODULE_ALIAS("dm-raid4"); |
| 1308 | MODULE_ALIAS("dm-raid5"); | 1393 | MODULE_ALIAS("dm-raid5"); |
| 1309 | MODULE_ALIAS("dm-raid6"); | 1394 | MODULE_ALIAS("dm-raid6"); |
