diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-08-01 12:02:01 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-08-01 12:02:01 -0400 |
commit | fcff06c438b60f415af5983efe92811d6aa02ad1 (patch) | |
tree | 704f6598b2de60a86774bc5cf152d4f051bd2dc4 /drivers/md/dm-raid.c | |
parent | 068535f1fef4c90aee23eb7b9b9a71c5b72d7cd0 (diff) | |
parent | 63f33b8dda88923487004b20fba825486d009e7b (diff) |
Merge branch 'for-next' of git://neil.brown.name/md
Pull md updates from NeilBrown.
* 'for-next' of git://neil.brown.name/md:
DM RAID: Add support for MD RAID10
md/RAID1: Add missing case for attempting to repair known bad blocks.
md/raid5: For odirect-write performance, do not set STRIPE_PREREAD_ACTIVE.
md/raid1: don't abort a resync on the first badblock.
md: remove duplicated test on ->openers when calling do_md_stop()
raid5: Add R5_ReadNoMerge flag which prevent bio from merging at block layer
md/raid1: prevent merging too large request
md/raid1: read balance chooses idlest disk for SSD
md/raid1: make sequential read detection per disk based
MD RAID10: Export md_raid10_congested
MD: Move macros from raid1*.h to raid1*.c
MD RAID1: rename mirror_info structure
MD RAID10: rename mirror_info structure
MD RAID10: Fix compiler warning.
raid5: add a per-stripe lock
raid5: remove unnecessary bitmap write optimization
raid5: lockless access raid5 overrided bi_phys_segments
raid5: reduce chance release_stripe() taking device_lock
Diffstat (limited to 'drivers/md/dm-raid.c')
-rw-r--r-- | drivers/md/dm-raid.c | 95 |
1 files changed, 90 insertions, 5 deletions
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index f2f29c526544..982e3e390c45 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include "md.h" | 11 | #include "md.h" |
12 | #include "raid1.h" | 12 | #include "raid1.h" |
13 | #include "raid5.h" | 13 | #include "raid5.h" |
14 | #include "raid10.h" | ||
14 | #include "bitmap.h" | 15 | #include "bitmap.h" |
15 | 16 | ||
16 | #include <linux/device-mapper.h> | 17 | #include <linux/device-mapper.h> |
@@ -52,7 +53,10 @@ struct raid_dev { | |||
52 | #define DMPF_MAX_RECOVERY_RATE 0x20 | 53 | #define DMPF_MAX_RECOVERY_RATE 0x20 |
53 | #define DMPF_MAX_WRITE_BEHIND 0x40 | 54 | #define DMPF_MAX_WRITE_BEHIND 0x40 |
54 | #define DMPF_STRIPE_CACHE 0x80 | 55 | #define DMPF_STRIPE_CACHE 0x80 |
55 | #define DMPF_REGION_SIZE 0X100 | 56 | #define DMPF_REGION_SIZE 0x100 |
57 | #define DMPF_RAID10_COPIES 0x200 | ||
58 | #define DMPF_RAID10_FORMAT 0x400 | ||
59 | |||
56 | struct raid_set { | 60 | struct raid_set { |
57 | struct dm_target *ti; | 61 | struct dm_target *ti; |
58 | 62 | ||
@@ -76,6 +80,7 @@ static struct raid_type { | |||
76 | const unsigned algorithm; /* RAID algorithm. */ | 80 | const unsigned algorithm; /* RAID algorithm. */ |
77 | } raid_types[] = { | 81 | } raid_types[] = { |
78 | {"raid1", "RAID1 (mirroring)", 0, 2, 1, 0 /* NONE */}, | 82 | {"raid1", "RAID1 (mirroring)", 0, 2, 1, 0 /* NONE */}, |
83 | {"raid10", "RAID10 (striped mirrors)", 0, 2, 10, UINT_MAX /* Varies */}, | ||
79 | {"raid4", "RAID4 (dedicated parity disk)", 1, 2, 5, ALGORITHM_PARITY_0}, | 84 | {"raid4", "RAID4 (dedicated parity disk)", 1, 2, 5, ALGORITHM_PARITY_0}, |
80 | {"raid5_la", "RAID5 (left asymmetric)", 1, 2, 5, ALGORITHM_LEFT_ASYMMETRIC}, | 85 | {"raid5_la", "RAID5 (left asymmetric)", 1, 2, 5, ALGORITHM_LEFT_ASYMMETRIC}, |
81 | {"raid5_ra", "RAID5 (right asymmetric)", 1, 2, 5, ALGORITHM_RIGHT_ASYMMETRIC}, | 86 | {"raid5_ra", "RAID5 (right asymmetric)", 1, 2, 5, ALGORITHM_RIGHT_ASYMMETRIC}, |
@@ -86,6 +91,17 @@ static struct raid_type { | |||
86 | {"raid6_nc", "RAID6 (N continue)", 2, 4, 6, ALGORITHM_ROTATING_N_CONTINUE} | 91 | {"raid6_nc", "RAID6 (N continue)", 2, 4, 6, ALGORITHM_ROTATING_N_CONTINUE} |
87 | }; | 92 | }; |
88 | 93 | ||
94 | static unsigned raid10_md_layout_to_copies(int layout) | ||
95 | { | ||
96 | return layout & 0xFF; | ||
97 | } | ||
98 | |||
99 | static int raid10_format_to_md_layout(char *format, unsigned copies) | ||
100 | { | ||
101 | /* 1 "far" copy, and 'copies' "near" copies */ | ||
102 | return (1 << 8) | (copies & 0xFF); | ||
103 | } | ||
104 | |||
89 | static struct raid_type *get_raid_type(char *name) | 105 | static struct raid_type *get_raid_type(char *name) |
90 | { | 106 | { |
91 | int i; | 107 | int i; |
@@ -339,10 +355,16 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size) | |||
339 | * [max_write_behind <sectors>] See '-write-behind=' (man mdadm) | 355 | * [max_write_behind <sectors>] See '-write-behind=' (man mdadm) |
340 | * [stripe_cache <sectors>] Stripe cache size for higher RAIDs | 356 | * [stripe_cache <sectors>] Stripe cache size for higher RAIDs |
341 | * [region_size <sectors>] Defines granularity of bitmap | 357 | * [region_size <sectors>] Defines granularity of bitmap |
358 | * | ||
359 | * RAID10-only options: | ||
360 | * [raid10_copies <# copies>] Number of copies. (Default: 2) | ||
361 | * [raid10_format <near>] Layout algorithm. (Default: near) | ||
342 | */ | 362 | */ |
343 | static int parse_raid_params(struct raid_set *rs, char **argv, | 363 | static int parse_raid_params(struct raid_set *rs, char **argv, |
344 | unsigned num_raid_params) | 364 | unsigned num_raid_params) |
345 | { | 365 | { |
366 | char *raid10_format = "near"; | ||
367 | unsigned raid10_copies = 2; | ||
346 | unsigned i, rebuild_cnt = 0; | 368 | unsigned i, rebuild_cnt = 0; |
347 | unsigned long value, region_size = 0; | 369 | unsigned long value, region_size = 0; |
348 | sector_t sectors_per_dev = rs->ti->len; | 370 | sector_t sectors_per_dev = rs->ti->len; |
@@ -416,11 +438,28 @@ static int parse_raid_params(struct raid_set *rs, char **argv, | |||
416 | } | 438 | } |
417 | 439 | ||
418 | key = argv[i++]; | 440 | key = argv[i++]; |
441 | |||
442 | /* Parameters that take a string value are checked here. */ | ||
443 | if (!strcasecmp(key, "raid10_format")) { | ||
444 | if (rs->raid_type->level != 10) { | ||
445 | rs->ti->error = "'raid10_format' is an invalid parameter for this RAID type"; | ||
446 | return -EINVAL; | ||
447 | } | ||
448 | if (strcmp("near", argv[i])) { | ||
449 | rs->ti->error = "Invalid 'raid10_format' value given"; | ||
450 | return -EINVAL; | ||
451 | } | ||
452 | raid10_format = argv[i]; | ||
453 | rs->print_flags |= DMPF_RAID10_FORMAT; | ||
454 | continue; | ||
455 | } | ||
456 | |||
419 | if (strict_strtoul(argv[i], 10, &value) < 0) { | 457 | if (strict_strtoul(argv[i], 10, &value) < 0) { |
420 | rs->ti->error = "Bad numerical argument given in raid params"; | 458 | rs->ti->error = "Bad numerical argument given in raid params"; |
421 | return -EINVAL; | 459 | return -EINVAL; |
422 | } | 460 | } |
423 | 461 | ||
462 | /* Parameters that take a numeric value are checked here */ | ||
424 | if (!strcasecmp(key, "rebuild")) { | 463 | if (!strcasecmp(key, "rebuild")) { |
425 | rebuild_cnt++; | 464 | rebuild_cnt++; |
426 | 465 | ||
@@ -439,6 +478,7 @@ static int parse_raid_params(struct raid_set *rs, char **argv, | |||
439 | return -EINVAL; | 478 | return -EINVAL; |
440 | } | 479 | } |
441 | break; | 480 | break; |
481 | case 10: | ||
442 | default: | 482 | default: |
443 | DMERR("The rebuild parameter is not supported for %s", rs->raid_type->name); | 483 | DMERR("The rebuild parameter is not supported for %s", rs->raid_type->name); |
444 | rs->ti->error = "Rebuild not supported for this RAID type"; | 484 | rs->ti->error = "Rebuild not supported for this RAID type"; |
@@ -495,7 +535,8 @@ static int parse_raid_params(struct raid_set *rs, char **argv, | |||
495 | */ | 535 | */ |
496 | value /= 2; | 536 | value /= 2; |
497 | 537 | ||
498 | if (rs->raid_type->level < 5) { | 538 | if ((rs->raid_type->level != 5) && |
539 | (rs->raid_type->level != 6)) { | ||
499 | rs->ti->error = "Inappropriate argument: stripe_cache"; | 540 | rs->ti->error = "Inappropriate argument: stripe_cache"; |
500 | return -EINVAL; | 541 | return -EINVAL; |
501 | } | 542 | } |
@@ -520,6 +561,14 @@ static int parse_raid_params(struct raid_set *rs, char **argv, | |||
520 | } else if (!strcasecmp(key, "region_size")) { | 561 | } else if (!strcasecmp(key, "region_size")) { |
521 | rs->print_flags |= DMPF_REGION_SIZE; | 562 | rs->print_flags |= DMPF_REGION_SIZE; |
522 | region_size = value; | 563 | region_size = value; |
564 | } else if (!strcasecmp(key, "raid10_copies") && | ||
565 | (rs->raid_type->level == 10)) { | ||
566 | if ((value < 2) || (value > 0xFF)) { | ||
567 | rs->ti->error = "Bad value for 'raid10_copies'"; | ||
568 | return -EINVAL; | ||
569 | } | ||
570 | rs->print_flags |= DMPF_RAID10_COPIES; | ||
571 | raid10_copies = value; | ||
523 | } else { | 572 | } else { |
524 | DMERR("Unable to parse RAID parameter: %s", key); | 573 | DMERR("Unable to parse RAID parameter: %s", key); |
525 | rs->ti->error = "Unable to parse RAID parameters"; | 574 | rs->ti->error = "Unable to parse RAID parameters"; |
@@ -538,8 +587,22 @@ static int parse_raid_params(struct raid_set *rs, char **argv, | |||
538 | if (dm_set_target_max_io_len(rs->ti, max_io_len)) | 587 | if (dm_set_target_max_io_len(rs->ti, max_io_len)) |
539 | return -EINVAL; | 588 | return -EINVAL; |
540 | 589 | ||
541 | if ((rs->raid_type->level > 1) && | 590 | if (rs->raid_type->level == 10) { |
542 | sector_div(sectors_per_dev, (rs->md.raid_disks - rs->raid_type->parity_devs))) { | 591 | if (raid10_copies > rs->md.raid_disks) { |
592 | rs->ti->error = "Not enough devices to satisfy specification"; | ||
593 | return -EINVAL; | ||
594 | } | ||
595 | |||
596 | /* (Len * #mirrors) / #devices */ | ||
597 | sectors_per_dev = rs->ti->len * raid10_copies; | ||
598 | sector_div(sectors_per_dev, rs->md.raid_disks); | ||
599 | |||
600 | rs->md.layout = raid10_format_to_md_layout(raid10_format, | ||
601 | raid10_copies); | ||
602 | rs->md.new_layout = rs->md.layout; | ||
603 | } else if ((rs->raid_type->level > 1) && | ||
604 | sector_div(sectors_per_dev, | ||
605 | (rs->md.raid_disks - rs->raid_type->parity_devs))) { | ||
543 | rs->ti->error = "Target length not divisible by number of data devices"; | 606 | rs->ti->error = "Target length not divisible by number of data devices"; |
544 | return -EINVAL; | 607 | return -EINVAL; |
545 | } | 608 | } |
@@ -566,6 +629,9 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits) | |||
566 | if (rs->raid_type->level == 1) | 629 | if (rs->raid_type->level == 1) |
567 | return md_raid1_congested(&rs->md, bits); | 630 | return md_raid1_congested(&rs->md, bits); |
568 | 631 | ||
632 | if (rs->raid_type->level == 10) | ||
633 | return md_raid10_congested(&rs->md, bits); | ||
634 | |||
569 | return md_raid5_congested(&rs->md, bits); | 635 | return md_raid5_congested(&rs->md, bits); |
570 | } | 636 | } |
571 | 637 | ||
@@ -884,6 +950,9 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) | |||
884 | case 6: | 950 | case 6: |
885 | redundancy = rs->raid_type->parity_devs; | 951 | redundancy = rs->raid_type->parity_devs; |
886 | break; | 952 | break; |
953 | case 10: | ||
954 | redundancy = raid10_md_layout_to_copies(mddev->layout) - 1; | ||
955 | break; | ||
887 | default: | 956 | default: |
888 | ti->error = "Unknown RAID type"; | 957 | ti->error = "Unknown RAID type"; |
889 | return -EINVAL; | 958 | return -EINVAL; |
@@ -1049,12 +1118,19 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
1049 | goto bad; | 1118 | goto bad; |
1050 | } | 1119 | } |
1051 | 1120 | ||
1121 | if (ti->len != rs->md.array_sectors) { | ||
1122 | ti->error = "Array size does not match requested target length"; | ||
1123 | ret = -EINVAL; | ||
1124 | goto size_mismatch; | ||
1125 | } | ||
1052 | rs->callbacks.congested_fn = raid_is_congested; | 1126 | rs->callbacks.congested_fn = raid_is_congested; |
1053 | dm_table_add_target_callbacks(ti->table, &rs->callbacks); | 1127 | dm_table_add_target_callbacks(ti->table, &rs->callbacks); |
1054 | 1128 | ||
1055 | mddev_suspend(&rs->md); | 1129 | mddev_suspend(&rs->md); |
1056 | return 0; | 1130 | return 0; |
1057 | 1131 | ||
1132 | size_mismatch: | ||
1133 | md_stop(&rs->md); | ||
1058 | bad: | 1134 | bad: |
1059 | context_free(rs); | 1135 | context_free(rs); |
1060 | 1136 | ||
@@ -1203,6 +1279,13 @@ static int raid_status(struct dm_target *ti, status_type_t type, | |||
1203 | DMEMIT(" region_size %lu", | 1279 | DMEMIT(" region_size %lu", |
1204 | rs->md.bitmap_info.chunksize >> 9); | 1280 | rs->md.bitmap_info.chunksize >> 9); |
1205 | 1281 | ||
1282 | if (rs->print_flags & DMPF_RAID10_COPIES) | ||
1283 | DMEMIT(" raid10_copies %u", | ||
1284 | raid10_md_layout_to_copies(rs->md.layout)); | ||
1285 | |||
1286 | if (rs->print_flags & DMPF_RAID10_FORMAT) | ||
1287 | DMEMIT(" raid10_format near"); | ||
1288 | |||
1206 | DMEMIT(" %d", rs->md.raid_disks); | 1289 | DMEMIT(" %d", rs->md.raid_disks); |
1207 | for (i = 0; i < rs->md.raid_disks; i++) { | 1290 | for (i = 0; i < rs->md.raid_disks; i++) { |
1208 | if (rs->dev[i].meta_dev) | 1291 | if (rs->dev[i].meta_dev) |
@@ -1277,7 +1360,7 @@ static void raid_resume(struct dm_target *ti) | |||
1277 | 1360 | ||
1278 | static struct target_type raid_target = { | 1361 | static struct target_type raid_target = { |
1279 | .name = "raid", | 1362 | .name = "raid", |
1280 | .version = {1, 2, 0}, | 1363 | .version = {1, 3, 0}, |
1281 | .module = THIS_MODULE, | 1364 | .module = THIS_MODULE, |
1282 | .ctr = raid_ctr, | 1365 | .ctr = raid_ctr, |
1283 | .dtr = raid_dtr, | 1366 | .dtr = raid_dtr, |
@@ -1304,6 +1387,8 @@ module_init(dm_raid_init); | |||
1304 | module_exit(dm_raid_exit); | 1387 | module_exit(dm_raid_exit); |
1305 | 1388 | ||
1306 | MODULE_DESCRIPTION(DM_NAME " raid4/5/6 target"); | 1389 | MODULE_DESCRIPTION(DM_NAME " raid4/5/6 target"); |
1390 | MODULE_ALIAS("dm-raid1"); | ||
1391 | MODULE_ALIAS("dm-raid10"); | ||
1307 | MODULE_ALIAS("dm-raid4"); | 1392 | MODULE_ALIAS("dm-raid4"); |
1308 | MODULE_ALIAS("dm-raid5"); | 1393 | MODULE_ALIAS("dm-raid5"); |
1309 | MODULE_ALIAS("dm-raid6"); | 1394 | MODULE_ALIAS("dm-raid6"); |