author     Linus Torvalds <torvalds@linux-foundation.org>  2012-08-01 12:02:01 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-08-01 12:02:01 -0400
commit     fcff06c438b60f415af5983efe92811d6aa02ad1 (patch)
tree       704f6598b2de60a86774bc5cf152d4f051bd2dc4 /drivers/md
parent     068535f1fef4c90aee23eb7b9b9a71c5b72d7cd0 (diff)
parent     63f33b8dda88923487004b20fba825486d009e7b (diff)
Merge branch 'for-next' of git://neil.brown.name/md
Pull md updates from NeilBrown.
* 'for-next' of git://neil.brown.name/md:
DM RAID: Add support for MD RAID10
md/RAID1: Add missing case for attempting to repair known bad blocks.
md/raid5: For odirect-write performance, do not set STRIPE_PREREAD_ACTIVE.
md/raid1: don't abort a resync on the first badblock.
md: remove duplicated test on ->openers when calling do_md_stop()
raid5: Add R5_ReadNoMerge flag which prevents bio from merging at block layer
md/raid1: prevent merging too large requests
md/raid1: read balance chooses idlest disk for SSD
md/raid1: make sequential read detection per disk based
MD RAID10: Export md_raid10_congested
MD: Move macros from raid1*.h to raid1*.c
MD RAID1: rename mirror_info structure
MD RAID10: rename mirror_info structure
MD RAID10: Fix compiler warning.
raid5: add a per-stripe lock
raid5: remove unnecessary bitmap write optimization
raid5: lockless access raid5 overrided bi_phys_segments
raid5: reduce chance release_stripe() taking device_lock
Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/dm-raid.c |  95
-rw-r--r--  drivers/md/md.c      |   8
-rw-r--r--  drivers/md/raid1.c   | 164
-rw-r--r--  drivers/md/raid1.h   |  30
-rw-r--r--  drivers/md/raid10.c  |  92
-rw-r--r--  drivers/md/raid10.h  |  23
-rw-r--r--  drivers/md/raid5.c   | 205
-rw-r--r--  drivers/md/raid5.h   |   2
8 files changed, 400 insertions(+), 219 deletions(-)
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index f2f29c526544..982e3e390c45 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -11,6 +11,7 @@
 #include "md.h"
 #include "raid1.h"
 #include "raid5.h"
+#include "raid10.h"
 #include "bitmap.h"
 
 #include <linux/device-mapper.h>
@@ -52,7 +53,10 @@ struct raid_dev {
 #define DMPF_MAX_RECOVERY_RATE 0x20
 #define DMPF_MAX_WRITE_BEHIND 0x40
 #define DMPF_STRIPE_CACHE 0x80
-#define DMPF_REGION_SIZE 0X100
+#define DMPF_REGION_SIZE 0x100
+#define DMPF_RAID10_COPIES 0x200
+#define DMPF_RAID10_FORMAT 0x400
+
 struct raid_set {
         struct dm_target *ti;
 
@@ -76,6 +80,7 @@ static struct raid_type {
         const unsigned algorithm; /* RAID algorithm. */
 } raid_types[] = {
         {"raid1", "RAID1 (mirroring)", 0, 2, 1, 0 /* NONE */},
+        {"raid10", "RAID10 (striped mirrors)", 0, 2, 10, UINT_MAX /* Varies */},
         {"raid4", "RAID4 (dedicated parity disk)", 1, 2, 5, ALGORITHM_PARITY_0},
         {"raid5_la", "RAID5 (left asymmetric)", 1, 2, 5, ALGORITHM_LEFT_ASYMMETRIC},
         {"raid5_ra", "RAID5 (right asymmetric)", 1, 2, 5, ALGORITHM_RIGHT_ASYMMETRIC},
@@ -86,6 +91,17 @@ static struct raid_type {
         {"raid6_nc", "RAID6 (N continue)", 2, 4, 6, ALGORITHM_ROTATING_N_CONTINUE}
 };
 
+static unsigned raid10_md_layout_to_copies(int layout)
+{
+        return layout & 0xFF;
+}
+
+static int raid10_format_to_md_layout(char *format, unsigned copies)
+{
+        /* 1 "far" copy, and 'copies' "near" copies */
+        return (1 << 8) | (copies & 0xFF);
+}
+
 static struct raid_type *get_raid_type(char *name)
 {
         int i;
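The two helpers above define the whole layout encoding dm-raid uses for RAID10 here: the low byte of the md layout word holds the "near" copy count, and the next byte the "far" copy count, which this patch fixes at 1. A quick standalone illustration of the round trip (userspace C, not kernel code; the names shadow the helpers above):

#include <assert.h>
#include <stdio.h>

/* Same packing as the helpers above: bits 0-7 hold the "near" copy
 * count, bits 8-15 the "far" copy count (fixed at 1 in this patch). */
static unsigned layout_to_copies(int layout)
{
        return layout & 0xFF;
}

static int format_to_layout(unsigned copies)
{
        return (1 << 8) | (copies & 0xFF);
}

int main(void)
{
        int layout = format_to_layout(2);

        /* 1 far copy, 2 near copies -> 0x102 */
        printf("layout = 0x%x\n", layout);
        assert(layout == 0x102);
        assert(layout_to_copies(layout) == 2);
        return 0;
}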
@@ -339,10 +355,16 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size)
  * [max_write_behind <sectors>] See '-write-behind=' (man mdadm)
  * [stripe_cache <sectors>] Stripe cache size for higher RAIDs
  * [region_size <sectors>] Defines granularity of bitmap
+ *
+ * RAID10-only options:
+ * [raid10_copies <# copies>] Number of copies. (Default: 2)
+ * [raid10_format <near>] Layout algorithm. (Default: near)
  */
 static int parse_raid_params(struct raid_set *rs, char **argv,
                              unsigned num_raid_params)
 {
+        char *raid10_format = "near";
+        unsigned raid10_copies = 2;
         unsigned i, rebuild_cnt = 0;
         unsigned long value, region_size = 0;
         sector_t sectors_per_dev = rs->ti->len;
@@ -416,11 +438,28 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
                 }
 
                 key = argv[i++];
+
+                /* Parameters that take a string value are checked here. */
+                if (!strcasecmp(key, "raid10_format")) {
+                        if (rs->raid_type->level != 10) {
+                                rs->ti->error = "'raid10_format' is an invalid parameter for this RAID type";
+                                return -EINVAL;
+                        }
+                        if (strcmp("near", argv[i])) {
+                                rs->ti->error = "Invalid 'raid10_format' value given";
+                                return -EINVAL;
+                        }
+                        raid10_format = argv[i];
+                        rs->print_flags |= DMPF_RAID10_FORMAT;
+                        continue;
+                }
+
                 if (strict_strtoul(argv[i], 10, &value) < 0) {
                         rs->ti->error = "Bad numerical argument given in raid params";
                         return -EINVAL;
                 }
 
+                /* Parameters that take a numeric value are checked here */
                 if (!strcasecmp(key, "rebuild")) {
                         rebuild_cnt++;
 
@@ -439,6 +478,7 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
                                 return -EINVAL;
                         }
                         break;
+                case 10:
                 default:
                         DMERR("The rebuild parameter is not supported for %s", rs->raid_type->name);
                         rs->ti->error = "Rebuild not supported for this RAID type";
@@ -495,7 +535,8 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
                          */
                         value /= 2;
 
-                        if (rs->raid_type->level < 5) {
+                        if ((rs->raid_type->level != 5) &&
+                            (rs->raid_type->level != 6)) {
                                 rs->ti->error = "Inappropriate argument: stripe_cache";
                                 return -EINVAL;
                         }
@@ -520,6 +561,14 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
                 } else if (!strcasecmp(key, "region_size")) {
                         rs->print_flags |= DMPF_REGION_SIZE;
                         region_size = value;
+                } else if (!strcasecmp(key, "raid10_copies") &&
+                           (rs->raid_type->level == 10)) {
+                        if ((value < 2) || (value > 0xFF)) {
+                                rs->ti->error = "Bad value for 'raid10_copies'";
+                                return -EINVAL;
+                        }
+                        rs->print_flags |= DMPF_RAID10_COPIES;
+                        raid10_copies = value;
                 } else {
                         DMERR("Unable to parse RAID parameter: %s", key);
                         rs->ti->error = "Unable to parse RAID parameters";
@@ -538,8 +587,22 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
         if (dm_set_target_max_io_len(rs->ti, max_io_len))
                 return -EINVAL;
 
-        if ((rs->raid_type->level > 1) &&
-            sector_div(sectors_per_dev, (rs->md.raid_disks - rs->raid_type->parity_devs))) {
+        if (rs->raid_type->level == 10) {
+                if (raid10_copies > rs->md.raid_disks) {
+                        rs->ti->error = "Not enough devices to satisfy specification";
+                        return -EINVAL;
+                }
+
+                /* (Len * #mirrors) / #devices */
+                sectors_per_dev = rs->ti->len * raid10_copies;
+                sector_div(sectors_per_dev, rs->md.raid_disks);
+
+                rs->md.layout = raid10_format_to_md_layout(raid10_format,
+                                                           raid10_copies);
+                rs->md.new_layout = rs->md.layout;
+        } else if ((rs->raid_type->level > 1) &&
+                   sector_div(sectors_per_dev,
+                              (rs->md.raid_disks - rs->raid_type->parity_devs))) {
                 rs->ti->error = "Target length not divisible by number of data devices";
                 return -EINVAL;
         }
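The "(Len * #mirrors) / #devices" step above is what sizes each member device for RAID10. A worked example with made-up numbers: a 1,000,000-sector target with raid10_copies=2 across 4 disks needs 1,000,000 * 2 / 4 = 500,000 sectors on each disk. A minimal sketch (illustrative values only, plain integer division standing in for sector_div()):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* Illustrative values only: ti->len, raid10_copies, raid_disks */
        uint64_t ti_len = 1000000, copies = 2, raid_disks = 4;

        /* (Len * #mirrors) / #devices, as in parse_raid_params() above */
        uint64_t sectors_per_dev = ti_len * copies / raid_disks;

        printf("%llu sectors per device\n",
               (unsigned long long)sectors_per_dev); /* prints 500000 */
        return 0;
}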
@@ -566,6 +629,9 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits)
         if (rs->raid_type->level == 1)
                 return md_raid1_congested(&rs->md, bits);
 
+        if (rs->raid_type->level == 10)
+                return md_raid10_congested(&rs->md, bits);
+
         return md_raid5_congested(&rs->md, bits);
 }
 
@@ -884,6 +950,9 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
         case 6:
                 redundancy = rs->raid_type->parity_devs;
                 break;
+        case 10:
+                redundancy = raid10_md_layout_to_copies(mddev->layout) - 1;
+                break;
         default:
                 ti->error = "Unknown RAID type";
                 return -EINVAL;
@@ -1049,12 +1118,19 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
                 goto bad;
         }
 
+        if (ti->len != rs->md.array_sectors) {
+                ti->error = "Array size does not match requested target length";
+                ret = -EINVAL;
+                goto size_mismatch;
+        }
         rs->callbacks.congested_fn = raid_is_congested;
         dm_table_add_target_callbacks(ti->table, &rs->callbacks);
 
         mddev_suspend(&rs->md);
         return 0;
 
+size_mismatch:
+        md_stop(&rs->md);
 bad:
         context_free(rs);
 
@@ -1203,6 +1279,13 @@ static int raid_status(struct dm_target *ti, status_type_t type,
                 DMEMIT(" region_size %lu",
                        rs->md.bitmap_info.chunksize >> 9);
 
+        if (rs->print_flags & DMPF_RAID10_COPIES)
+                DMEMIT(" raid10_copies %u",
+                       raid10_md_layout_to_copies(rs->md.layout));
+
+        if (rs->print_flags & DMPF_RAID10_FORMAT)
+                DMEMIT(" raid10_format near");
+
         DMEMIT(" %d", rs->md.raid_disks);
         for (i = 0; i < rs->md.raid_disks; i++) {
                 if (rs->dev[i].meta_dev)
@@ -1277,7 +1360,7 @@ static void raid_resume(struct dm_target *ti)
 
 static struct target_type raid_target = {
         .name = "raid",
-        .version = {1, 2, 0},
+        .version = {1, 3, 0},
         .module = THIS_MODULE,
         .ctr = raid_ctr,
         .dtr = raid_dtr,
@@ -1304,6 +1387,8 @@ module_init(dm_raid_init);
 module_exit(dm_raid_exit);
 
 MODULE_DESCRIPTION(DM_NAME " raid4/5/6 target");
+MODULE_ALIAS("dm-raid1");
+MODULE_ALIAS("dm-raid10");
 MODULE_ALIAS("dm-raid4");
 MODULE_ALIAS("dm-raid5");
 MODULE_ALIAS("dm-raid6");
diff --git a/drivers/md/md.c b/drivers/md/md.c
index d5ab4493c8be..f6c46109b071 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3942,17 +3942,13 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
                 break;
         case clear:
                 /* stopping an active array */
-                if (atomic_read(&mddev->openers) > 0)
-                        return -EBUSY;
                 err = do_md_stop(mddev, 0, NULL);
                 break;
         case inactive:
                 /* stopping an active array */
-                if (mddev->pers) {
-                        if (atomic_read(&mddev->openers) > 0)
-                                return -EBUSY;
+                if (mddev->pers)
                         err = do_md_stop(mddev, 2, NULL);
-                } else
+                else
                         err = 0; /* already inactive */
                 break;
         case suspended:
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index cacd008d6864..197f62681db5 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -46,6 +46,20 @@
  */
 #define NR_RAID1_BIOS 256
 
+/* when we get a read error on a read-only array, we redirect to another
+ * device without failing the first device, or trying to over-write to
+ * correct the read error. To keep track of bad blocks on a per-bio
+ * level, we store IO_BLOCKED in the appropriate 'bios' pointer
+ */
+#define IO_BLOCKED ((struct bio *)1)
+/* When we successfully write to a known bad-block, we need to remove the
+ * bad-block marking which must be done from process context. So we record
+ * the success by setting devs[n].bio to IO_MADE_GOOD
+ */
+#define IO_MADE_GOOD ((struct bio *)2)
+
+#define BIO_SPECIAL(bio) ((unsigned long)bio <= 2)
+
 /* When there are this many requests queue to be written by
  * the raid1 thread, we become 'congested' to provide back-pressure
  * for writeback.
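IO_BLOCKED and IO_MADE_GOOD, moved into raid1.c above, are sentinel pointers: the integers 1 and 2 cast to struct bio *, which can never be real bio addresses, so BIO_SPECIAL() can cheaply tell markers from genuine bios. A small userspace sketch of the pattern (the struct here is an opaque stand-in, not the kernel's):

#include <stdio.h>

struct bio; /* opaque, stands in for the kernel's struct bio */

#define IO_BLOCKED   ((struct bio *)1)
#define IO_MADE_GOOD ((struct bio *)2)
#define BIO_SPECIAL(bio) ((unsigned long)(bio) <= 2)

static void classify(struct bio *b)
{
        if (b == IO_BLOCKED)
                printf("blocked: skip this device for this bio\n");
        else if (b == IO_MADE_GOOD)
                printf("made good: clear the bad-block record later\n");
        else if (!BIO_SPECIAL(b))
                printf("real bio: normal completion path\n");
}

int main(void)
{
        classify(IO_BLOCKED);
        classify(IO_MADE_GOOD);
        classify((struct bio *)0x1000); /* stands in for a real pointer */
        return 0;
}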
@@ -483,12 +497,14 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
         const sector_t this_sector = r1_bio->sector;
         int sectors;
         int best_good_sectors;
-        int start_disk;
-        int best_disk;
-        int i;
+        int best_disk, best_dist_disk, best_pending_disk;
+        int has_nonrot_disk;
+        int disk;
         sector_t best_dist;
+        unsigned int min_pending;
         struct md_rdev *rdev;
         int choose_first;
+        int choose_next_idle;
 
         rcu_read_lock();
         /*
@@ -499,26 +515,26 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 retry:
         sectors = r1_bio->sectors;
         best_disk = -1;
+        best_dist_disk = -1;
         best_dist = MaxSector;
+        best_pending_disk = -1;
+        min_pending = UINT_MAX;
         best_good_sectors = 0;
+        has_nonrot_disk = 0;
+        choose_next_idle = 0;
 
         if (conf->mddev->recovery_cp < MaxSector &&
-            (this_sector + sectors >= conf->next_resync)) {
+            (this_sector + sectors >= conf->next_resync))
                 choose_first = 1;
-                start_disk = 0;
-        } else {
+        else
                 choose_first = 0;
-                start_disk = conf->last_used;
-        }
 
-        for (i = 0 ; i < conf->raid_disks * 2 ; i++) {
+        for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) {
                 sector_t dist;
                 sector_t first_bad;
                 int bad_sectors;
-
-                int disk = start_disk + i;
-                if (disk >= conf->raid_disks * 2)
-                        disk -= conf->raid_disks * 2;
+                unsigned int pending;
+                bool nonrot;
 
                 rdev = rcu_dereference(conf->mirrors[disk].rdev);
                 if (r1_bio->bios[disk] == IO_BLOCKED
@@ -577,22 +593,77 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
                 } else
                         best_good_sectors = sectors;
 
+                nonrot = blk_queue_nonrot(bdev_get_queue(rdev->bdev));
+                has_nonrot_disk |= nonrot;
+                pending = atomic_read(&rdev->nr_pending);
                 dist = abs(this_sector - conf->mirrors[disk].head_position);
-                if (choose_first
-                    /* Don't change to another disk for sequential reads */
-                    || conf->next_seq_sect == this_sector
-                    || dist == 0
-                    /* If device is idle, use it */
-                    || atomic_read(&rdev->nr_pending) == 0) {
+                if (choose_first) {
+                        best_disk = disk;
+                        break;
+                }
+                /* Don't change to another disk for sequential reads */
+                if (conf->mirrors[disk].next_seq_sect == this_sector
+                    || dist == 0) {
+                        int opt_iosize = bdev_io_opt(rdev->bdev) >> 9;
+                        struct raid1_info *mirror = &conf->mirrors[disk];
+
+                        best_disk = disk;
+                        /*
+                         * If buffered sequential IO size exceeds optimal
+                         * iosize, check if there is idle disk. If yes, choose
+                         * the idle disk. read_balance could already choose an
+                         * idle disk before noticing it's a sequential IO in
+                         * this disk. This doesn't matter because this disk
+                         * will idle, next time it will be utilized after the
+                         * first disk has IO size exceeds optimal iosize. In
+                         * this way, iosize of the first disk will be optimal
+                         * iosize at least. iosize of the second disk might be
+                         * small, but not a big deal since when the second disk
+                         * starts IO, the first disk is likely still busy.
+                         */
+                        if (nonrot && opt_iosize > 0 &&
+                            mirror->seq_start != MaxSector &&
+                            mirror->next_seq_sect > opt_iosize &&
+                            mirror->next_seq_sect - opt_iosize >=
+                            mirror->seq_start) {
+                                choose_next_idle = 1;
+                                continue;
+                        }
+                        break;
+                }
+                /* If device is idle, use it */
+                if (pending == 0) {
                         best_disk = disk;
                         break;
                 }
+
+                if (choose_next_idle)
+                        continue;
+
+                if (min_pending > pending) {
+                        min_pending = pending;
+                        best_pending_disk = disk;
+                }
+
                 if (dist < best_dist) {
                         best_dist = dist;
-                        best_disk = disk;
+                        best_dist_disk = disk;
                 }
         }
 
+        /*
+         * If all disks are rotational, choose the closest disk. If any disk is
+         * non-rotational, choose the disk with less pending request even the
+         * disk is rotational, which might/might not be optimal for raids with
+         * mixed rotation/non-rotational disks depending on workload.
+         */
+        if (best_disk == -1) {
+                if (has_nonrot_disk)
+                        best_disk = best_pending_disk;
+                else
+                        best_disk = best_dist_disk;
+        }
+
         if (best_disk >= 0) {
                 rdev = rcu_dereference(conf->mirrors[best_disk].rdev);
                 if (!rdev)
@@ -606,8 +677,11 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
                         goto retry;
                 }
                 sectors = best_good_sectors;
-                conf->next_seq_sect = this_sector + sectors;
-                conf->last_used = best_disk;
+
+                if (conf->mirrors[best_disk].next_seq_sect != this_sector)
+                        conf->mirrors[best_disk].seq_start = this_sector;
+
+                conf->mirrors[best_disk].next_seq_sect = this_sector + sectors;
         }
         rcu_read_unlock();
         *max_sectors = sectors;
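Taken together, the rewritten read_balance() above works in three stages: keep sequential reads on the current disk (hopping to an idle SSD once the streak passes the optimal I/O size), take any idle disk outright, and otherwise remember both the least-loaded and the nearest disk, preferring load over seek distance whenever a non-rotational member is present. A compressed userspace sketch of that final selection (hypothetical, simplified types; the kernel code above is authoritative):

#include <limits.h>
#include <stdio.h>

/* Hypothetical, simplified per-mirror state for illustration only */
struct disk_state {
        int nonrot;            /* SSD? */
        unsigned int pending;  /* in-flight requests */
        long long dist;        /* |sector - head_position| */
};

static int pick_disk(const struct disk_state *d, int n)
{
        int best_pending_disk = -1, best_dist_disk = -1, has_nonrot = 0;
        unsigned int min_pending = UINT_MAX;
        long long best_dist = LLONG_MAX;

        for (int i = 0; i < n; i++) {
                has_nonrot |= d[i].nonrot;
                if (d[i].pending == 0)
                        return i;        /* idle disk wins outright */
                if (d[i].pending < min_pending) {
                        min_pending = d[i].pending;
                        best_pending_disk = i;
                }
                if (d[i].dist < best_dist) {
                        best_dist = d[i].dist;
                        best_dist_disk = i;
                }
        }
        /* an SSD is present: queue depth matters more than seek distance */
        return has_nonrot ? best_pending_disk : best_dist_disk;
}

int main(void)
{
        struct disk_state d[2] = {
                { .nonrot = 1, .pending = 3, .dist = 10 },
                { .nonrot = 0, .pending = 1, .dist = 500 },
        };

        /* SSD present, nobody idle -> least-loaded disk 1 wins */
        printf("chose disk %d\n", pick_disk(d, 2));
        return 0;
}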
@@ -873,7 +947,7 @@ do_sync_io:
 static void make_request(struct mddev *mddev, struct bio * bio)
 {
         struct r1conf *conf = mddev->private;
-        struct mirror_info *mirror;
+        struct raid1_info *mirror;
         struct r1bio *r1_bio;
         struct bio *read_bio;
         int i, disks;
@@ -1364,7 +1438,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
         struct r1conf *conf = mddev->private;
         int err = -EEXIST;
         int mirror = 0;
-        struct mirror_info *p;
+        struct raid1_info *p;
         int first = 0;
         int last = conf->raid_disks - 1;
         struct request_queue *q = bdev_get_queue(rdev->bdev);
@@ -1433,7 +1507,7 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
         struct r1conf *conf = mddev->private;
         int err = 0;
         int number = rdev->raid_disk;
-        struct mirror_info *p = conf->mirrors+ number;
+        struct raid1_info *p = conf->mirrors + number;
 
         if (rdev != p->rdev)
                 p = conf->mirrors + conf->raid_disks + number;
@@ -2371,6 +2445,18 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
                         bio->bi_rw = READ;
                         bio->bi_end_io = end_sync_read;
                         read_targets++;
+                } else if (!test_bit(WriteErrorSeen, &rdev->flags) &&
+                           test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
+                           !test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
+                        /*
+                         * The device is suitable for reading (InSync),
+                         * but has bad block(s) here. Let's try to correct them,
+                         * if we are doing resync or repair. Otherwise, leave
+                         * this device alone for this sync request.
+                         */
+                        bio->bi_rw = WRITE;
+                        bio->bi_end_io = end_sync_write;
+                        write_targets++;
                 }
         }
         if (bio->bi_end_io) {
@@ -2428,7 +2514,10 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
                 /* There is nowhere to write, so all non-sync
                  * drives must be failed - so we are finished
                  */
-                sector_t rv = max_sector - sector_nr;
+                sector_t rv;
+                if (min_bad > 0)
+                        max_sector = sector_nr + min_bad;
+                rv = max_sector - sector_nr;
                 *skipped = 1;
                 put_buf(r1_bio);
                 return rv;
@@ -2521,7 +2610,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 {
         struct r1conf *conf;
         int i;
-        struct mirror_info *disk;
+        struct raid1_info *disk;
         struct md_rdev *rdev;
         int err = -ENOMEM;
 
@@ -2529,7 +2618,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
         if (!conf)
                 goto abort;
 
-        conf->mirrors = kzalloc(sizeof(struct mirror_info)
+        conf->mirrors = kzalloc(sizeof(struct raid1_info)
                                 * mddev->raid_disks * 2,
                                 GFP_KERNEL);
         if (!conf->mirrors)
@@ -2572,6 +2661,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
                         mddev->merge_check_needed = 1;
 
                 disk->head_position = 0;
+                disk->seq_start = MaxSector;
         }
         conf->raid_disks = mddev->raid_disks;
         conf->mddev = mddev;
@@ -2585,7 +2675,6 @@ static struct r1conf *setup_conf(struct mddev *mddev)
         conf->recovery_disabled = mddev->recovery_disabled - 1;
 
         err = -EIO;
-        conf->last_used = -1;
         for (i = 0; i < conf->raid_disks * 2; i++) {
 
                 disk = conf->mirrors + i;
@@ -2611,19 +2700,9 @@ static struct r1conf *setup_conf(struct mddev *mddev)
                         if (disk->rdev &&
                             (disk->rdev->saved_raid_disk < 0))
                                 conf->fullsync = 1;
-                } else if (conf->last_used < 0)
-                        /*
-                         * The first working device is used as a
-                         * starting point to read balancing.
-                         */
-                        conf->last_used = i;
+                }
         }
 
-        if (conf->last_used < 0) {
-                printk(KERN_ERR "md/raid1:%s: no operational mirrors\n",
-                       mdname(mddev));
-                goto abort;
-        }
         err = -ENOMEM;
         conf->thread = md_register_thread(raid1d, mddev, "raid1");
         if (!conf->thread) {
@@ -2798,7 +2877,7 @@ static int raid1_reshape(struct mddev *mddev)
          */
         mempool_t *newpool, *oldpool;
         struct pool_info *newpoolinfo;
-        struct mirror_info *newmirrors;
+        struct raid1_info *newmirrors;
         struct r1conf *conf = mddev->private;
         int cnt, raid_disks;
         unsigned long flags;
@@ -2841,7 +2920,7 @@ static int raid1_reshape(struct mddev *mddev)
                 kfree(newpoolinfo);
                 return -ENOMEM;
         }
-        newmirrors = kzalloc(sizeof(struct mirror_info) * raid_disks * 2,
+        newmirrors = kzalloc(sizeof(struct raid1_info) * raid_disks * 2,
                              GFP_KERNEL);
         if (!newmirrors) {
                 kfree(newpoolinfo);
@@ -2880,7 +2959,6 @@ static int raid1_reshape(struct mddev *mddev)
         conf->raid_disks = mddev->raid_disks = raid_disks;
         mddev->delta_disks = 0;
 
-        conf->last_used = 0; /* just make sure it is in-range */
         lower_barrier(conf);
 
         set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index 80ded139314c..0ff3715fb7eb 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -1,9 +1,15 @@
 #ifndef _RAID1_H
 #define _RAID1_H
 
-struct mirror_info {
+struct raid1_info {
         struct md_rdev *rdev;
         sector_t head_position;
+
+        /* When choosing the best device for a read (read_balance())
+         * we try to keep sequential reads on the same device
+         */
+        sector_t next_seq_sect;
+        sector_t seq_start;
 };
 
 /*
@@ -24,17 +30,11 @@ struct pool_info {
 
 struct r1conf {
         struct mddev *mddev;
-        struct mirror_info *mirrors;    /* twice 'raid_disks' to
+        struct raid1_info *mirrors;     /* twice 'raid_disks' to
                                          * allow for replacements.
                                          */
         int raid_disks;
 
-        /* When choose the best device for a read (read_balance())
-         * we try to keep sequential reads one the same device
-         * using 'last_used' and 'next_seq_sect'
-         */
-        int last_used;
-        sector_t next_seq_sect;
         /* During resync, read_balancing is only allowed on the part
          * of the array that has been resynced. 'next_resync' tells us
          * where that is.
@@ -135,20 +135,6 @@ struct r1bio {
 /* DO NOT PUT ANY NEW FIELDS HERE - bios array is contiguously alloced*/
 };
 
-/* when we get a read error on a read-only array, we redirect to another
- * device without failing the first device, or trying to over-write to
- * correct the read error. To keep track of bad blocks on a per-bio
- * level, we store IO_BLOCKED in the appropriate 'bios' pointer
- */
-#define IO_BLOCKED ((struct bio *)1)
-/* When we successfully write to a known bad-block, we need to remove the
- * bad-block marking which must be done from process context. So we record
- * the success by setting bios[n] to IO_MADE_GOOD
- */
-#define IO_MADE_GOOD ((struct bio *)2)
-
-#define BIO_SPECIAL(bio) ((unsigned long)bio <= 2)
-
 /* bits for r1bio.state */
 #define R1BIO_Uptodate 0
 #define R1BIO_IsSync 1
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 8da6282254c3..e2549deab7c3 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -60,7 +60,21 @@
  */
 #define NR_RAID10_BIOS 256
 
-/* When there are this many requests queue to be written by
+/* when we get a read error on a read-only array, we redirect to another
+ * device without failing the first device, or trying to over-write to
+ * correct the read error. To keep track of bad blocks on a per-bio
+ * level, we store IO_BLOCKED in the appropriate 'bios' pointer
+ */
+#define IO_BLOCKED ((struct bio *)1)
+/* When we successfully write to a known bad-block, we need to remove the
+ * bad-block marking which must be done from process context. So we record
+ * the success by setting devs[n].bio to IO_MADE_GOOD
+ */
+#define IO_MADE_GOOD ((struct bio *)2)
+
+#define BIO_SPECIAL(bio) ((unsigned long)bio <= 2)
+
+/* When there are this many requests queued to be written by
  * the raid10 thread, we become 'congested' to provide back-pressure
  * for writeback.
  */
@@ -717,7 +731,7 @@ static struct md_rdev *read_balance(struct r10conf *conf,
         int sectors = r10_bio->sectors;
         int best_good_sectors;
         sector_t new_distance, best_dist;
-        struct md_rdev *rdev, *best_rdev;
+        struct md_rdev *best_rdev, *rdev = NULL;
         int do_balance;
         int best_slot;
         struct geom *geo = &conf->geo;
@@ -839,9 +853,8 @@ retry:
         return rdev;
 }
 
-static int raid10_congested(void *data, int bits)
+int md_raid10_congested(struct mddev *mddev, int bits)
 {
-        struct mddev *mddev = data;
         struct r10conf *conf = mddev->private;
         int i, ret = 0;
 
@@ -849,8 +862,6 @@ static int raid10_congested(void *data, int bits)
             conf->pending_count >= max_queued_requests)
                 return 1;
 
-        if (mddev_congested(mddev, bits))
-                return 1;
         rcu_read_lock();
         for (i = 0;
              (i < conf->geo.raid_disks || i < conf->prev.raid_disks)
@@ -866,6 +877,15 @@ static int raid10_congested(void *data, int bits)
         rcu_read_unlock();
         return ret;
 }
+EXPORT_SYMBOL_GPL(md_raid10_congested);
+
+static int raid10_congested(void *data, int bits)
+{
+        struct mddev *mddev = data;
+
+        return mddev_congested(mddev, bits) ||
+                md_raid10_congested(mddev, bits);
+}
 
 static void flush_pending_writes(struct r10conf *conf)
 {
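The restructuring above follows a common export pattern: the per-personality congestion scan becomes md_raid10_congested(), exported so dm-raid can call it directly, while the queue's private congested_fn shrinks to a wrapper that also consults the generic mddev state. A toy userspace sketch of the composition (hypothetical types, not the kernel API):

#include <stdio.h>

struct mddev { int busy; };

/* "exported" helper: per-personality congestion state only */
static int md_raid10_congested(struct mddev *m, int bits)
{
        return m->busy; /* stands in for the per-device rcu loop */
}

/* generic, device-wide check */
static int mddev_congested(struct mddev *m, int bits)
{
        return 0; /* stands in for mddev->suspended etc. */
}

/* the private callback just composes the two */
static int raid10_congested(void *data, int bits)
{
        struct mddev *m = data;

        return mddev_congested(m, bits) || md_raid10_congested(m, bits);
}

int main(void)
{
        struct mddev m = { .busy = 1 };

        printf("congested=%d\n", raid10_congested(&m, 0));
        return 0;
}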
@@ -1546,7 +1566,7 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
 static void print_conf(struct r10conf *conf)
 {
         int i;
-        struct mirror_info *tmp;
+        struct raid10_info *tmp;
 
         printk(KERN_DEBUG "RAID10 conf printout:\n");
         if (!conf) {
@@ -1580,7 +1600,7 @@ static int raid10_spare_active(struct mddev *mddev)
 {
         int i;
         struct r10conf *conf = mddev->private;
-        struct mirror_info *tmp;
+        struct raid10_info *tmp;
         int count = 0;
         unsigned long flags;
 
@@ -1655,7 +1675,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
         else
                 mirror = first;
         for ( ; mirror <= last ; mirror++) {
-                struct mirror_info *p = &conf->mirrors[mirror];
+                struct raid10_info *p = &conf->mirrors[mirror];
                 if (p->recovery_disabled == mddev->recovery_disabled)
                         continue;
                 if (p->rdev) {
@@ -1709,7 +1729,7 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
         int err = 0;
         int number = rdev->raid_disk;
         struct md_rdev **rdevp;
-        struct mirror_info *p = conf->mirrors + number;
+        struct raid10_info *p = conf->mirrors + number;
 
         print_conf(conf);
         if (rdev == p->rdev)
@@ -2876,7 +2896,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
                         sector_t sect;
                         int must_sync;
                         int any_working;
-                        struct mirror_info *mirror = &conf->mirrors[i];
+                        struct raid10_info *mirror = &conf->mirrors[i];
 
                         if ((mirror->rdev == NULL ||
                              test_bit(In_sync, &mirror->rdev->flags))
@@ -3388,7 +3408,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
                 goto out;
 
         /* FIXME calc properly */
-        conf->mirrors = kzalloc(sizeof(struct mirror_info)*(mddev->raid_disks +
+        conf->mirrors = kzalloc(sizeof(struct raid10_info)*(mddev->raid_disks +
                                                             max(0,mddev->delta_disks)),
                                 GFP_KERNEL);
         if (!conf->mirrors)
@@ -3452,7 +3472,7 @@ static int run(struct mddev *mddev)
 {
         struct r10conf *conf;
         int i, disk_idx, chunk_size;
-        struct mirror_info *disk;
+        struct raid10_info *disk;
         struct md_rdev *rdev;
         sector_t size;
         sector_t min_offset_diff = 0;
@@ -3472,12 +3492,14 @@ static int run(struct mddev *mddev)
         conf->thread = NULL;
 
         chunk_size = mddev->chunk_sectors << 9;
-        blk_queue_io_min(mddev->queue, chunk_size);
-        if (conf->geo.raid_disks % conf->geo.near_copies)
-                blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks);
-        else
-                blk_queue_io_opt(mddev->queue, chunk_size *
-                                 (conf->geo.raid_disks / conf->geo.near_copies));
+        if (mddev->queue) {
+                blk_queue_io_min(mddev->queue, chunk_size);
+                if (conf->geo.raid_disks % conf->geo.near_copies)
+                        blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks);
+                else
+                        blk_queue_io_opt(mddev->queue, chunk_size *
+                                         (conf->geo.raid_disks / conf->geo.near_copies));
+        }
 
         rdev_for_each(rdev, mddev) {
                 long long diff;
@@ -3511,8 +3533,9 @@ static int run(struct mddev *mddev)
                 if (first || diff < min_offset_diff)
                         min_offset_diff = diff;
 
-                disk_stack_limits(mddev->gendisk, rdev->bdev,
-                                  rdev->data_offset << 9);
+                if (mddev->gendisk)
+                        disk_stack_limits(mddev->gendisk, rdev->bdev,
+                                          rdev->data_offset << 9);
 
                 disk->head_position = 0;
         }
@@ -3575,22 +3598,22 @@ static int run(struct mddev *mddev)
         md_set_array_sectors(mddev, size);
         mddev->resync_max_sectors = size;
 
-        mddev->queue->backing_dev_info.congested_fn = raid10_congested;
-        mddev->queue->backing_dev_info.congested_data = mddev;
-
-        /* Calculate max read-ahead size.
-         * We need to readahead at least twice a whole stripe....
-         * maybe...
-         */
-        {
+        if (mddev->queue) {
                 int stripe = conf->geo.raid_disks *
                         ((mddev->chunk_sectors << 9) / PAGE_SIZE);
+                mddev->queue->backing_dev_info.congested_fn = raid10_congested;
+                mddev->queue->backing_dev_info.congested_data = mddev;
+
+                /* Calculate max read-ahead size.
+                 * We need to readahead at least twice a whole stripe....
+                 * maybe...
+                 */
                 stripe /= conf->geo.near_copies;
                 if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
                         mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
+                blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec);
         }
 
-        blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec);
 
         if (md_integrity_register(mddev))
                 goto out_free_conf;
@@ -3641,7 +3664,10 @@ static int stop(struct mddev *mddev)
         lower_barrier(conf);
 
         md_unregister_thread(&mddev->thread);
-        blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
+        if (mddev->queue)
+                /* the unplug fn references 'conf'*/
+                blk_sync_queue(mddev->queue);
+
         if (conf->r10bio_pool)
                 mempool_destroy(conf->r10bio_pool);
         kfree(conf->mirrors);
@@ -3805,7 +3831,7 @@ static int raid10_check_reshape(struct mddev *mddev)
         if (mddev->delta_disks > 0) {
                 /* allocate new 'mirrors' list */
                 conf->mirrors_new = kzalloc(
                         sizeof(struct raid10_info)
                         *(mddev->raid_disks +
                           mddev->delta_disks),
                         GFP_KERNEL);
@@ -3930,7 +3956,7 @@ static int raid10_start_reshape(struct mddev *mddev)
         spin_lock_irq(&conf->device_lock);
         if (conf->mirrors_new) {
                 memcpy(conf->mirrors_new, conf->mirrors,
-                       sizeof(struct mirror_info)*conf->prev.raid_disks);
+                       sizeof(struct raid10_info)*conf->prev.raid_disks);
                 smp_mb();
                 kfree(conf->mirrors_old); /* FIXME and elsewhere */
                 conf->mirrors_old = conf->mirrors;
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index 135b1b0a1554..007c2c68dd83 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -1,7 +1,7 @@
 #ifndef _RAID10_H
 #define _RAID10_H
 
-struct mirror_info {
+struct raid10_info {
         struct md_rdev *rdev, *replacement;
         sector_t head_position;
         int recovery_disabled; /* matches
@@ -13,8 +13,8 @@ struct mirror_info {
 
 struct r10conf {
         struct mddev *mddev;
-        struct mirror_info *mirrors;
-        struct mirror_info *mirrors_new, *mirrors_old;
+        struct raid10_info *mirrors;
+        struct raid10_info *mirrors_new, *mirrors_old;
         spinlock_t device_lock;
 
         /* geometry */
@@ -123,20 +123,6 @@ struct r10bio {
         } devs[0];
 };
 
-/* when we get a read error on a read-only array, we redirect to another
- * device without failing the first device, or trying to over-write to
- * correct the read error. To keep track of bad blocks on a per-bio
- * level, we store IO_BLOCKED in the appropriate 'bios' pointer
- */
-#define IO_BLOCKED ((struct bio*)1)
-/* When we successfully write to a known bad-block, we need to remove the
- * bad-block marking which must be done from process context. So we record
- * the success by setting devs[n].bio to IO_MADE_GOOD
- */
-#define IO_MADE_GOOD ((struct bio *)2)
-
-#define BIO_SPECIAL(bio) ((unsigned long)bio <= 2)
-
 /* bits for r10bio.state */
 enum r10bio_state {
         R10BIO_Uptodate,
@@ -159,4 +145,7 @@ enum r10bio_state {
          */
         R10BIO_Previous,
 };
+
+extern int md_raid10_congested(struct mddev *mddev, int bits);
+
 #endif
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 04348d76bb30..259f519814ca 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -99,34 +99,40 @@ static inline struct bio *r5_next_bio(struct bio *bio, sector_t sector)
  * We maintain a biased count of active stripes in the bottom 16 bits of
  * bi_phys_segments, and a count of processed stripes in the upper 16 bits
  */
-static inline int raid5_bi_phys_segments(struct bio *bio)
+static inline int raid5_bi_processed_stripes(struct bio *bio)
 {
-        return bio->bi_phys_segments & 0xffff;
+        atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
+        return (atomic_read(segments) >> 16) & 0xffff;
 }
 
-static inline int raid5_bi_hw_segments(struct bio *bio)
+static inline int raid5_dec_bi_active_stripes(struct bio *bio)
 {
-        return (bio->bi_phys_segments >> 16) & 0xffff;
+        atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
+        return atomic_sub_return(1, segments) & 0xffff;
 }
 
-static inline int raid5_dec_bi_phys_segments(struct bio *bio)
+static inline void raid5_inc_bi_active_stripes(struct bio *bio)
 {
-        --bio->bi_phys_segments;
-        return raid5_bi_phys_segments(bio);
+        atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
+        atomic_inc(segments);
 }
 
-static inline int raid5_dec_bi_hw_segments(struct bio *bio)
+static inline void raid5_set_bi_processed_stripes(struct bio *bio,
+                                                  unsigned int cnt)
 {
-        unsigned short val = raid5_bi_hw_segments(bio);
+        atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
+        int old, new;
 
-        --val;
-        bio->bi_phys_segments = (val << 16) | raid5_bi_phys_segments(bio);
-        return val;
+        do {
+                old = atomic_read(segments);
+                new = (old & 0xffff) | (cnt << 16);
+        } while (atomic_cmpxchg(segments, old, new) != old);
 }
 
-static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt)
+static inline void raid5_set_bi_stripes(struct bio *bio, unsigned int cnt)
 {
-        bio->bi_phys_segments = raid5_bi_phys_segments(bio) | (cnt << 16);
+        atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
+        atomic_set(segments, cnt);
 }
 
 /* Find first data disk in a raid6 stripe */
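The rewritten accessors above treat bi_phys_segments as a single 32-bit word carrying two 16-bit counters - active stripes in the low half, processed stripes in the high half - updated with atomic operations instead of under device_lock; the cmpxchg loop replaces the high half while preserving whatever the low half currently holds. A standalone C11 sketch of the same packing (illustrative only, not the kernel's atomic API):

#include <assert.h>
#include <stdatomic.h>

/* low 16 bits: active stripes; high 16 bits: processed stripes */
static atomic_uint segments;

static unsigned processed(void)
{
        return (atomic_load(&segments) >> 16) & 0xffff;
}

static unsigned dec_active(void)
{
        /* returns the remaining active count, like atomic_sub_return() */
        return (atomic_fetch_sub(&segments, 1) - 1) & 0xffff;
}

static void inc_active(void)
{
        atomic_fetch_add(&segments, 1);
}

/* replace the high half, keep the low half - same cmpxchg loop */
static void set_processed(unsigned cnt)
{
        unsigned old = atomic_load(&segments), new;

        do {
                new = (old & 0xffff) | (cnt << 16);
        } while (!atomic_compare_exchange_weak(&segments, &old, new));
}

int main(void)
{
        atomic_store(&segments, 2);     /* two active stripes */
        inc_active();                   /* now three */
        set_processed(7);
        assert(processed() == 7);
        assert(dec_active() == 2);      /* high half untouched */
        return 0;
}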
@@ -190,49 +196,56 @@ static int stripe_operations_active(struct stripe_head *sh)
                test_bit(STRIPE_COMPUTE_RUN, &sh->state);
 }
 
-static void __release_stripe(struct r5conf *conf, struct stripe_head *sh)
+static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh)
 {
-        if (atomic_dec_and_test(&sh->count)) {
-                BUG_ON(!list_empty(&sh->lru));
-                BUG_ON(atomic_read(&conf->active_stripes)==0);
-                if (test_bit(STRIPE_HANDLE, &sh->state)) {
-                        if (test_bit(STRIPE_DELAYED, &sh->state) &&
-                            !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
-                                list_add_tail(&sh->lru, &conf->delayed_list);
-                        else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
-                                 sh->bm_seq - conf->seq_write > 0)
-                                list_add_tail(&sh->lru, &conf->bitmap_list);
-                        else {
-                                clear_bit(STRIPE_DELAYED, &sh->state);
-                                clear_bit(STRIPE_BIT_DELAY, &sh->state);
-                                list_add_tail(&sh->lru, &conf->handle_list);
-                        }
-                        md_wakeup_thread(conf->mddev->thread);
-                } else {
-                        BUG_ON(stripe_operations_active(sh));
-                        if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
-                                if (atomic_dec_return(&conf->preread_active_stripes)
-                                        < IO_THRESHOLD)
-                                        md_wakeup_thread(conf->mddev->thread);
-                        atomic_dec(&conf->active_stripes);
-                        if (!test_bit(STRIPE_EXPANDING, &sh->state)) {
-                                list_add_tail(&sh->lru, &conf->inactive_list);
-                                wake_up(&conf->wait_for_stripe);
-                                if (conf->retry_read_aligned)
-                                        md_wakeup_thread(conf->mddev->thread);
-                        }
+        BUG_ON(!list_empty(&sh->lru));
+        BUG_ON(atomic_read(&conf->active_stripes)==0);
+        if (test_bit(STRIPE_HANDLE, &sh->state)) {
+                if (test_bit(STRIPE_DELAYED, &sh->state) &&
+                    !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+                        list_add_tail(&sh->lru, &conf->delayed_list);
+                else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
+                         sh->bm_seq - conf->seq_write > 0)
+                        list_add_tail(&sh->lru, &conf->bitmap_list);
+                else {
+                        clear_bit(STRIPE_DELAYED, &sh->state);
+                        clear_bit(STRIPE_BIT_DELAY, &sh->state);
+                        list_add_tail(&sh->lru, &conf->handle_list);
+                }
+                md_wakeup_thread(conf->mddev->thread);
+        } else {
+                BUG_ON(stripe_operations_active(sh));
+                if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+                        if (atomic_dec_return(&conf->preread_active_stripes)
+                            < IO_THRESHOLD)
+                                md_wakeup_thread(conf->mddev->thread);
+                atomic_dec(&conf->active_stripes);
+                if (!test_bit(STRIPE_EXPANDING, &sh->state)) {
+                        list_add_tail(&sh->lru, &conf->inactive_list);
+                        wake_up(&conf->wait_for_stripe);
+                        if (conf->retry_read_aligned)
+                                md_wakeup_thread(conf->mddev->thread);
                 }
         }
 }
 
+static void __release_stripe(struct r5conf *conf, struct stripe_head *sh)
+{
+        if (atomic_dec_and_test(&sh->count))
+                do_release_stripe(conf, sh);
+}
+
 static void release_stripe(struct stripe_head *sh)
 {
         struct r5conf *conf = sh->raid_conf;
         unsigned long flags;
 
-        spin_lock_irqsave(&conf->device_lock, flags);
-        __release_stripe(conf, sh);
-        spin_unlock_irqrestore(&conf->device_lock, flags);
+        local_irq_save(flags);
+        if (atomic_dec_and_lock(&sh->count, &conf->device_lock)) {
+                do_release_stripe(conf, sh);
+                spin_unlock(&conf->device_lock);
+        }
+        local_irq_restore(flags);
 }
 
 static inline void remove_hash(struct stripe_head *sh)
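release_stripe() above now uses the classic atomic_dec_and_lock() shape: the common case, where the reference count stays above zero, is a lock-free decrement, and device_lock is taken only on the final put, when the stripe actually has to move between lists. A userspace sketch of the idiom with pthreads (hypothetical refcounted object, not the kernel primitive):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

struct obj { atomic_int count; };

/* Decrement; take the lock only if this drop reaches zero.
 * Mirrors atomic_dec_and_lock(&sh->count, &conf->device_lock). */
static int dec_and_lock(atomic_int *count, pthread_mutex_t *lock)
{
        int old = atomic_load(count);

        /* fast path: not the last reference, no lock needed */
        while (old > 1)
                if (atomic_compare_exchange_weak(count, &old, old - 1))
                        return 0;

        pthread_mutex_lock(lock);
        if (atomic_fetch_sub(count, 1) == 1)
                return 1;       /* hit zero: caller releases under lock */
        pthread_mutex_unlock(lock);
        return 0;
}

int main(void)
{
        struct obj o = { 2 };

        if (dec_and_lock(&o.count, &list_lock)) { /* 2 -> 1: fast path */
                puts("released");
                pthread_mutex_unlock(&list_lock);
        }
        if (dec_and_lock(&o.count, &list_lock)) { /* 1 -> 0: slow path */
                puts("last put: move to inactive list, then unlock");
                pthread_mutex_unlock(&list_lock);
        }
        return 0;
}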
@@ -640,6 +653,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) | |||
640 | else | 653 | else |
641 | bi->bi_sector = (sh->sector | 654 | bi->bi_sector = (sh->sector |
642 | + rdev->data_offset); | 655 | + rdev->data_offset); |
656 | if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) | ||
657 | bi->bi_rw |= REQ_FLUSH; | ||
658 | |||
643 | bi->bi_flags = 1 << BIO_UPTODATE; | 659 | bi->bi_flags = 1 << BIO_UPTODATE; |
644 | bi->bi_idx = 0; | 660 | bi->bi_idx = 0; |
645 | bi->bi_io_vec[0].bv_len = STRIPE_SIZE; | 661 | bi->bi_io_vec[0].bv_len = STRIPE_SIZE; |
@@ -749,14 +765,12 @@ static void ops_complete_biofill(void *stripe_head_ref) | |||
749 | { | 765 | { |
750 | struct stripe_head *sh = stripe_head_ref; | 766 | struct stripe_head *sh = stripe_head_ref; |
751 | struct bio *return_bi = NULL; | 767 | struct bio *return_bi = NULL; |
752 | struct r5conf *conf = sh->raid_conf; | ||
753 | int i; | 768 | int i; |
754 | 769 | ||
755 | pr_debug("%s: stripe %llu\n", __func__, | 770 | pr_debug("%s: stripe %llu\n", __func__, |
756 | (unsigned long long)sh->sector); | 771 | (unsigned long long)sh->sector); |
757 | 772 | ||
758 | /* clear completed biofills */ | 773 | /* clear completed biofills */ |
759 | spin_lock_irq(&conf->device_lock); | ||
760 | for (i = sh->disks; i--; ) { | 774 | for (i = sh->disks; i--; ) { |
761 | struct r5dev *dev = &sh->dev[i]; | 775 | struct r5dev *dev = &sh->dev[i]; |
762 | 776 | ||
@@ -774,7 +788,7 @@ static void ops_complete_biofill(void *stripe_head_ref) | |||
774 | while (rbi && rbi->bi_sector < | 788 | while (rbi && rbi->bi_sector < |
775 | dev->sector + STRIPE_SECTORS) { | 789 | dev->sector + STRIPE_SECTORS) { |
776 | rbi2 = r5_next_bio(rbi, dev->sector); | 790 | rbi2 = r5_next_bio(rbi, dev->sector); |
777 | if (!raid5_dec_bi_phys_segments(rbi)) { | 791 | if (!raid5_dec_bi_active_stripes(rbi)) { |
778 | rbi->bi_next = return_bi; | 792 | rbi->bi_next = return_bi; |
779 | return_bi = rbi; | 793 | return_bi = rbi; |
780 | } | 794 | } |
@@ -782,7 +796,6 @@ static void ops_complete_biofill(void *stripe_head_ref) | |||
782 | } | 796 | } |
783 | } | 797 | } |
784 | } | 798 | } |
785 | spin_unlock_irq(&conf->device_lock); | ||
786 | clear_bit(STRIPE_BIOFILL_RUN, &sh->state); | 799 | clear_bit(STRIPE_BIOFILL_RUN, &sh->state); |
787 | 800 | ||
788 | return_io(return_bi); | 801 | return_io(return_bi); |
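The raid5_dec_bi_phys_segments() → raid5_dec_bi_active_stripes() renames that start here come from the "lockless access raid5 overrided bi_phys_segments" patch, and are what allow this hunk to drop device_lock around the decrement. The accessors live in raid5.h, outside the hunks shown; a sketch of the scheme, assuming the field keeps its historical 16/16 split (low half counts stripes still referencing the bio, high half records stripes already processed):

    /* raid5 does not use ->bi_phys_segments for its own bios, so the
     * field is overridden and driven with atomic ops instead of
     * device_lock. Bits 0-15: "active" stripe refcount. Bits 16-31:
     * "processed" count, used to resume an interrupted aligned read.
     */
    static inline int raid5_dec_bi_active_stripes(struct bio *bio)
    {
        atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
        return atomic_sub_return(1, segments) & 0xffff;
    }

    static inline void raid5_inc_bi_active_stripes(struct bio *bio)
    {
        atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
        atomic_inc(segments);
    }

    static inline int raid5_bi_processed_stripes(struct bio *bio)
    {
        atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
        return (atomic_read(segments) >> 16) & 0xffff;
    }

    static inline void raid5_set_bi_processed_stripes(struct bio *bio,
                                                      unsigned int cnt)
    {
        atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
        int old, new;

        /* cmpxchg loop: rewrite the high half without racing with
         * concurrent inc/dec of the low half */
        do {
            old = atomic_read(segments);
            new = (old & 0xffff) | (cnt << 16);
        } while (atomic_cmpxchg(segments, old, new) != old);
    }

    static inline void raid5_set_bi_stripes(struct bio *bio, unsigned int cnt)
    {
        atomic_t *segments = (atomic_t *)&bio->bi_phys_segments;
        atomic_set(segments, cnt);
    }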
@@ -794,7 +807,6 @@ static void ops_complete_biofill(void *stripe_head_ref) | |||
794 | static void ops_run_biofill(struct stripe_head *sh) | 807 | static void ops_run_biofill(struct stripe_head *sh) |
795 | { | 808 | { |
796 | struct dma_async_tx_descriptor *tx = NULL; | 809 | struct dma_async_tx_descriptor *tx = NULL; |
797 | struct r5conf *conf = sh->raid_conf; | ||
798 | struct async_submit_ctl submit; | 810 | struct async_submit_ctl submit; |
799 | int i; | 811 | int i; |
800 | 812 | ||
@@ -805,10 +817,10 @@ static void ops_run_biofill(struct stripe_head *sh) | |||
805 | struct r5dev *dev = &sh->dev[i]; | 817 | struct r5dev *dev = &sh->dev[i]; |
806 | if (test_bit(R5_Wantfill, &dev->flags)) { | 818 | if (test_bit(R5_Wantfill, &dev->flags)) { |
807 | struct bio *rbi; | 819 | struct bio *rbi; |
808 | spin_lock_irq(&conf->device_lock); | 820 | spin_lock_irq(&sh->stripe_lock); |
809 | dev->read = rbi = dev->toread; | 821 | dev->read = rbi = dev->toread; |
810 | dev->toread = NULL; | 822 | dev->toread = NULL; |
811 | spin_unlock_irq(&conf->device_lock); | 823 | spin_unlock_irq(&sh->stripe_lock); |
812 | while (rbi && rbi->bi_sector < | 824 | while (rbi && rbi->bi_sector < |
813 | dev->sector + STRIPE_SECTORS) { | 825 | dev->sector + STRIPE_SECTORS) { |
814 | tx = async_copy_data(0, rbi, dev->page, | 826 | tx = async_copy_data(0, rbi, dev->page, |
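This hunk (and the ops_run_biodrain one below) shows the pattern the new per-stripe lock enables: ->toread or ->towrite is detached under sh->stripe_lock, and the bio chain is then walked with no lock held, since once detached it is private to this code path. The idiom in isolation, with invented names:

    struct stripe {
        spinlock_t lock;        /* stands in for sh->stripe_lock */
        struct bio *pending;    /* stands in for dev->toread / dev->towrite */
    };

    /* Detach the pending chain under the stripe's own short-lived
     * lock; after the hand-off nobody else can reach it, so the
     * caller may iterate it lock-free.
     */
    static struct bio *detach_pending(struct stripe *s)
    {
        struct bio *chain;

        spin_lock_irq(&s->lock);
        chain = s->pending;
        s->pending = NULL;
        spin_unlock_irq(&s->lock);

        return chain;
    }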
@@ -1144,12 +1156,12 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) | |||
1144 | if (test_and_clear_bit(R5_Wantdrain, &dev->flags)) { | 1156 | if (test_and_clear_bit(R5_Wantdrain, &dev->flags)) { |
1145 | struct bio *wbi; | 1157 | struct bio *wbi; |
1146 | 1158 | ||
1147 | spin_lock_irq(&sh->raid_conf->device_lock); | 1159 | spin_lock_irq(&sh->stripe_lock); |
1148 | chosen = dev->towrite; | 1160 | chosen = dev->towrite; |
1149 | dev->towrite = NULL; | 1161 | dev->towrite = NULL; |
1150 | BUG_ON(dev->written); | 1162 | BUG_ON(dev->written); |
1151 | wbi = dev->written = chosen; | 1163 | wbi = dev->written = chosen; |
1152 | spin_unlock_irq(&sh->raid_conf->device_lock); | 1164 | spin_unlock_irq(&sh->stripe_lock); |
1153 | 1165 | ||
1154 | while (wbi && wbi->bi_sector < | 1166 | while (wbi && wbi->bi_sector < |
1155 | dev->sector + STRIPE_SECTORS) { | 1167 | dev->sector + STRIPE_SECTORS) { |
@@ -1454,6 +1466,8 @@ static int grow_one_stripe(struct r5conf *conf) | |||
1454 | init_waitqueue_head(&sh->ops.wait_for_ops); | 1466 | init_waitqueue_head(&sh->ops.wait_for_ops); |
1455 | #endif | 1467 | #endif |
1456 | 1468 | ||
1469 | spin_lock_init(&sh->stripe_lock); | ||
1470 | |||
1457 | if (grow_buffers(sh)) { | 1471 | if (grow_buffers(sh)) { |
1458 | shrink_buffers(sh); | 1472 | shrink_buffers(sh); |
1459 | kmem_cache_free(conf->slab_cache, sh); | 1473 | kmem_cache_free(conf->slab_cache, sh); |
@@ -1739,7 +1753,9 @@ static void raid5_end_read_request(struct bio * bi, int error) | |||
1739 | atomic_add(STRIPE_SECTORS, &rdev->corrected_errors); | 1753 | atomic_add(STRIPE_SECTORS, &rdev->corrected_errors); |
1740 | clear_bit(R5_ReadError, &sh->dev[i].flags); | 1754 | clear_bit(R5_ReadError, &sh->dev[i].flags); |
1741 | clear_bit(R5_ReWrite, &sh->dev[i].flags); | 1755 | clear_bit(R5_ReWrite, &sh->dev[i].flags); |
1742 | } | 1756 | } else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) |
1757 | clear_bit(R5_ReadNoMerge, &sh->dev[i].flags); | ||
1758 | |||
1743 | if (atomic_read(&rdev->read_errors)) | 1759 | if (atomic_read(&rdev->read_errors)) |
1744 | atomic_set(&rdev->read_errors, 0); | 1760 | atomic_set(&rdev->read_errors, 0); |
1745 | } else { | 1761 | } else { |
@@ -1784,7 +1800,11 @@ static void raid5_end_read_request(struct bio * bi, int error) | |||
1784 | else | 1800 | else |
1785 | retry = 1; | 1801 | retry = 1; |
1786 | if (retry) | 1802 | if (retry) |
1787 | set_bit(R5_ReadError, &sh->dev[i].flags); | 1803 | if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) { |
1804 | set_bit(R5_ReadError, &sh->dev[i].flags); | ||
1805 | clear_bit(R5_ReadNoMerge, &sh->dev[i].flags); | ||
1806 | } else | ||
1807 | set_bit(R5_ReadNoMerge, &sh->dev[i].flags); | ||
1788 | else { | 1808 | else { |
1789 | clear_bit(R5_ReadError, &sh->dev[i].flags); | 1809 | clear_bit(R5_ReadError, &sh->dev[i].flags); |
1790 | clear_bit(R5_ReWrite, &sh->dev[i].flags); | 1810 | clear_bit(R5_ReWrite, &sh->dev[i].flags); |
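Taken together, the two raid5_end_read_request() hunks turn a failed read into a two-step escalation instead of an immediate R5_ReadError: the first failure sets R5_ReadNoMerge (which ops_run_io() translates into REQ_FLUSH) and retries the read unmerged; only if that retry also fails does R5_ReadError engage the usual rewrite/re-read recovery. Condensed, as a sketch:

    /* Failed read of sh->dev[i], and a retry is warranted: */
    if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) {
        /* second failure: the unmerged retry failed as well */
        set_bit(R5_ReadError, &sh->dev[i].flags);
        clear_bit(R5_ReadNoMerge, &sh->dev[i].flags);
    } else {
        /* first failure: reissue once, unmerged at the block layer */
        set_bit(R5_ReadNoMerge, &sh->dev[i].flags);
    }
    /* On a successful read, both flags are cleared (earlier hunk). */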
@@ -2340,11 +2360,18 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in | |||
2340 | (unsigned long long)bi->bi_sector, | 2360 | (unsigned long long)bi->bi_sector, |
2341 | (unsigned long long)sh->sector); | 2361 | (unsigned long long)sh->sector); |
2342 | 2362 | ||
2343 | 2363 | /* | |
2344 | spin_lock_irq(&conf->device_lock); | 2364 | * If several bios share a stripe, the bio bi_phys_segments acts as a |
2365 | * reference count to avoid races. The reference count should already be | ||
2366 | * increased before this function is called (for example, in | ||
2367 | * make_request()), so other bios sharing this stripe will not free the | ||
2368 | * stripe. If the stripe is owned by a single bio, the stripe lock | ||
2369 | * protects it. | ||
2370 | */ | ||
2371 | spin_lock_irq(&sh->stripe_lock); | ||
2345 | if (forwrite) { | 2372 | if (forwrite) { |
2346 | bip = &sh->dev[dd_idx].towrite; | 2373 | bip = &sh->dev[dd_idx].towrite; |
2347 | if (*bip == NULL && sh->dev[dd_idx].written == NULL) | 2374 | if (*bip == NULL) |
2348 | firstwrite = 1; | 2375 | firstwrite = 1; |
2349 | } else | 2376 | } else |
2350 | bip = &sh->dev[dd_idx].toread; | 2377 | bip = &sh->dev[dd_idx].toread; |
@@ -2360,7 +2387,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in | |||
2360 | if (*bip) | 2387 | if (*bip) |
2361 | bi->bi_next = *bip; | 2388 | bi->bi_next = *bip; |
2362 | *bip = bi; | 2389 | *bip = bi; |
2363 | bi->bi_phys_segments++; | 2390 | raid5_inc_bi_active_stripes(bi); |
2364 | 2391 | ||
2365 | if (forwrite) { | 2392 | if (forwrite) { |
2366 | /* check if page is covered */ | 2393 | /* check if page is covered */ |
@@ -2375,7 +2402,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in | |||
2375 | if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS) | 2402 | if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS) |
2376 | set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags); | 2403 | set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags); |
2377 | } | 2404 | } |
2378 | spin_unlock_irq(&conf->device_lock); | 2405 | spin_unlock_irq(&sh->stripe_lock); |
2379 | 2406 | ||
2380 | pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n", | 2407 | pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n", |
2381 | (unsigned long long)(*bip)->bi_sector, | 2408 | (unsigned long long)(*bip)->bi_sector, |
@@ -2391,7 +2418,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in | |||
2391 | 2418 | ||
2392 | overlap: | 2419 | overlap: |
2393 | set_bit(R5_Overlap, &sh->dev[dd_idx].flags); | 2420 | set_bit(R5_Overlap, &sh->dev[dd_idx].flags); |
2394 | spin_unlock_irq(&conf->device_lock); | 2421 | spin_unlock_irq(&sh->stripe_lock); |
2395 | return 0; | 2422 | return 0; |
2396 | } | 2423 | } |
2397 | 2424 | ||
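The new comment in add_stripe_bio() codifies a convention that make_request() (not shown in these hunks) must uphold: the submitter holds one biased reference on the bio for the whole of submission, and each stripe the bio is attached to adds another. The lifecycle, sketched with the accessors above (for_each_stripe_covered() is a hypothetical helper, not a real function):

    raid5_set_bi_stripes(bi, 1);        /* bias: submitter's reference */

    for_each_stripe_covered(sh, bi) {   /* hypothetical iteration */
        raid5_inc_bi_active_stripes(bi);    /* done in add_stripe_bio() */
        /* stripe handling may finish and drop its reference at any
         * time, but the bias keeps the bio alive until we are done */
    }

    if (raid5_dec_bi_active_stripes(bi) == 0)   /* drop the bias */
        bio_endio(bi, 0);   /* every stripe finished before we did */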
@@ -2441,10 +2468,11 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, | |||
2441 | rdev_dec_pending(rdev, conf->mddev); | 2468 | rdev_dec_pending(rdev, conf->mddev); |
2442 | } | 2469 | } |
2443 | } | 2470 | } |
2444 | spin_lock_irq(&conf->device_lock); | 2471 | spin_lock_irq(&sh->stripe_lock); |
2445 | /* fail all writes first */ | 2472 | /* fail all writes first */ |
2446 | bi = sh->dev[i].towrite; | 2473 | bi = sh->dev[i].towrite; |
2447 | sh->dev[i].towrite = NULL; | 2474 | sh->dev[i].towrite = NULL; |
2475 | spin_unlock_irq(&sh->stripe_lock); | ||
2448 | if (bi) { | 2476 | if (bi) { |
2449 | s->to_write--; | 2477 | s->to_write--; |
2450 | bitmap_end = 1; | 2478 | bitmap_end = 1; |
@@ -2457,13 +2485,17 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, | |||
2457 | sh->dev[i].sector + STRIPE_SECTORS) { | 2485 | sh->dev[i].sector + STRIPE_SECTORS) { |
2458 | struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); | 2486 | struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); |
2459 | clear_bit(BIO_UPTODATE, &bi->bi_flags); | 2487 | clear_bit(BIO_UPTODATE, &bi->bi_flags); |
2460 | if (!raid5_dec_bi_phys_segments(bi)) { | 2488 | if (!raid5_dec_bi_active_stripes(bi)) { |
2461 | md_write_end(conf->mddev); | 2489 | md_write_end(conf->mddev); |
2462 | bi->bi_next = *return_bi; | 2490 | bi->bi_next = *return_bi; |
2463 | *return_bi = bi; | 2491 | *return_bi = bi; |
2464 | } | 2492 | } |
2465 | bi = nextbi; | 2493 | bi = nextbi; |
2466 | } | 2494 | } |
2495 | if (bitmap_end) | ||
2496 | bitmap_endwrite(conf->mddev->bitmap, sh->sector, | ||
2497 | STRIPE_SECTORS, 0, 0); | ||
2498 | bitmap_end = 0; | ||
2467 | /* and fail all 'written' */ | 2499 | /* and fail all 'written' */ |
2468 | bi = sh->dev[i].written; | 2500 | bi = sh->dev[i].written; |
2469 | sh->dev[i].written = NULL; | 2501 | sh->dev[i].written = NULL; |
@@ -2472,7 +2504,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, | |||
2472 | sh->dev[i].sector + STRIPE_SECTORS) { | 2504 | sh->dev[i].sector + STRIPE_SECTORS) { |
2473 | struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); | 2505 | struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); |
2474 | clear_bit(BIO_UPTODATE, &bi->bi_flags); | 2506 | clear_bit(BIO_UPTODATE, &bi->bi_flags); |
2475 | if (!raid5_dec_bi_phys_segments(bi)) { | 2507 | if (!raid5_dec_bi_active_stripes(bi)) { |
2476 | md_write_end(conf->mddev); | 2508 | md_write_end(conf->mddev); |
2477 | bi->bi_next = *return_bi; | 2509 | bi->bi_next = *return_bi; |
2478 | *return_bi = bi; | 2510 | *return_bi = bi; |
@@ -2496,14 +2528,13 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, | |||
2496 | struct bio *nextbi = | 2528 | struct bio *nextbi = |
2497 | r5_next_bio(bi, sh->dev[i].sector); | 2529 | r5_next_bio(bi, sh->dev[i].sector); |
2498 | clear_bit(BIO_UPTODATE, &bi->bi_flags); | 2530 | clear_bit(BIO_UPTODATE, &bi->bi_flags); |
2499 | if (!raid5_dec_bi_phys_segments(bi)) { | 2531 | if (!raid5_dec_bi_active_stripes(bi)) { |
2500 | bi->bi_next = *return_bi; | 2532 | bi->bi_next = *return_bi; |
2501 | *return_bi = bi; | 2533 | *return_bi = bi; |
2502 | } | 2534 | } |
2503 | bi = nextbi; | 2535 | bi = nextbi; |
2504 | } | 2536 | } |
2505 | } | 2537 | } |
2506 | spin_unlock_irq(&conf->device_lock); | ||
2507 | if (bitmap_end) | 2538 | if (bitmap_end) |
2508 | bitmap_endwrite(conf->mddev->bitmap, sh->sector, | 2539 | bitmap_endwrite(conf->mddev->bitmap, sh->sector, |
2509 | STRIPE_SECTORS, 0, 0); | 2540 | STRIPE_SECTORS, 0, 0); |
@@ -2707,30 +2738,23 @@ static void handle_stripe_clean_event(struct r5conf *conf, | |||
2707 | test_bit(R5_UPTODATE, &dev->flags)) { | 2738 | test_bit(R5_UPTODATE, &dev->flags)) { |
2708 | /* We can return any write requests */ | 2739 | /* We can return any write requests */ |
2709 | struct bio *wbi, *wbi2; | 2740 | struct bio *wbi, *wbi2; |
2710 | int bitmap_end = 0; | ||
2711 | pr_debug("Return write for disc %d\n", i); | 2741 | pr_debug("Return write for disc %d\n", i); |
2712 | spin_lock_irq(&conf->device_lock); | ||
2713 | wbi = dev->written; | 2742 | wbi = dev->written; |
2714 | dev->written = NULL; | 2743 | dev->written = NULL; |
2715 | while (wbi && wbi->bi_sector < | 2744 | while (wbi && wbi->bi_sector < |
2716 | dev->sector + STRIPE_SECTORS) { | 2745 | dev->sector + STRIPE_SECTORS) { |
2717 | wbi2 = r5_next_bio(wbi, dev->sector); | 2746 | wbi2 = r5_next_bio(wbi, dev->sector); |
2718 | if (!raid5_dec_bi_phys_segments(wbi)) { | 2747 | if (!raid5_dec_bi_active_stripes(wbi)) { |
2719 | md_write_end(conf->mddev); | 2748 | md_write_end(conf->mddev); |
2720 | wbi->bi_next = *return_bi; | 2749 | wbi->bi_next = *return_bi; |
2721 | *return_bi = wbi; | 2750 | *return_bi = wbi; |
2722 | } | 2751 | } |
2723 | wbi = wbi2; | 2752 | wbi = wbi2; |
2724 | } | 2753 | } |
2725 | if (dev->towrite == NULL) | 2754 | bitmap_endwrite(conf->mddev->bitmap, sh->sector, |
2726 | bitmap_end = 1; | 2755 | STRIPE_SECTORS, |
2727 | spin_unlock_irq(&conf->device_lock); | ||
2728 | if (bitmap_end) | ||
2729 | bitmap_endwrite(conf->mddev->bitmap, | ||
2730 | sh->sector, | ||
2731 | STRIPE_SECTORS, | ||
2732 | !test_bit(STRIPE_DEGRADED, &sh->state), | 2756 | !test_bit(STRIPE_DEGRADED, &sh->state), |
2733 | 0); | 2757 | 0); |
2734 | } | 2758 | } |
2735 | } | 2759 | } |
2736 | 2760 | ||
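The handle_stripe_clean_event() hunk above implements "remove unnecessary bitmap write optimization": instead of calling bitmap_endwrite() only when ->towrite has drained, every batch of returned writes ends its bitmap accounting unconditionally. The invariant is that each bitmap_startwrite() issued when a write was queued to the stripe is balanced by exactly one bitmap_endwrite(); the pairing, with the signatures as they stood in this era (quoted from memory, so treat as a sketch):

    /* When the first write bio is queued to the stripe: */
    bitmap_startwrite(conf->mddev->bitmap, sh->sector,
                      STRIPE_SECTORS, 0);
    /* ...the write is carried out, possibly degraded... */
    bitmap_endwrite(conf->mddev->bitmap, sh->sector, STRIPE_SECTORS,
                    !test_bit(STRIPE_DEGRADED, &sh->state), 0);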
@@ -3182,7 +3206,6 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) | |||
3182 | 3206 | ||
3183 | /* Now to look around and see what can be done */ | 3207 | /* Now to look around and see what can be done */ |
3184 | rcu_read_lock(); | 3208 | rcu_read_lock(); |
3185 | spin_lock_irq(&conf->device_lock); | ||
3186 | for (i=disks; i--; ) { | 3209 | for (i=disks; i--; ) { |
3187 | struct md_rdev *rdev; | 3210 | struct md_rdev *rdev; |
3188 | sector_t first_bad; | 3211 | sector_t first_bad; |
@@ -3328,7 +3351,6 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) | |||
3328 | do_recovery = 1; | 3351 | do_recovery = 1; |
3329 | } | 3352 | } |
3330 | } | 3353 | } |
3331 | spin_unlock_irq(&conf->device_lock); | ||
3332 | if (test_bit(STRIPE_SYNCING, &sh->state)) { | 3354 | if (test_bit(STRIPE_SYNCING, &sh->state)) { |
3333 | /* If there is a failed device being replaced, | 3355 | /* If there is a failed device being replaced, |
3334 | * we must be recovering. | 3356 | * we must be recovering. |
@@ -3791,7 +3813,7 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf) | |||
3791 | * this sets the active stripe count to 1 and the processed | 3813 | * this sets the active stripe count to 1 and the processed |
3792 | * stripe count to zero (upper 16 bits) | 3814 | * stripe count to zero (upper 16 bits) |
3793 | */ | 3815 | */ |
3794 | bi->bi_phys_segments = 1; /* biased count of active stripes */ | 3816 | raid5_set_bi_stripes(bi, 1); /* biased count of active stripes */ |
3795 | } | 3817 | } |
3796 | 3818 | ||
3797 | return bi; | 3819 | return bi; |
@@ -4113,7 +4135,7 @@ static void make_request(struct mddev *mddev, struct bio * bi) | |||
4113 | finish_wait(&conf->wait_for_overlap, &w); | 4135 | finish_wait(&conf->wait_for_overlap, &w); |
4114 | set_bit(STRIPE_HANDLE, &sh->state); | 4136 | set_bit(STRIPE_HANDLE, &sh->state); |
4115 | clear_bit(STRIPE_DELAYED, &sh->state); | 4137 | clear_bit(STRIPE_DELAYED, &sh->state); |
4116 | if ((bi->bi_rw & REQ_SYNC) && | 4138 | if ((bi->bi_rw & REQ_NOIDLE) && |
4117 | !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) | 4139 | !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) |
4118 | atomic_inc(&conf->preread_active_stripes); | 4140 | atomic_inc(&conf->preread_active_stripes); |
4119 | mddev_check_plugged(mddev); | 4141 | mddev_check_plugged(mddev); |
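The REQ_SYNC → REQ_NOIDLE switch above is the O_DIRECT write optimization from this branch: plain O_DIRECT writes carry REQ_SYNC but not REQ_NOIDLE, so they no longer force STRIPE_PREREAD_ACTIVE, while fsync-style writes still do. The distinction falls out of the write-flavor macros of this period (reproduced from memory, so treat as a sketch):

    /* linux/fs.h, circa 3.5: */
    #define WRITE_SYNC      (WRITE | REQ_SYNC | REQ_NOIDLE) /* fsync etc. */
    #define WRITE_ODIRECT   (WRITE | REQ_SYNC)              /* no NOIDLE */

    /* hence, in make_request(): */
    if ((bi->bi_rw & REQ_NOIDLE) &&     /* WRITE_SYNC yes, O_DIRECT no */
        !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
        atomic_inc(&conf->preread_active_stripes);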
@@ -4126,9 +4148,7 @@ static void make_request(struct mddev *mddev, struct bio * bi) | |||
4126 | } | 4148 | } |
4127 | } | 4149 | } |
4128 | 4150 | ||
4129 | spin_lock_irq(&conf->device_lock); | 4151 | remaining = raid5_dec_bi_active_stripes(bi); |
4130 | remaining = raid5_dec_bi_phys_segments(bi); | ||
4131 | spin_unlock_irq(&conf->device_lock); | ||
4132 | if (remaining == 0) { | 4152 | if (remaining == 0) { |
4133 | 4153 | ||
4134 | if ( rw == WRITE ) | 4154 | if ( rw == WRITE ) |
@@ -4484,7 +4504,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) | |||
4484 | sector += STRIPE_SECTORS, | 4504 | sector += STRIPE_SECTORS, |
4485 | scnt++) { | 4505 | scnt++) { |
4486 | 4506 | ||
4487 | if (scnt < raid5_bi_hw_segments(raid_bio)) | 4507 | if (scnt < raid5_bi_processed_stripes(raid_bio)) |
4488 | /* already done this stripe */ | 4508 | /* already done this stripe */ |
4489 | continue; | 4509 | continue; |
4490 | 4510 | ||
@@ -4492,25 +4512,24 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) | |||
4492 | 4512 | ||
4493 | if (!sh) { | 4513 | if (!sh) { |
4494 | /* failed to get a stripe - must wait */ | 4514 | /* failed to get a stripe - must wait */ |
4495 | raid5_set_bi_hw_segments(raid_bio, scnt); | 4515 | raid5_set_bi_processed_stripes(raid_bio, scnt); |
4496 | conf->retry_read_aligned = raid_bio; | 4516 | conf->retry_read_aligned = raid_bio; |
4497 | return handled; | 4517 | return handled; |
4498 | } | 4518 | } |
4499 | 4519 | ||
4500 | if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) { | 4520 | if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) { |
4501 | release_stripe(sh); | 4521 | release_stripe(sh); |
4502 | raid5_set_bi_hw_segments(raid_bio, scnt); | 4522 | raid5_set_bi_processed_stripes(raid_bio, scnt); |
4503 | conf->retry_read_aligned = raid_bio; | 4523 | conf->retry_read_aligned = raid_bio; |
4504 | return handled; | 4524 | return handled; |
4505 | } | 4525 | } |
4506 | 4526 | ||
4527 | set_bit(R5_ReadNoMerge, &sh->dev[dd_idx].flags); | ||
4507 | handle_stripe(sh); | 4528 | handle_stripe(sh); |
4508 | release_stripe(sh); | 4529 | release_stripe(sh); |
4509 | handled++; | 4530 | handled++; |
4510 | } | 4531 | } |
4511 | spin_lock_irq(&conf->device_lock); | 4532 | remaining = raid5_dec_bi_active_stripes(raid_bio); |
4512 | remaining = raid5_dec_bi_phys_segments(raid_bio); | ||
4513 | spin_unlock_irq(&conf->device_lock); | ||
4514 | if (remaining == 0) | 4533 | if (remaining == 0) |
4515 | bio_endio(raid_bio, 0); | 4534 | bio_endio(raid_bio, 0); |
4516 | if (atomic_dec_and_test(&conf->active_aligned_reads)) | 4535 | if (atomic_dec_and_test(&conf->active_aligned_reads)) |
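retry_aligned_read() is the consumer of the "processed" half of the overridden counter: when no stripe can be obtained, it records its position with raid5_set_bi_processed_stripes() and requeues the bio, and the scnt < raid5_bi_processed_stripes() test at the top of the loop skips stripes handled on an earlier pass. The resumable loop, condensed:

    /* Sketch: resumable per-stripe walk of an aligned read. */
    for (scnt = 0; scnt < nr_stripes; scnt++, sector += STRIPE_SECTORS) {
        if (scnt < raid5_bi_processed_stripes(raid_bio))
            continue;       /* finished on an earlier pass */

        sh = get_active_stripe(conf, sector, 0, 1, 0);
        if (!sh) {
            /* out of stripes: remember progress, retry later */
            raid5_set_bi_processed_stripes(raid_bio, scnt);
            conf->retry_read_aligned = raid_bio;
            return handled;
        }
        /* ...attach the bio, handle and release the stripe... */
    }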
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 2164021f3b5f..61dbb615c30b 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h | |||
@@ -210,6 +210,7 @@ struct stripe_head { | |||
210 | int disks; /* disks in stripe */ | 210 | int disks; /* disks in stripe */ |
211 | enum check_states check_state; | 211 | enum check_states check_state; |
212 | enum reconstruct_states reconstruct_state; | 212 | enum reconstruct_states reconstruct_state; |
213 | spinlock_t stripe_lock; | ||
213 | /** | 214 | /** |
214 | * struct stripe_operations | 215 | * struct stripe_operations |
215 | * @target - STRIPE_OP_COMPUTE_BLK target | 216 | * @target - STRIPE_OP_COMPUTE_BLK target |
@@ -273,6 +274,7 @@ enum r5dev_flags { | |||
273 | R5_Wantwrite, | 274 | R5_Wantwrite, |
274 | R5_Overlap, /* There is a pending overlapping request | 275 | R5_Overlap, /* There is a pending overlapping request |
275 | * on this block */ | 276 | * on this block */ |
277 | R5_ReadNoMerge, /* prevent a bio from merging in the block layer */ | ||
276 | R5_ReadError, /* seen a read error here recently */ | 278 | R5_ReadError, /* seen a read error here recently */ |
277 | R5_ReWrite, /* have tried to over-write the readerror */ | 279 | R5_ReWrite, /* have tried to over-write the readerror */ |
278 | 280 | ||