author    | James Morris <jmorris@namei.org> | 2011-04-19 07:32:41 -0400
committer | James Morris <jmorris@namei.org> | 2011-04-19 07:32:41 -0400
commit    | d4ab4e6a23f805abb8fc3cc34525eec3788aeca1 (patch)
tree      | eefd82c155bc27469a85667d759cd90facf4a6e3 /drivers/md
parent    | c0fa797ae6cd02ff87c0bfe0d509368a3b45640e (diff)
parent    | 96fd2d57b8252e16dfacf8941f7a74a6119197f5 (diff)
Merge branch 'master'; commit 'v2.6.39-rc3' into next
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/bitmap.h         |   2
-rw-r--r-- | drivers/md/dm-raid.c        |   8
-rw-r--r-- | drivers/md/dm-region-hash.c |   2
-rw-r--r-- | drivers/md/dm-table.c       | 114
-rw-r--r-- | drivers/md/faulty.c         |   2
-rw-r--r-- | drivers/md/md.c             |  89
-rw-r--r-- | drivers/md/md.h             |  28
-rw-r--r-- | drivers/md/raid1.c          |  29
-rw-r--r-- | drivers/md/raid10.c         |  33
-rw-r--r-- | drivers/md/raid10.h         |   4
-rw-r--r-- | drivers/md/raid5.c          |  61
-rw-r--r-- | drivers/md/raid5.h          |   2
12 files changed, 193 insertions, 181 deletions
diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h
index 931a7a7c3796..d0aeaf46d932 100644
--- a/drivers/md/bitmap.h
+++ b/drivers/md/bitmap.h
@@ -45,7 +45,7 @@ | |||
45 | * | 45 | * |
46 | * The counter counts pending write requests, plus the on-disk bit. | 46 | * The counter counts pending write requests, plus the on-disk bit. |
47 | * When the counter is '1' and the resync bits are clear, the on-disk | 47 | * When the counter is '1' and the resync bits are clear, the on-disk |
48 | * bit can be cleared aswell, thus setting the counter to 0. | 48 | * bit can be cleared as well, thus setting the counter to 0. |
49 | * When we set a bit, or in the counter (to start a write), if the fields is | 49 | * When we set a bit, or in the counter (to start a write), if the fields is |
50 | * 0, we first set the disk bit and set the counter to 1. | 50 | * 0, we first set the disk bit and set the counter to 1. |
51 | * | 51 | * |
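The bitmap.h comment above describes the write-intent counters: each counter holds the number of pending writes plus one for the on-disk bit, and the disk bit may only be dropped when nothing but that bit remains and no resync is pending. A minimal user-space sketch of that state machine follows; the field widths and names are assumptions for illustration, not taken from bitmap.h, and the real clearing is done lazily by the bitmap daemon rather than inline.

#include <assert.h>
#include <stdbool.h>

/* Illustrative model: one per-chunk counter plus its on-disk bit. */
struct bitmap_counter {
	unsigned int count;   /* pending writes + the on-disk bit */
	bool disk_bit;        /* bit as recorded in the on-disk bitmap */
	bool resync;          /* resync still needed for this chunk */
};

/* Starting a write: if the field is 0, first set the disk bit (count = 1),
 * then account for the new pending write. */
static void start_write(struct bitmap_counter *c)
{
	if (c->count == 0) {
		c->disk_bit = true;
		c->count = 1;
	}
	c->count++;
}

/* Completing a write: once only the on-disk bit remains and the resync
 * bits are clear, the disk bit can be cleared as well, dropping to 0. */
static void end_write(struct bitmap_counter *c)
{
	assert(c->count > 1);
	c->count--;
	if (c->count == 1 && !c->resync) {
		c->disk_bit = false;
		c->count = 0;
	}
}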
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 5ef136cdba91..e5d8904fc8f6 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -390,13 +390,6 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits) | |||
390 | return md_raid5_congested(&rs->md, bits); | 390 | return md_raid5_congested(&rs->md, bits); |
391 | } | 391 | } |
392 | 392 | ||
393 | static void raid_unplug(struct dm_target_callbacks *cb) | ||
394 | { | ||
395 | struct raid_set *rs = container_of(cb, struct raid_set, callbacks); | ||
396 | |||
397 | md_raid5_kick_device(rs->md.private); | ||
398 | } | ||
399 | |||
400 | /* | 393 | /* |
401 | * Construct a RAID4/5/6 mapping: | 394 | * Construct a RAID4/5/6 mapping: |
402 | * Args: | 395 | * Args: |
@@ -487,7 +480,6 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
487 | } | 480 | } |
488 | 481 | ||
489 | rs->callbacks.congested_fn = raid_is_congested; | 482 | rs->callbacks.congested_fn = raid_is_congested; |
490 | rs->callbacks.unplug_fn = raid_unplug; | ||
491 | dm_table_add_target_callbacks(ti->table, &rs->callbacks); | 483 | dm_table_add_target_callbacks(ti->table, &rs->callbacks); |
492 | 484 | ||
493 | return 0; | 485 | return 0; |
diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c
index dad011aed0c9..7771ed212182 100644
--- a/drivers/md/dm-region-hash.c
+++ b/drivers/md/dm-region-hash.c
@@ -419,7 +419,7 @@ void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio) | |||
419 | /* | 419 | /* |
420 | * Possible cases: | 420 | * Possible cases: |
421 | * 1) DM_RH_DIRTY | 421 | * 1) DM_RH_DIRTY |
422 | * 2) DM_RH_NOSYNC: was dirty, other preceeding writes failed | 422 | * 2) DM_RH_NOSYNC: was dirty, other preceding writes failed |
423 | * 3) DM_RH_RECOVERING: flushing pending writes | 423 | * 3) DM_RH_RECOVERING: flushing pending writes |
424 | * Either case, the region should have not been connected to list. | 424 | * Either case, the region should have not been connected to list. |
425 | */ | 425 | */ |
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 416d4e258df6..cb8380c9767f 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -927,20 +927,80 @@ static int dm_table_build_index(struct dm_table *t) | |||
927 | } | 927 | } |
928 | 928 | ||
929 | /* | 929 | /* |
930 | * Get a disk whose integrity profile reflects the table's profile. | ||
931 | * If %match_all is true, all devices' profiles must match. | ||
932 | * If %match_all is false, all devices must at least have an | ||
933 | * allocated integrity profile; but uninitialized is ok. | ||
934 | * Returns NULL if integrity support was inconsistent or unavailable. | ||
935 | */ | ||
936 | static struct gendisk * dm_table_get_integrity_disk(struct dm_table *t, | ||
937 | bool match_all) | ||
938 | { | ||
939 | struct list_head *devices = dm_table_get_devices(t); | ||
940 | struct dm_dev_internal *dd = NULL; | ||
941 | struct gendisk *prev_disk = NULL, *template_disk = NULL; | ||
942 | |||
943 | list_for_each_entry(dd, devices, list) { | ||
944 | template_disk = dd->dm_dev.bdev->bd_disk; | ||
945 | if (!blk_get_integrity(template_disk)) | ||
946 | goto no_integrity; | ||
947 | if (!match_all && !blk_integrity_is_initialized(template_disk)) | ||
948 | continue; /* skip uninitialized profiles */ | ||
949 | else if (prev_disk && | ||
950 | blk_integrity_compare(prev_disk, template_disk) < 0) | ||
951 | goto no_integrity; | ||
952 | prev_disk = template_disk; | ||
953 | } | ||
954 | |||
955 | return template_disk; | ||
956 | |||
957 | no_integrity: | ||
958 | if (prev_disk) | ||
959 | DMWARN("%s: integrity not set: %s and %s profile mismatch", | ||
960 | dm_device_name(t->md), | ||
961 | prev_disk->disk_name, | ||
962 | template_disk->disk_name); | ||
963 | return NULL; | ||
964 | } | ||
965 | |||
966 | /* | ||
930 | * Register the mapped device for blk_integrity support if | 967 | * Register the mapped device for blk_integrity support if |
931 | * the underlying devices support it. | 968 | * the underlying devices have an integrity profile. But all devices |
969 | * may not have matching profiles (checking all devices isn't reliable | ||
970 | * during table load because this table may use other DM device(s) which | ||
971 | * must be resumed before they will have an initialized integity profile). | ||
972 | * Stacked DM devices force a 2 stage integrity profile validation: | ||
973 | * 1 - during load, validate all initialized integrity profiles match | ||
974 | * 2 - during resume, validate all integrity profiles match | ||
932 | */ | 975 | */ |
933 | static int dm_table_prealloc_integrity(struct dm_table *t, struct mapped_device *md) | 976 | static int dm_table_prealloc_integrity(struct dm_table *t, struct mapped_device *md) |
934 | { | 977 | { |
935 | struct list_head *devices = dm_table_get_devices(t); | 978 | struct gendisk *template_disk = NULL; |
936 | struct dm_dev_internal *dd; | ||
937 | 979 | ||
938 | list_for_each_entry(dd, devices, list) | 980 | template_disk = dm_table_get_integrity_disk(t, false); |
939 | if (bdev_get_integrity(dd->dm_dev.bdev)) { | 981 | if (!template_disk) |
940 | t->integrity_supported = 1; | 982 | return 0; |
941 | return blk_integrity_register(dm_disk(md), NULL); | ||
942 | } | ||
943 | 983 | ||
984 | if (!blk_integrity_is_initialized(dm_disk(md))) { | ||
985 | t->integrity_supported = 1; | ||
986 | return blk_integrity_register(dm_disk(md), NULL); | ||
987 | } | ||
988 | |||
989 | /* | ||
990 | * If DM device already has an initalized integrity | ||
991 | * profile the new profile should not conflict. | ||
992 | */ | ||
993 | if (blk_integrity_is_initialized(template_disk) && | ||
994 | blk_integrity_compare(dm_disk(md), template_disk) < 0) { | ||
995 | DMWARN("%s: conflict with existing integrity profile: " | ||
996 | "%s profile mismatch", | ||
997 | dm_device_name(t->md), | ||
998 | template_disk->disk_name); | ||
999 | return 1; | ||
1000 | } | ||
1001 | |||
1002 | /* Preserve existing initialized integrity profile */ | ||
1003 | t->integrity_supported = 1; | ||
944 | return 0; | 1004 | return 0; |
945 | } | 1005 | } |
946 | 1006 | ||
@@ -1094,41 +1154,27 @@ combine_limits: | |||
1094 | 1154 | ||
1095 | /* | 1155 | /* |
1096 | * Set the integrity profile for this device if all devices used have | 1156 | * Set the integrity profile for this device if all devices used have |
1097 | * matching profiles. | 1157 | * matching profiles. We're quite deep in the resume path but still |
1158 | * don't know if all devices (particularly DM devices this device | ||
1159 | * may be stacked on) have matching profiles. Even if the profiles | ||
1160 | * don't match we have no way to fail (to resume) at this point. | ||
1098 | */ | 1161 | */ |
1099 | static void dm_table_set_integrity(struct dm_table *t) | 1162 | static void dm_table_set_integrity(struct dm_table *t) |
1100 | { | 1163 | { |
1101 | struct list_head *devices = dm_table_get_devices(t); | 1164 | struct gendisk *template_disk = NULL; |
1102 | struct dm_dev_internal *prev = NULL, *dd = NULL; | ||
1103 | 1165 | ||
1104 | if (!blk_get_integrity(dm_disk(t->md))) | 1166 | if (!blk_get_integrity(dm_disk(t->md))) |
1105 | return; | 1167 | return; |
1106 | 1168 | ||
1107 | list_for_each_entry(dd, devices, list) { | 1169 | template_disk = dm_table_get_integrity_disk(t, true); |
1108 | if (prev && | 1170 | if (!template_disk && |
1109 | blk_integrity_compare(prev->dm_dev.bdev->bd_disk, | 1171 | blk_integrity_is_initialized(dm_disk(t->md))) { |
1110 | dd->dm_dev.bdev->bd_disk) < 0) { | 1172 | DMWARN("%s: device no longer has a valid integrity profile", |
1111 | DMWARN("%s: integrity not set: %s and %s mismatch", | 1173 | dm_device_name(t->md)); |
1112 | dm_device_name(t->md), | 1174 | return; |
1113 | prev->dm_dev.bdev->bd_disk->disk_name, | ||
1114 | dd->dm_dev.bdev->bd_disk->disk_name); | ||
1115 | goto no_integrity; | ||
1116 | } | ||
1117 | prev = dd; | ||
1118 | } | 1175 | } |
1119 | |||
1120 | if (!prev || !bdev_get_integrity(prev->dm_dev.bdev)) | ||
1121 | goto no_integrity; | ||
1122 | |||
1123 | blk_integrity_register(dm_disk(t->md), | 1176 | blk_integrity_register(dm_disk(t->md), |
1124 | bdev_get_integrity(prev->dm_dev.bdev)); | 1177 | blk_get_integrity(template_disk)); |
1125 | |||
1126 | return; | ||
1127 | |||
1128 | no_integrity: | ||
1129 | blk_integrity_register(dm_disk(t->md), NULL); | ||
1130 | |||
1131 | return; | ||
1132 | } | 1178 | } |
1133 | 1179 | ||
1134 | void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, | 1180 | void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, |
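The comments added in this dm-table.c hunk describe a two-stage integrity-profile check for stacked DM devices: at table load, members with an uninitialized profile are tolerated as long as every initialized profile matches; at resume, all profiles must match, and at that point a mismatch can only be warned about. Below is a simplified user-space model of that decision logic. The struct and its fields are invented for illustration; the kernel works on struct gendisk via blk_get_integrity(), blk_integrity_is_initialized() and blk_integrity_compare() instead.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Illustrative stand-in for a member device's blk_integrity state. */
struct dev_profile {
	bool allocated;    /* device has an integrity profile at all   */
	bool initialized;  /* profile has been filled in (post-resume) */
	int  type;         /* compare key; real code uses blk_integrity_compare() */
};

/* Mirrors the shape of dm_table_get_integrity_disk(): returns a template
 * profile, or NULL if support is inconsistent or missing.  With
 * match_all == false (stage 1, table load) uninitialized profiles are
 * skipped; with match_all == true (stage 2, resume) everything must match. */
static struct dev_profile *get_integrity_template(struct dev_profile *devs,
						  size_t n, bool match_all)
{
	struct dev_profile *prev = NULL, *tmpl = NULL;
	size_t i;

	for (i = 0; i < n; i++) {
		tmpl = &devs[i];
		if (!tmpl->allocated)
			return NULL;                 /* no integrity support */
		if (!match_all && !tmpl->initialized)
			continue;                    /* validate later, at resume */
		if (prev && prev->type != tmpl->type)
			return NULL;                 /* profile mismatch */
		prev = tmpl;
	}
	return tmpl;
}

int main(void)
{
	struct dev_profile devs[] = {
		{ .allocated = true, .initialized = true,  .type = 1 },
		/* e.g. a stacked DM member that has not been resumed yet */
		{ .allocated = true, .initialized = false, .type = 1 },
	};

	/* Stage 1 (load): the uninitialized member is skipped. */
	printf("load ok:   %d\n", get_integrity_template(devs, 2, false) != NULL);

	/* Stage 2 (resume): by now every profile must agree. */
	devs[1].initialized = true;
	printf("resume ok: %d\n", get_integrity_template(devs, 2, true) != NULL);
	return 0;
}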
diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c
index 339fdc670751..23078dabb6df 100644
--- a/drivers/md/faulty.c
+++ b/drivers/md/faulty.c
@@ -30,7 +30,7 @@ | |||
30 | * | 30 | * |
31 | * Different modes can be active at a time, but only | 31 | * Different modes can be active at a time, but only |
32 | * one can be set at array creation. Others can be added later. | 32 | * one can be set at array creation. Others can be added later. |
33 | * A mode can be one-shot or recurrent with the recurrance being | 33 | * A mode can be one-shot or recurrent with the recurrence being |
34 | * once in every N requests. | 34 | * once in every N requests. |
35 | * The bottom 5 bits of the "layout" indicate the mode. The | 35 | * The bottom 5 bits of the "layout" indicate the mode. The |
36 | * remainder indicate a period, or 0 for one-shot. | 36 | * remainder indicate a period, or 0 for one-shot. |
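The faulty.c comment above says the fault-injection mode sits in the bottom 5 bits of the "layout" value, with the remaining bits giving the recurrence period (0 meaning one-shot). A small sketch of how such a layout word can be packed and decoded; the macro names here are assumptions, not copied from faulty.c.

#include <stdio.h>

#define FAULTY_MODE_BITS  5
#define FAULTY_MODE_MASK  ((1 << FAULTY_MODE_BITS) - 1)   /* 0x1f */

/* Pack a mode and a recurrence period into one layout word. */
static inline int faulty_layout(int mode, int period)
{
	return (period << FAULTY_MODE_BITS) | (mode & FAULTY_MODE_MASK);
}

int main(void)
{
	int layout = faulty_layout(3 /* mode */, 100 /* once every 100 requests */);

	printf("mode=%d period=%d\n",
	       layout & FAULTY_MODE_MASK,     /* bottom 5 bits: the mode  */
	       layout >> FAULTY_MODE_BITS);   /* remainder: the period    */
	return 0;
}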
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 8b66e04c2ea6..6e853c61d87e 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -447,48 +447,59 @@ EXPORT_SYMBOL(md_flush_request); | |||
447 | 447 | ||
448 | /* Support for plugging. | 448 | /* Support for plugging. |
449 | * This mirrors the plugging support in request_queue, but does not | 449 | * This mirrors the plugging support in request_queue, but does not |
450 | * require having a whole queue | 450 | * require having a whole queue or request structures. |
451 | * We allocate an md_plug_cb for each md device and each thread it gets | ||
452 | * plugged on. This links tot the private plug_handle structure in the | ||
453 | * personality data where we keep a count of the number of outstanding | ||
454 | * plugs so other code can see if a plug is active. | ||
451 | */ | 455 | */ |
452 | static void plugger_work(struct work_struct *work) | 456 | struct md_plug_cb { |
453 | { | 457 | struct blk_plug_cb cb; |
454 | struct plug_handle *plug = | 458 | mddev_t *mddev; |
455 | container_of(work, struct plug_handle, unplug_work); | 459 | }; |
456 | plug->unplug_fn(plug); | ||
457 | } | ||
458 | static void plugger_timeout(unsigned long data) | ||
459 | { | ||
460 | struct plug_handle *plug = (void *)data; | ||
461 | kblockd_schedule_work(NULL, &plug->unplug_work); | ||
462 | } | ||
463 | void plugger_init(struct plug_handle *plug, | ||
464 | void (*unplug_fn)(struct plug_handle *)) | ||
465 | { | ||
466 | plug->unplug_flag = 0; | ||
467 | plug->unplug_fn = unplug_fn; | ||
468 | init_timer(&plug->unplug_timer); | ||
469 | plug->unplug_timer.function = plugger_timeout; | ||
470 | plug->unplug_timer.data = (unsigned long)plug; | ||
471 | INIT_WORK(&plug->unplug_work, plugger_work); | ||
472 | } | ||
473 | EXPORT_SYMBOL_GPL(plugger_init); | ||
474 | 460 | ||
475 | void plugger_set_plug(struct plug_handle *plug) | 461 | static void plugger_unplug(struct blk_plug_cb *cb) |
476 | { | 462 | { |
477 | if (!test_and_set_bit(PLUGGED_FLAG, &plug->unplug_flag)) | 463 | struct md_plug_cb *mdcb = container_of(cb, struct md_plug_cb, cb); |
478 | mod_timer(&plug->unplug_timer, jiffies + msecs_to_jiffies(3)+1); | 464 | if (atomic_dec_and_test(&mdcb->mddev->plug_cnt)) |
465 | md_wakeup_thread(mdcb->mddev->thread); | ||
466 | kfree(mdcb); | ||
479 | } | 467 | } |
480 | EXPORT_SYMBOL_GPL(plugger_set_plug); | ||
481 | 468 | ||
482 | int plugger_remove_plug(struct plug_handle *plug) | 469 | /* Check that an unplug wakeup will come shortly. |
470 | * If not, wakeup the md thread immediately | ||
471 | */ | ||
472 | int mddev_check_plugged(mddev_t *mddev) | ||
483 | { | 473 | { |
484 | if (test_and_clear_bit(PLUGGED_FLAG, &plug->unplug_flag)) { | 474 | struct blk_plug *plug = current->plug; |
485 | del_timer(&plug->unplug_timer); | 475 | struct md_plug_cb *mdcb; |
486 | return 1; | 476 | |
487 | } else | 477 | if (!plug) |
478 | return 0; | ||
479 | |||
480 | list_for_each_entry(mdcb, &plug->cb_list, cb.list) { | ||
481 | if (mdcb->cb.callback == plugger_unplug && | ||
482 | mdcb->mddev == mddev) { | ||
483 | /* Already on the list, move to top */ | ||
484 | if (mdcb != list_first_entry(&plug->cb_list, | ||
485 | struct md_plug_cb, | ||
486 | cb.list)) | ||
487 | list_move(&mdcb->cb.list, &plug->cb_list); | ||
488 | return 1; | ||
489 | } | ||
490 | } | ||
491 | /* Not currently on the callback list */ | ||
492 | mdcb = kmalloc(sizeof(*mdcb), GFP_ATOMIC); | ||
493 | if (!mdcb) | ||
488 | return 0; | 494 | return 0; |
489 | } | ||
490 | EXPORT_SYMBOL_GPL(plugger_remove_plug); | ||
491 | 495 | ||
496 | mdcb->mddev = mddev; | ||
497 | mdcb->cb.callback = plugger_unplug; | ||
498 | atomic_inc(&mddev->plug_cnt); | ||
499 | list_add(&mdcb->cb.list, &plug->cb_list); | ||
500 | return 1; | ||
501 | } | ||
502 | EXPORT_SYMBOL_GPL(mddev_check_plugged); | ||
492 | 503 | ||
493 | static inline mddev_t *mddev_get(mddev_t *mddev) | 504 | static inline mddev_t *mddev_get(mddev_t *mddev) |
494 | { | 505 | { |
@@ -538,6 +549,7 @@ void mddev_init(mddev_t *mddev) | |||
538 | atomic_set(&mddev->active, 1); | 549 | atomic_set(&mddev->active, 1); |
539 | atomic_set(&mddev->openers, 0); | 550 | atomic_set(&mddev->openers, 0); |
540 | atomic_set(&mddev->active_io, 0); | 551 | atomic_set(&mddev->active_io, 0); |
552 | atomic_set(&mddev->plug_cnt, 0); | ||
541 | spin_lock_init(&mddev->write_lock); | 553 | spin_lock_init(&mddev->write_lock); |
542 | atomic_set(&mddev->flush_pending, 0); | 554 | atomic_set(&mddev->flush_pending, 0); |
543 | init_waitqueue_head(&mddev->sb_wait); | 555 | init_waitqueue_head(&mddev->sb_wait); |
@@ -4723,7 +4735,6 @@ static void md_clean(mddev_t *mddev) | |||
4723 | mddev->bitmap_info.chunksize = 0; | 4735 | mddev->bitmap_info.chunksize = 0; |
4724 | mddev->bitmap_info.daemon_sleep = 0; | 4736 | mddev->bitmap_info.daemon_sleep = 0; |
4725 | mddev->bitmap_info.max_write_behind = 0; | 4737 | mddev->bitmap_info.max_write_behind = 0; |
4726 | mddev->plug = NULL; | ||
4727 | } | 4738 | } |
4728 | 4739 | ||
4729 | static void __md_stop_writes(mddev_t *mddev) | 4740 | static void __md_stop_writes(mddev_t *mddev) |
@@ -6266,7 +6277,7 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev) | |||
6266 | * rt is a sector_t, so could be 32bit or 64bit. | 6277 | * rt is a sector_t, so could be 32bit or 64bit. |
6267 | * So we divide before multiply in case it is 32bit and close | 6278 | * So we divide before multiply in case it is 32bit and close |
6268 | * to the limit. | 6279 | * to the limit. |
6269 | * We scale the divisor (db) by 32 to avoid loosing precision | 6280 | * We scale the divisor (db) by 32 to avoid losing precision |
6270 | * near the end of resync when the number of remaining sectors | 6281 | * near the end of resync when the number of remaining sectors |
6271 | * is close to 'db'. | 6282 | * is close to 'db'. |
6272 | * We then divide rt by 32 after multiplying by db to compensate. | 6283 | * We then divide rt by 32 after multiplying by db to compensate. |
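The status_resync() comment in the hunk above explains the arithmetic trick: rt may be a 32-bit sector_t, so multiplying first could overflow, while dividing first by the full db would throw away precision once the remaining sector count gets close to db. Scaling the divisor down by 32 and shifting the result back keeps roughly five extra bits. A stand-alone illustration with made-up numbers (the kernel uses sector_t, div64_u64() and jiffies-based dt):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Made-up figures: 10000 sectors left, 64000 sectors done in 30 s. */
	uint32_t rt = 10000, db = 64000, dt = 30;

	/* Naive divide-first: rt/db underflows to 0, estimate collapses. */
	uint32_t naive = rt / db * dt;

	/* md's approach: shrink the divisor by 32, multiply, then shift the
	 * extra factor of 32 back out -- more precision, still no 32-bit
	 * overflow from an rt * dt product. */
	uint32_t scaled = (rt / (db / 32 + 1) * dt) >> 5;

	printf("naive=%u s  scaled=%u s  exact~%u s\n",
	       (unsigned)naive, (unsigned)scaled,
	       (unsigned)((uint64_t)rt * dt / db));
	return 0;
}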
@@ -6688,12 +6699,6 @@ int md_allow_write(mddev_t *mddev) | |||
6688 | } | 6699 | } |
6689 | EXPORT_SYMBOL_GPL(md_allow_write); | 6700 | EXPORT_SYMBOL_GPL(md_allow_write); |
6690 | 6701 | ||
6691 | void md_unplug(mddev_t *mddev) | ||
6692 | { | ||
6693 | if (mddev->plug) | ||
6694 | mddev->plug->unplug_fn(mddev->plug); | ||
6695 | } | ||
6696 | |||
6697 | #define SYNC_MARKS 10 | 6702 | #define SYNC_MARKS 10 |
6698 | #define SYNC_MARK_STEP (3*HZ) | 6703 | #define SYNC_MARK_STEP (3*HZ) |
6699 | void md_do_sync(mddev_t *mddev) | 6704 | void md_do_sync(mddev_t *mddev) |
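The md.c changes above replace the old timer-based plug_handle with per-thread blk_plug callbacks: a personality's make_request calls mddev_check_plugged() to register an md_plug_cb on the current thread's plug (bumping mddev->plug_cnt), and the callback run at unplug time drops the count and wakes the md thread. A toy user-space model of that flow, with a plain linked list standing in for current->plug->cb_list; every name here is a stand-in, not the block-layer API.

#include <stdio.h>
#include <stdlib.h>

/* Stand-ins for mddev_t, blk_plug and md_plug_cb. */
struct toy_mddev { int plug_cnt; const char *name; };

struct toy_cb {
	struct toy_mddev *mddev;
	struct toy_cb *next;
};

struct toy_plug { struct toy_cb *cb_list; };   /* models current->plug */

static void wake_md_thread(struct toy_mddev *m)
{
	printf("wake %s thread\n", m->name);
}

/* Models mddev_check_plugged(): register once per (plug, mddev) pair and
 * count the outstanding plug; return 0 when no plug is active. */
static int check_plugged(struct toy_plug *plug, struct toy_mddev *m)
{
	struct toy_cb *cb;

	if (!plug)
		return 0;
	for (cb = plug->cb_list; cb; cb = cb->next)
		if (cb->mddev == m)
			return 1;               /* already on the list */
	cb = malloc(sizeof(*cb));
	if (!cb)
		return 0;
	cb->mddev = m;
	cb->next = plug->cb_list;
	plug->cb_list = cb;
	m->plug_cnt++;                          /* atomic_inc() in the kernel */
	return 1;
}

/* Models the unplug path: run each callback, drop plug_cnt and wake the
 * md thread once the last plug for that device goes away. */
static void unplug(struct toy_plug *plug)
{
	while (plug->cb_list) {
		struct toy_cb *cb = plug->cb_list;
		plug->cb_list = cb->next;
		if (--cb->mddev->plug_cnt == 0)
			wake_md_thread(cb->mddev);
		free(cb);
	}
}

int main(void)
{
	struct toy_mddev md = { .plug_cnt = 0, .name = "md0" };
	struct toy_plug plug = { .cb_list = NULL };

	/* make_request() path: register while queueing several bios ... */
	int plugged = check_plugged(&plug, &md);
	check_plugged(&plug, &md);              /* second call is a no-op */
	if (!plugged)
		wake_md_thread(&md);            /* no plug: wake immediately */

	/* ... the daemon defers flushing while plug_cnt != 0 ... */
	printf("plug_cnt=%d\n", md.plug_cnt);

	unplug(&plug);                          /* blk_finish_plug()/schedule() */
	return 0;
}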
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 12215d437fcc..0b1fd3f1d85b 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -29,26 +29,6 @@ | |||
29 | typedef struct mddev_s mddev_t; | 29 | typedef struct mddev_s mddev_t; |
30 | typedef struct mdk_rdev_s mdk_rdev_t; | 30 | typedef struct mdk_rdev_s mdk_rdev_t; |
31 | 31 | ||
32 | /* generic plugging support - like that provided with request_queue, | ||
33 | * but does not require a request_queue | ||
34 | */ | ||
35 | struct plug_handle { | ||
36 | void (*unplug_fn)(struct plug_handle *); | ||
37 | struct timer_list unplug_timer; | ||
38 | struct work_struct unplug_work; | ||
39 | unsigned long unplug_flag; | ||
40 | }; | ||
41 | #define PLUGGED_FLAG 1 | ||
42 | void plugger_init(struct plug_handle *plug, | ||
43 | void (*unplug_fn)(struct plug_handle *)); | ||
44 | void plugger_set_plug(struct plug_handle *plug); | ||
45 | int plugger_remove_plug(struct plug_handle *plug); | ||
46 | static inline void plugger_flush(struct plug_handle *plug) | ||
47 | { | ||
48 | del_timer_sync(&plug->unplug_timer); | ||
49 | cancel_work_sync(&plug->unplug_work); | ||
50 | } | ||
51 | |||
52 | /* | 32 | /* |
53 | * MD's 'extended' device | 33 | * MD's 'extended' device |
54 | */ | 34 | */ |
@@ -94,7 +74,7 @@ struct mdk_rdev_s | |||
94 | #define In_sync 2 /* device is in_sync with rest of array */ | 74 | #define In_sync 2 /* device is in_sync with rest of array */ |
95 | #define WriteMostly 4 /* Avoid reading if at all possible */ | 75 | #define WriteMostly 4 /* Avoid reading if at all possible */ |
96 | #define AutoDetected 7 /* added by auto-detect */ | 76 | #define AutoDetected 7 /* added by auto-detect */ |
97 | #define Blocked 8 /* An error occured on an externally | 77 | #define Blocked 8 /* An error occurred on an externally |
98 | * managed array, don't allow writes | 78 | * managed array, don't allow writes |
99 | * until it is cleared */ | 79 | * until it is cleared */ |
100 | wait_queue_head_t blocked_wait; | 80 | wait_queue_head_t blocked_wait; |
@@ -199,6 +179,9 @@ struct mddev_s | |||
199 | int delta_disks, new_level, new_layout; | 179 | int delta_disks, new_level, new_layout; |
200 | int new_chunk_sectors; | 180 | int new_chunk_sectors; |
201 | 181 | ||
182 | atomic_t plug_cnt; /* If device is expecting | ||
183 | * more bios soon. | ||
184 | */ | ||
202 | struct mdk_thread_s *thread; /* management thread */ | 185 | struct mdk_thread_s *thread; /* management thread */ |
203 | struct mdk_thread_s *sync_thread; /* doing resync or reconstruct */ | 186 | struct mdk_thread_s *sync_thread; /* doing resync or reconstruct */ |
204 | sector_t curr_resync; /* last block scheduled */ | 187 | sector_t curr_resync; /* last block scheduled */ |
@@ -336,7 +319,6 @@ struct mddev_s | |||
336 | struct list_head all_mddevs; | 319 | struct list_head all_mddevs; |
337 | 320 | ||
338 | struct attribute_group *to_remove; | 321 | struct attribute_group *to_remove; |
339 | struct plug_handle *plug; /* if used by personality */ | ||
340 | 322 | ||
341 | struct bio_set *bio_set; | 323 | struct bio_set *bio_set; |
342 | 324 | ||
@@ -516,7 +498,6 @@ extern int md_integrity_register(mddev_t *mddev); | |||
516 | extern void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev); | 498 | extern void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev); |
517 | extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale); | 499 | extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale); |
518 | extern void restore_bitmap_write_access(struct file *file); | 500 | extern void restore_bitmap_write_access(struct file *file); |
519 | extern void md_unplug(mddev_t *mddev); | ||
520 | 501 | ||
521 | extern void mddev_init(mddev_t *mddev); | 502 | extern void mddev_init(mddev_t *mddev); |
522 | extern int md_run(mddev_t *mddev); | 503 | extern int md_run(mddev_t *mddev); |
@@ -530,4 +511,5 @@ extern struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask, | |||
530 | mddev_t *mddev); | 511 | mddev_t *mddev); |
531 | extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, | 512 | extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, |
532 | mddev_t *mddev); | 513 | mddev_t *mddev); |
514 | extern int mddev_check_plugged(mddev_t *mddev); | ||
533 | #endif /* _MD_MD_H */ | 515 | #endif /* _MD_MD_H */ |
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index c2a21ae56d97..2b7a7ff401dc 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -565,12 +565,6 @@ static void flush_pending_writes(conf_t *conf) | |||
565 | spin_unlock_irq(&conf->device_lock); | 565 | spin_unlock_irq(&conf->device_lock); |
566 | } | 566 | } |
567 | 567 | ||
568 | static void md_kick_device(mddev_t *mddev) | ||
569 | { | ||
570 | blk_flush_plug(current); | ||
571 | md_wakeup_thread(mddev->thread); | ||
572 | } | ||
573 | |||
574 | /* Barriers.... | 568 | /* Barriers.... |
575 | * Sometimes we need to suspend IO while we do something else, | 569 | * Sometimes we need to suspend IO while we do something else, |
576 | * either some resync/recovery, or reconfigure the array. | 570 | * either some resync/recovery, or reconfigure the array. |
@@ -600,7 +594,7 @@ static void raise_barrier(conf_t *conf) | |||
600 | 594 | ||
601 | /* Wait until no block IO is waiting */ | 595 | /* Wait until no block IO is waiting */ |
602 | wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting, | 596 | wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting, |
603 | conf->resync_lock, md_kick_device(conf->mddev)); | 597 | conf->resync_lock, ); |
604 | 598 | ||
605 | /* block any new IO from starting */ | 599 | /* block any new IO from starting */ |
606 | conf->barrier++; | 600 | conf->barrier++; |
@@ -608,7 +602,7 @@ static void raise_barrier(conf_t *conf) | |||
608 | /* Now wait for all pending IO to complete */ | 602 | /* Now wait for all pending IO to complete */ |
609 | wait_event_lock_irq(conf->wait_barrier, | 603 | wait_event_lock_irq(conf->wait_barrier, |
610 | !conf->nr_pending && conf->barrier < RESYNC_DEPTH, | 604 | !conf->nr_pending && conf->barrier < RESYNC_DEPTH, |
611 | conf->resync_lock, md_kick_device(conf->mddev)); | 605 | conf->resync_lock, ); |
612 | 606 | ||
613 | spin_unlock_irq(&conf->resync_lock); | 607 | spin_unlock_irq(&conf->resync_lock); |
614 | } | 608 | } |
@@ -630,7 +624,7 @@ static void wait_barrier(conf_t *conf) | |||
630 | conf->nr_waiting++; | 624 | conf->nr_waiting++; |
631 | wait_event_lock_irq(conf->wait_barrier, !conf->barrier, | 625 | wait_event_lock_irq(conf->wait_barrier, !conf->barrier, |
632 | conf->resync_lock, | 626 | conf->resync_lock, |
633 | md_kick_device(conf->mddev)); | 627 | ); |
634 | conf->nr_waiting--; | 628 | conf->nr_waiting--; |
635 | } | 629 | } |
636 | conf->nr_pending++; | 630 | conf->nr_pending++; |
@@ -666,8 +660,7 @@ static void freeze_array(conf_t *conf) | |||
666 | wait_event_lock_irq(conf->wait_barrier, | 660 | wait_event_lock_irq(conf->wait_barrier, |
667 | conf->nr_pending == conf->nr_queued+1, | 661 | conf->nr_pending == conf->nr_queued+1, |
668 | conf->resync_lock, | 662 | conf->resync_lock, |
669 | ({ flush_pending_writes(conf); | 663 | flush_pending_writes(conf)); |
670 | md_kick_device(conf->mddev); })); | ||
671 | spin_unlock_irq(&conf->resync_lock); | 664 | spin_unlock_irq(&conf->resync_lock); |
672 | } | 665 | } |
673 | static void unfreeze_array(conf_t *conf) | 666 | static void unfreeze_array(conf_t *conf) |
@@ -729,6 +722,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
729 | const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); | 722 | const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); |
730 | const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA)); | 723 | const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA)); |
731 | mdk_rdev_t *blocked_rdev; | 724 | mdk_rdev_t *blocked_rdev; |
725 | int plugged; | ||
732 | 726 | ||
733 | /* | 727 | /* |
734 | * Register the new request and wait if the reconstruction | 728 | * Register the new request and wait if the reconstruction |
@@ -820,6 +814,8 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
820 | * inc refcount on their rdev. Record them by setting | 814 | * inc refcount on their rdev. Record them by setting |
821 | * bios[x] to bio | 815 | * bios[x] to bio |
822 | */ | 816 | */ |
817 | plugged = mddev_check_plugged(mddev); | ||
818 | |||
823 | disks = conf->raid_disks; | 819 | disks = conf->raid_disks; |
824 | retry_write: | 820 | retry_write: |
825 | blocked_rdev = NULL; | 821 | blocked_rdev = NULL; |
@@ -925,7 +921,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
925 | /* In case raid1d snuck in to freeze_array */ | 921 | /* In case raid1d snuck in to freeze_array */ |
926 | wake_up(&conf->wait_barrier); | 922 | wake_up(&conf->wait_barrier); |
927 | 923 | ||
928 | if (do_sync || !bitmap) | 924 | if (do_sync || !bitmap || !plugged) |
929 | md_wakeup_thread(mddev->thread); | 925 | md_wakeup_thread(mddev->thread); |
930 | 926 | ||
931 | return 0; | 927 | return 0; |
@@ -1516,13 +1512,16 @@ static void raid1d(mddev_t *mddev) | |||
1516 | conf_t *conf = mddev->private; | 1512 | conf_t *conf = mddev->private; |
1517 | struct list_head *head = &conf->retry_list; | 1513 | struct list_head *head = &conf->retry_list; |
1518 | mdk_rdev_t *rdev; | 1514 | mdk_rdev_t *rdev; |
1515 | struct blk_plug plug; | ||
1519 | 1516 | ||
1520 | md_check_recovery(mddev); | 1517 | md_check_recovery(mddev); |
1521 | 1518 | ||
1519 | blk_start_plug(&plug); | ||
1522 | for (;;) { | 1520 | for (;;) { |
1523 | char b[BDEVNAME_SIZE]; | 1521 | char b[BDEVNAME_SIZE]; |
1524 | 1522 | ||
1525 | flush_pending_writes(conf); | 1523 | if (atomic_read(&mddev->plug_cnt) == 0) |
1524 | flush_pending_writes(conf); | ||
1526 | 1525 | ||
1527 | spin_lock_irqsave(&conf->device_lock, flags); | 1526 | spin_lock_irqsave(&conf->device_lock, flags); |
1528 | if (list_empty(head)) { | 1527 | if (list_empty(head)) { |
@@ -1593,6 +1592,7 @@ static void raid1d(mddev_t *mddev) | |||
1593 | } | 1592 | } |
1594 | cond_resched(); | 1593 | cond_resched(); |
1595 | } | 1594 | } |
1595 | blk_finish_plug(&plug); | ||
1596 | } | 1596 | } |
1597 | 1597 | ||
1598 | 1598 | ||
@@ -2039,7 +2039,6 @@ static int stop(mddev_t *mddev) | |||
2039 | 2039 | ||
2040 | md_unregister_thread(mddev->thread); | 2040 | md_unregister_thread(mddev->thread); |
2041 | mddev->thread = NULL; | 2041 | mddev->thread = NULL; |
2042 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ | ||
2043 | if (conf->r1bio_pool) | 2042 | if (conf->r1bio_pool) |
2044 | mempool_destroy(conf->r1bio_pool); | 2043 | mempool_destroy(conf->r1bio_pool); |
2045 | kfree(conf->mirrors); | 2044 | kfree(conf->mirrors); |
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index f7b62370b374..8e9462626ec5 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -5,7 +5,7 @@ | |||
5 | * | 5 | * |
6 | * RAID-10 support for md. | 6 | * RAID-10 support for md. |
7 | * | 7 | * |
8 | * Base on code in raid1.c. See raid1.c for futher copyright information. | 8 | * Base on code in raid1.c. See raid1.c for further copyright information. |
9 | * | 9 | * |
10 | * | 10 | * |
11 | * This program is free software; you can redistribute it and/or modify | 11 | * This program is free software; you can redistribute it and/or modify |
@@ -340,14 +340,14 @@ static void raid10_end_write_request(struct bio *bio, int error) | |||
340 | 340 | ||
341 | /* | 341 | /* |
342 | * RAID10 layout manager | 342 | * RAID10 layout manager |
343 | * Aswell as the chunksize and raid_disks count, there are two | 343 | * As well as the chunksize and raid_disks count, there are two |
344 | * parameters: near_copies and far_copies. | 344 | * parameters: near_copies and far_copies. |
345 | * near_copies * far_copies must be <= raid_disks. | 345 | * near_copies * far_copies must be <= raid_disks. |
346 | * Normally one of these will be 1. | 346 | * Normally one of these will be 1. |
347 | * If both are 1, we get raid0. | 347 | * If both are 1, we get raid0. |
348 | * If near_copies == raid_disks, we get raid1. | 348 | * If near_copies == raid_disks, we get raid1. |
349 | * | 349 | * |
350 | * Chunks are layed out in raid0 style with near_copies copies of the | 350 | * Chunks are laid out in raid0 style with near_copies copies of the |
351 | * first chunk, followed by near_copies copies of the next chunk and | 351 | * first chunk, followed by near_copies copies of the next chunk and |
352 | * so on. | 352 | * so on. |
353 | * If far_copies > 1, then after 1/far_copies of the array has been assigned | 353 | * If far_copies > 1, then after 1/far_copies of the array has been assigned |
@@ -634,12 +634,6 @@ static void flush_pending_writes(conf_t *conf) | |||
634 | spin_unlock_irq(&conf->device_lock); | 634 | spin_unlock_irq(&conf->device_lock); |
635 | } | 635 | } |
636 | 636 | ||
637 | static void md_kick_device(mddev_t *mddev) | ||
638 | { | ||
639 | blk_flush_plug(current); | ||
640 | md_wakeup_thread(mddev->thread); | ||
641 | } | ||
642 | |||
643 | /* Barriers.... | 637 | /* Barriers.... |
644 | * Sometimes we need to suspend IO while we do something else, | 638 | * Sometimes we need to suspend IO while we do something else, |
645 | * either some resync/recovery, or reconfigure the array. | 639 | * either some resync/recovery, or reconfigure the array. |
@@ -669,15 +663,15 @@ static void raise_barrier(conf_t *conf, int force) | |||
669 | 663 | ||
670 | /* Wait until no block IO is waiting (unless 'force') */ | 664 | /* Wait until no block IO is waiting (unless 'force') */ |
671 | wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting, | 665 | wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting, |
672 | conf->resync_lock, md_kick_device(conf->mddev)); | 666 | conf->resync_lock, ); |
673 | 667 | ||
674 | /* block any new IO from starting */ | 668 | /* block any new IO from starting */ |
675 | conf->barrier++; | 669 | conf->barrier++; |
676 | 670 | ||
677 | /* No wait for all pending IO to complete */ | 671 | /* Now wait for all pending IO to complete */ |
678 | wait_event_lock_irq(conf->wait_barrier, | 672 | wait_event_lock_irq(conf->wait_barrier, |
679 | !conf->nr_pending && conf->barrier < RESYNC_DEPTH, | 673 | !conf->nr_pending && conf->barrier < RESYNC_DEPTH, |
680 | conf->resync_lock, md_kick_device(conf->mddev)); | 674 | conf->resync_lock, ); |
681 | 675 | ||
682 | spin_unlock_irq(&conf->resync_lock); | 676 | spin_unlock_irq(&conf->resync_lock); |
683 | } | 677 | } |
@@ -698,7 +692,7 @@ static void wait_barrier(conf_t *conf) | |||
698 | conf->nr_waiting++; | 692 | conf->nr_waiting++; |
699 | wait_event_lock_irq(conf->wait_barrier, !conf->barrier, | 693 | wait_event_lock_irq(conf->wait_barrier, !conf->barrier, |
700 | conf->resync_lock, | 694 | conf->resync_lock, |
701 | md_kick_device(conf->mddev)); | 695 | ); |
702 | conf->nr_waiting--; | 696 | conf->nr_waiting--; |
703 | } | 697 | } |
704 | conf->nr_pending++; | 698 | conf->nr_pending++; |
@@ -734,8 +728,8 @@ static void freeze_array(conf_t *conf) | |||
734 | wait_event_lock_irq(conf->wait_barrier, | 728 | wait_event_lock_irq(conf->wait_barrier, |
735 | conf->nr_pending == conf->nr_queued+1, | 729 | conf->nr_pending == conf->nr_queued+1, |
736 | conf->resync_lock, | 730 | conf->resync_lock, |
737 | ({ flush_pending_writes(conf); | 731 | flush_pending_writes(conf)); |
738 | md_kick_device(conf->mddev); })); | 732 | |
739 | spin_unlock_irq(&conf->resync_lock); | 733 | spin_unlock_irq(&conf->resync_lock); |
740 | } | 734 | } |
741 | 735 | ||
@@ -762,6 +756,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
762 | const unsigned long do_fua = (bio->bi_rw & REQ_FUA); | 756 | const unsigned long do_fua = (bio->bi_rw & REQ_FUA); |
763 | unsigned long flags; | 757 | unsigned long flags; |
764 | mdk_rdev_t *blocked_rdev; | 758 | mdk_rdev_t *blocked_rdev; |
759 | int plugged; | ||
765 | 760 | ||
766 | if (unlikely(bio->bi_rw & REQ_FLUSH)) { | 761 | if (unlikely(bio->bi_rw & REQ_FLUSH)) { |
767 | md_flush_request(mddev, bio); | 762 | md_flush_request(mddev, bio); |
@@ -870,6 +865,8 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
870 | * inc refcount on their rdev. Record them by setting | 865 | * inc refcount on their rdev. Record them by setting |
871 | * bios[x] to bio | 866 | * bios[x] to bio |
872 | */ | 867 | */ |
868 | plugged = mddev_check_plugged(mddev); | ||
869 | |||
873 | raid10_find_phys(conf, r10_bio); | 870 | raid10_find_phys(conf, r10_bio); |
874 | retry_write: | 871 | retry_write: |
875 | blocked_rdev = NULL; | 872 | blocked_rdev = NULL; |
@@ -946,9 +943,8 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
946 | /* In case raid10d snuck in to freeze_array */ | 943 | /* In case raid10d snuck in to freeze_array */ |
947 | wake_up(&conf->wait_barrier); | 944 | wake_up(&conf->wait_barrier); |
948 | 945 | ||
949 | if (do_sync || !mddev->bitmap) | 946 | if (do_sync || !mddev->bitmap || !plugged) |
950 | md_wakeup_thread(mddev->thread); | 947 | md_wakeup_thread(mddev->thread); |
951 | |||
952 | return 0; | 948 | return 0; |
953 | } | 949 | } |
954 | 950 | ||
@@ -1640,9 +1636,11 @@ static void raid10d(mddev_t *mddev) | |||
1640 | conf_t *conf = mddev->private; | 1636 | conf_t *conf = mddev->private; |
1641 | struct list_head *head = &conf->retry_list; | 1637 | struct list_head *head = &conf->retry_list; |
1642 | mdk_rdev_t *rdev; | 1638 | mdk_rdev_t *rdev; |
1639 | struct blk_plug plug; | ||
1643 | 1640 | ||
1644 | md_check_recovery(mddev); | 1641 | md_check_recovery(mddev); |
1645 | 1642 | ||
1643 | blk_start_plug(&plug); | ||
1646 | for (;;) { | 1644 | for (;;) { |
1647 | char b[BDEVNAME_SIZE]; | 1645 | char b[BDEVNAME_SIZE]; |
1648 | 1646 | ||
@@ -1716,6 +1714,7 @@ static void raid10d(mddev_t *mddev) | |||
1716 | } | 1714 | } |
1717 | cond_resched(); | 1715 | cond_resched(); |
1718 | } | 1716 | } |
1717 | blk_finish_plug(&plug); | ||
1719 | } | 1718 | } |
1720 | 1719 | ||
1721 | 1720 | ||
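The layout-manager comment earlier in this raid10.c diff describes chunks laid out raid0-style, with near_copies copies of each chunk placed before moving on to the next. A simplified mapping for the near-copies-only case (far_copies == 1); this illustrates the comment, it is not the actual raid10_find_phys() arithmetic and ignores far/offset layouts entirely.

#include <stdio.h>

/* For far_copies == 1: copy c of logical chunk n lands on device
 * (n * near_copies + c) % raid_disks, (n * near_copies + c) / raid_disks
 * stripes into that device. */
static void map_chunk(int chunk, int near_copies, int raid_disks)
{
	int c;

	for (c = 0; c < near_copies; c++) {
		int slot = chunk * near_copies + c;
		printf("chunk %d copy %d -> dev %d, stripe %d\n",
		       chunk, c, slot % raid_disks, slot / raid_disks);
	}
}

int main(void)
{
	int chunk;

	/* 4 disks, near_copies = 2: chunk 0 on devs 0/1, chunk 1 on devs 2/3,
	 * chunk 2 back on devs 0/1 one stripe further, and so on. */
	for (chunk = 0; chunk < 4; chunk++)
		map_chunk(chunk, 2, 4);
	return 0;
}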
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index 2316ac2e8e21..944b1104d3b4 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -17,8 +17,8 @@ struct r10_private_data_s { | |||
17 | spinlock_t device_lock; | 17 | spinlock_t device_lock; |
18 | 18 | ||
19 | /* geometry */ | 19 | /* geometry */ |
20 | int near_copies; /* number of copies layed out raid0 style */ | 20 | int near_copies; /* number of copies laid out raid0 style */ |
21 | int far_copies; /* number of copies layed out | 21 | int far_copies; /* number of copies laid out |
22 | * at large strides across drives | 22 | * at large strides across drives |
23 | */ | 23 | */ |
24 | int far_offset; /* far_copies are offset by 1 stripe | 24 | int far_offset; /* far_copies are offset by 1 stripe |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index e867ee42b152..f301e6ae220c 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -27,12 +27,12 @@ | |||
27 | * | 27 | * |
28 | * We group bitmap updates into batches. Each batch has a number. | 28 | * We group bitmap updates into batches. Each batch has a number. |
29 | * We may write out several batches at once, but that isn't very important. | 29 | * We may write out several batches at once, but that isn't very important. |
30 | * conf->bm_write is the number of the last batch successfully written. | 30 | * conf->seq_write is the number of the last batch successfully written. |
31 | * conf->bm_flush is the number of the last batch that was closed to | 31 | * conf->seq_flush is the number of the last batch that was closed to |
32 | * new additions. | 32 | * new additions. |
33 | * When we discover that we will need to write to any block in a stripe | 33 | * When we discover that we will need to write to any block in a stripe |
34 | * (in add_stripe_bio) we update the in-memory bitmap and record in sh->bm_seq | 34 | * (in add_stripe_bio) we update the in-memory bitmap and record in sh->bm_seq |
35 | * the number of the batch it will be in. This is bm_flush+1. | 35 | * the number of the batch it will be in. This is seq_flush+1. |
36 | * When we are ready to do a write, if that batch hasn't been written yet, | 36 | * When we are ready to do a write, if that batch hasn't been written yet, |
37 | * we plug the array and queue the stripe for later. | 37 | * we plug the array and queue the stripe for later. |
38 | * When an unplug happens, we increment bm_flush, thus closing the current | 38 | * When an unplug happens, we increment bm_flush, thus closing the current |
@@ -199,14 +199,12 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh) | |||
199 | BUG_ON(!list_empty(&sh->lru)); | 199 | BUG_ON(!list_empty(&sh->lru)); |
200 | BUG_ON(atomic_read(&conf->active_stripes)==0); | 200 | BUG_ON(atomic_read(&conf->active_stripes)==0); |
201 | if (test_bit(STRIPE_HANDLE, &sh->state)) { | 201 | if (test_bit(STRIPE_HANDLE, &sh->state)) { |
202 | if (test_bit(STRIPE_DELAYED, &sh->state)) { | 202 | if (test_bit(STRIPE_DELAYED, &sh->state)) |
203 | list_add_tail(&sh->lru, &conf->delayed_list); | 203 | list_add_tail(&sh->lru, &conf->delayed_list); |
204 | plugger_set_plug(&conf->plug); | 204 | else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && |
205 | } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && | 205 | sh->bm_seq - conf->seq_write > 0) |
206 | sh->bm_seq - conf->seq_write > 0) { | ||
207 | list_add_tail(&sh->lru, &conf->bitmap_list); | 206 | list_add_tail(&sh->lru, &conf->bitmap_list); |
208 | plugger_set_plug(&conf->plug); | 207 | else { |
209 | } else { | ||
210 | clear_bit(STRIPE_BIT_DELAY, &sh->state); | 208 | clear_bit(STRIPE_BIT_DELAY, &sh->state); |
211 | list_add_tail(&sh->lru, &conf->handle_list); | 209 | list_add_tail(&sh->lru, &conf->handle_list); |
212 | } | 210 | } |
@@ -461,7 +459,7 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector, | |||
461 | < (conf->max_nr_stripes *3/4) | 459 | < (conf->max_nr_stripes *3/4) |
462 | || !conf->inactive_blocked), | 460 | || !conf->inactive_blocked), |
463 | conf->device_lock, | 461 | conf->device_lock, |
464 | md_raid5_kick_device(conf)); | 462 | ); |
465 | conf->inactive_blocked = 0; | 463 | conf->inactive_blocked = 0; |
466 | } else | 464 | } else |
467 | init_stripe(sh, sector, previous); | 465 | init_stripe(sh, sector, previous); |
@@ -1470,7 +1468,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize) | |||
1470 | wait_event_lock_irq(conf->wait_for_stripe, | 1468 | wait_event_lock_irq(conf->wait_for_stripe, |
1471 | !list_empty(&conf->inactive_list), | 1469 | !list_empty(&conf->inactive_list), |
1472 | conf->device_lock, | 1470 | conf->device_lock, |
1473 | blk_flush_plug(current)); | 1471 | ); |
1474 | osh = get_free_stripe(conf); | 1472 | osh = get_free_stripe(conf); |
1475 | spin_unlock_irq(&conf->device_lock); | 1473 | spin_unlock_irq(&conf->device_lock); |
1476 | atomic_set(&nsh->count, 1); | 1474 | atomic_set(&nsh->count, 1); |
@@ -3623,8 +3621,7 @@ static void raid5_activate_delayed(raid5_conf_t *conf) | |||
3623 | atomic_inc(&conf->preread_active_stripes); | 3621 | atomic_inc(&conf->preread_active_stripes); |
3624 | list_add_tail(&sh->lru, &conf->hold_list); | 3622 | list_add_tail(&sh->lru, &conf->hold_list); |
3625 | } | 3623 | } |
3626 | } else | 3624 | } |
3627 | plugger_set_plug(&conf->plug); | ||
3628 | } | 3625 | } |
3629 | 3626 | ||
3630 | static void activate_bit_delay(raid5_conf_t *conf) | 3627 | static void activate_bit_delay(raid5_conf_t *conf) |
@@ -3641,21 +3638,6 @@ static void activate_bit_delay(raid5_conf_t *conf) | |||
3641 | } | 3638 | } |
3642 | } | 3639 | } |
3643 | 3640 | ||
3644 | void md_raid5_kick_device(raid5_conf_t *conf) | ||
3645 | { | ||
3646 | blk_flush_plug(current); | ||
3647 | raid5_activate_delayed(conf); | ||
3648 | md_wakeup_thread(conf->mddev->thread); | ||
3649 | } | ||
3650 | EXPORT_SYMBOL_GPL(md_raid5_kick_device); | ||
3651 | |||
3652 | static void raid5_unplug(struct plug_handle *plug) | ||
3653 | { | ||
3654 | raid5_conf_t *conf = container_of(plug, raid5_conf_t, plug); | ||
3655 | |||
3656 | md_raid5_kick_device(conf); | ||
3657 | } | ||
3658 | |||
3659 | int md_raid5_congested(mddev_t *mddev, int bits) | 3641 | int md_raid5_congested(mddev_t *mddev, int bits) |
3660 | { | 3642 | { |
3661 | raid5_conf_t *conf = mddev->private; | 3643 | raid5_conf_t *conf = mddev->private; |
@@ -3945,6 +3927,7 @@ static int make_request(mddev_t *mddev, struct bio * bi) | |||
3945 | struct stripe_head *sh; | 3927 | struct stripe_head *sh; |
3946 | const int rw = bio_data_dir(bi); | 3928 | const int rw = bio_data_dir(bi); |
3947 | int remaining; | 3929 | int remaining; |
3930 | int plugged; | ||
3948 | 3931 | ||
3949 | if (unlikely(bi->bi_rw & REQ_FLUSH)) { | 3932 | if (unlikely(bi->bi_rw & REQ_FLUSH)) { |
3950 | md_flush_request(mddev, bi); | 3933 | md_flush_request(mddev, bi); |
@@ -3963,6 +3946,7 @@ static int make_request(mddev_t *mddev, struct bio * bi) | |||
3963 | bi->bi_next = NULL; | 3946 | bi->bi_next = NULL; |
3964 | bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ | 3947 | bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ |
3965 | 3948 | ||
3949 | plugged = mddev_check_plugged(mddev); | ||
3966 | for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { | 3950 | for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { |
3967 | DEFINE_WAIT(w); | 3951 | DEFINE_WAIT(w); |
3968 | int disks, data_disks; | 3952 | int disks, data_disks; |
@@ -4057,7 +4041,7 @@ static int make_request(mddev_t *mddev, struct bio * bi) | |||
4057 | * add failed due to overlap. Flush everything | 4041 | * add failed due to overlap. Flush everything |
4058 | * and wait a while | 4042 | * and wait a while |
4059 | */ | 4043 | */ |
4060 | md_raid5_kick_device(conf); | 4044 | md_wakeup_thread(mddev->thread); |
4061 | release_stripe(sh); | 4045 | release_stripe(sh); |
4062 | schedule(); | 4046 | schedule(); |
4063 | goto retry; | 4047 | goto retry; |
@@ -4077,6 +4061,9 @@ static int make_request(mddev_t *mddev, struct bio * bi) | |||
4077 | } | 4061 | } |
4078 | 4062 | ||
4079 | } | 4063 | } |
4064 | if (!plugged) | ||
4065 | md_wakeup_thread(mddev->thread); | ||
4066 | |||
4080 | spin_lock_irq(&conf->device_lock); | 4067 | spin_lock_irq(&conf->device_lock); |
4081 | remaining = raid5_dec_bi_phys_segments(bi); | 4068 | remaining = raid5_dec_bi_phys_segments(bi); |
4082 | spin_unlock_irq(&conf->device_lock); | 4069 | spin_unlock_irq(&conf->device_lock); |
@@ -4478,24 +4465,30 @@ static void raid5d(mddev_t *mddev) | |||
4478 | struct stripe_head *sh; | 4465 | struct stripe_head *sh; |
4479 | raid5_conf_t *conf = mddev->private; | 4466 | raid5_conf_t *conf = mddev->private; |
4480 | int handled; | 4467 | int handled; |
4468 | struct blk_plug plug; | ||
4481 | 4469 | ||
4482 | pr_debug("+++ raid5d active\n"); | 4470 | pr_debug("+++ raid5d active\n"); |
4483 | 4471 | ||
4484 | md_check_recovery(mddev); | 4472 | md_check_recovery(mddev); |
4485 | 4473 | ||
4474 | blk_start_plug(&plug); | ||
4486 | handled = 0; | 4475 | handled = 0; |
4487 | spin_lock_irq(&conf->device_lock); | 4476 | spin_lock_irq(&conf->device_lock); |
4488 | while (1) { | 4477 | while (1) { |
4489 | struct bio *bio; | 4478 | struct bio *bio; |
4490 | 4479 | ||
4491 | if (conf->seq_flush != conf->seq_write) { | 4480 | if (atomic_read(&mddev->plug_cnt) == 0 && |
4492 | int seq = conf->seq_flush; | 4481 | !list_empty(&conf->bitmap_list)) { |
4482 | /* Now is a good time to flush some bitmap updates */ | ||
4483 | conf->seq_flush++; | ||
4493 | spin_unlock_irq(&conf->device_lock); | 4484 | spin_unlock_irq(&conf->device_lock); |
4494 | bitmap_unplug(mddev->bitmap); | 4485 | bitmap_unplug(mddev->bitmap); |
4495 | spin_lock_irq(&conf->device_lock); | 4486 | spin_lock_irq(&conf->device_lock); |
4496 | conf->seq_write = seq; | 4487 | conf->seq_write = conf->seq_flush; |
4497 | activate_bit_delay(conf); | 4488 | activate_bit_delay(conf); |
4498 | } | 4489 | } |
4490 | if (atomic_read(&mddev->plug_cnt) == 0) | ||
4491 | raid5_activate_delayed(conf); | ||
4499 | 4492 | ||
4500 | while ((bio = remove_bio_from_retry(conf))) { | 4493 | while ((bio = remove_bio_from_retry(conf))) { |
4501 | int ok; | 4494 | int ok; |
@@ -4525,6 +4518,7 @@ static void raid5d(mddev_t *mddev) | |||
4525 | spin_unlock_irq(&conf->device_lock); | 4518 | spin_unlock_irq(&conf->device_lock); |
4526 | 4519 | ||
4527 | async_tx_issue_pending_all(); | 4520 | async_tx_issue_pending_all(); |
4521 | blk_finish_plug(&plug); | ||
4528 | 4522 | ||
4529 | pr_debug("--- raid5d inactive\n"); | 4523 | pr_debug("--- raid5d inactive\n"); |
4530 | } | 4524 | } |
@@ -5141,8 +5135,6 @@ static int run(mddev_t *mddev) | |||
5141 | mdname(mddev)); | 5135 | mdname(mddev)); |
5142 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); | 5136 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); |
5143 | 5137 | ||
5144 | plugger_init(&conf->plug, raid5_unplug); | ||
5145 | mddev->plug = &conf->plug; | ||
5146 | if (mddev->queue) { | 5138 | if (mddev->queue) { |
5147 | int chunk_size; | 5139 | int chunk_size; |
5148 | /* read-ahead size must cover two whole stripes, which | 5140 | /* read-ahead size must cover two whole stripes, which |
@@ -5192,7 +5184,6 @@ static int stop(mddev_t *mddev) | |||
5192 | mddev->thread = NULL; | 5184 | mddev->thread = NULL; |
5193 | if (mddev->queue) | 5185 | if (mddev->queue) |
5194 | mddev->queue->backing_dev_info.congested_fn = NULL; | 5186 | mddev->queue->backing_dev_info.congested_fn = NULL; |
5195 | plugger_flush(&conf->plug); /* the unplug fn references 'conf'*/ | ||
5196 | free_conf(conf); | 5187 | free_conf(conf); |
5197 | mddev->private = NULL; | 5188 | mddev->private = NULL; |
5198 | mddev->to_remove = &raid5_attrs_group; | 5189 | mddev->to_remove = &raid5_attrs_group; |
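The batching comment at the top of this raid5.c diff explains seq_flush/seq_write: a stripe needing a bitmap update records bm_seq = seq_flush + 1 and may not be written until seq_write has caught up; the daemon closes a batch by bumping seq_flush, flushing the bitmap, and then setting seq_write = seq_flush (exactly what the new raid5d() hunk above does before activate_bit_delay()). A compact model of that ordering; the structure and names are illustrative only.

#include <stdbool.h>
#include <stdio.h>

struct toy_conf   { int seq_flush, seq_write; };
struct toy_stripe { int bm_seq; };

/* add_stripe_bio(): the stripe joins the batch currently open for additions. */
static void record_bitmap_update(struct toy_conf *conf, struct toy_stripe *sh)
{
	sh->bm_seq = conf->seq_flush + 1;
}

/* __release_stripe() test: delay the stripe while its batch's bitmap has
 * not reached disk yet (sh->bm_seq - conf->seq_write > 0). */
static bool must_delay(struct toy_conf *conf, struct toy_stripe *sh)
{
	return sh->bm_seq - conf->seq_write > 0;
}

/* raid5d(): close the current batch, write the bitmap, then release the
 * delayed stripes by advancing seq_write. */
static void flush_bitmap_batch(struct toy_conf *conf)
{
	conf->seq_flush++;
	/* bitmap_unplug(mddev->bitmap) happens here in the kernel */
	conf->seq_write = conf->seq_flush;
}

int main(void)
{
	struct toy_conf conf = { 0, 0 };
	struct toy_stripe sh;

	record_bitmap_update(&conf, &sh);
	printf("before flush: delay=%d\n", must_delay(&conf, &sh)); /* 1 */
	flush_bitmap_batch(&conf);
	printf("after flush:  delay=%d\n", must_delay(&conf, &sh)); /* 0 */
	return 0;
}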
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 8d563a4f022a..3ca77a2613ba 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -400,8 +400,6 @@ struct raid5_private_data { | |||
400 | * Cleared when a sync completes. | 400 | * Cleared when a sync completes. |
401 | */ | 401 | */ |
402 | 402 | ||
403 | struct plug_handle plug; | ||
404 | |||
405 | /* per cpu variables */ | 403 | /* per cpu variables */ |
406 | struct raid5_percpu { | 404 | struct raid5_percpu { |
407 | struct page *spare_page; /* Used when checking P/Q in raid6 */ | 405 | struct page *spare_page; /* Used when checking P/Q in raid6 */ |