| author | James Morris <jmorris@namei.org> | 2011-04-19 07:32:41 -0400 |
|---|---|---|
| committer | James Morris <jmorris@namei.org> | 2011-04-19 07:32:41 -0400 |
| commit | d4ab4e6a23f805abb8fc3cc34525eec3788aeca1 (patch) | |
| tree | eefd82c155bc27469a85667d759cd90facf4a6e3 | /drivers/md |
| parent | c0fa797ae6cd02ff87c0bfe0d509368a3b45640e (diff) | |
| parent | 96fd2d57b8252e16dfacf8941f7a74a6119197f5 (diff) | |
Merge branch 'master'; commit 'v2.6.39-rc3' into next
Diffstat (limited to 'drivers/md')
| -rw-r--r-- | drivers/md/bitmap.h | 2 |
| -rw-r--r-- | drivers/md/dm-raid.c | 8 |
| -rw-r--r-- | drivers/md/dm-region-hash.c | 2 |
| -rw-r--r-- | drivers/md/dm-table.c | 114 |
| -rw-r--r-- | drivers/md/faulty.c | 2 |
| -rw-r--r-- | drivers/md/md.c | 89 |
| -rw-r--r-- | drivers/md/md.h | 28 |
| -rw-r--r-- | drivers/md/raid1.c | 29 |
| -rw-r--r-- | drivers/md/raid10.c | 33 |
| -rw-r--r-- | drivers/md/raid10.h | 4 |
| -rw-r--r-- | drivers/md/raid5.c | 61 |
| -rw-r--r-- | drivers/md/raid5.h | 2 |
12 files changed, 193 insertions, 181 deletions
diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h index 931a7a7c3796..d0aeaf46d932 100644 --- a/drivers/md/bitmap.h +++ b/drivers/md/bitmap.h | |||
| @@ -45,7 +45,7 @@ | |||
| 45 | * | 45 | * |
| 46 | * The counter counts pending write requests, plus the on-disk bit. | 46 | * The counter counts pending write requests, plus the on-disk bit. |
| 47 | * When the counter is '1' and the resync bits are clear, the on-disk | 47 | * When the counter is '1' and the resync bits are clear, the on-disk |
| 48 | * bit can be cleared aswell, thus setting the counter to 0. | 48 | * bit can be cleared as well, thus setting the counter to 0. |
| 49 | * When we set a bit, or in the counter (to start a write), if the fields is | 49 | * When we set a bit, or in the counter (to start a write), if the fields is |
| 50 | * 0, we first set the disk bit and set the counter to 1. | 50 | * 0, we first set the disk bit and set the counter to 1. |
| 51 | * | 51 | * |
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 5ef136cdba91..e5d8904fc8f6 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c | |||
| @@ -390,13 +390,6 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits) | |||
| 390 | return md_raid5_congested(&rs->md, bits); | 390 | return md_raid5_congested(&rs->md, bits); |
| 391 | } | 391 | } |
| 392 | 392 | ||
| 393 | static void raid_unplug(struct dm_target_callbacks *cb) | ||
| 394 | { | ||
| 395 | struct raid_set *rs = container_of(cb, struct raid_set, callbacks); | ||
| 396 | |||
| 397 | md_raid5_kick_device(rs->md.private); | ||
| 398 | } | ||
| 399 | |||
| 400 | /* | 393 | /* |
| 401 | * Construct a RAID4/5/6 mapping: | 394 | * Construct a RAID4/5/6 mapping: |
| 402 | * Args: | 395 | * Args: |
| @@ -487,7 +480,6 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
| 487 | } | 480 | } |
| 488 | 481 | ||
| 489 | rs->callbacks.congested_fn = raid_is_congested; | 482 | rs->callbacks.congested_fn = raid_is_congested; |
| 490 | rs->callbacks.unplug_fn = raid_unplug; | ||
| 491 | dm_table_add_target_callbacks(ti->table, &rs->callbacks); | 483 | dm_table_add_target_callbacks(ti->table, &rs->callbacks); |
| 492 | 484 | ||
| 493 | return 0; | 485 | return 0; |
diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c index dad011aed0c9..7771ed212182 100644 --- a/drivers/md/dm-region-hash.c +++ b/drivers/md/dm-region-hash.c | |||
| @@ -419,7 +419,7 @@ void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio) | |||
| 419 | /* | 419 | /* |
| 420 | * Possible cases: | 420 | * Possible cases: |
| 421 | * 1) DM_RH_DIRTY | 421 | * 1) DM_RH_DIRTY |
| 422 | * 2) DM_RH_NOSYNC: was dirty, other preceeding writes failed | 422 | * 2) DM_RH_NOSYNC: was dirty, other preceding writes failed |
| 423 | * 3) DM_RH_RECOVERING: flushing pending writes | 423 | * 3) DM_RH_RECOVERING: flushing pending writes |
| 424 | * Either case, the region should have not been connected to list. | 424 | * Either case, the region should have not been connected to list. |
| 425 | */ | 425 | */ |
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 416d4e258df6..cb8380c9767f 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c | |||
| @@ -927,20 +927,80 @@ static int dm_table_build_index(struct dm_table *t) | |||
| 927 | } | 927 | } |
| 928 | 928 | ||
| 929 | /* | 929 | /* |
| 930 | * Get a disk whose integrity profile reflects the table's profile. | ||
| 931 | * If %match_all is true, all devices' profiles must match. | ||
| 932 | * If %match_all is false, all devices must at least have an | ||
| 933 | * allocated integrity profile; but uninitialized is ok. | ||
| 934 | * Returns NULL if integrity support was inconsistent or unavailable. | ||
| 935 | */ | ||
| 936 | static struct gendisk * dm_table_get_integrity_disk(struct dm_table *t, | ||
| 937 | bool match_all) | ||
| 938 | { | ||
| 939 | struct list_head *devices = dm_table_get_devices(t); | ||
| 940 | struct dm_dev_internal *dd = NULL; | ||
| 941 | struct gendisk *prev_disk = NULL, *template_disk = NULL; | ||
| 942 | |||
| 943 | list_for_each_entry(dd, devices, list) { | ||
| 944 | template_disk = dd->dm_dev.bdev->bd_disk; | ||
| 945 | if (!blk_get_integrity(template_disk)) | ||
| 946 | goto no_integrity; | ||
| 947 | if (!match_all && !blk_integrity_is_initialized(template_disk)) | ||
| 948 | continue; /* skip uninitialized profiles */ | ||
| 949 | else if (prev_disk && | ||
| 950 | blk_integrity_compare(prev_disk, template_disk) < 0) | ||
| 951 | goto no_integrity; | ||
| 952 | prev_disk = template_disk; | ||
| 953 | } | ||
| 954 | |||
| 955 | return template_disk; | ||
| 956 | |||
| 957 | no_integrity: | ||
| 958 | if (prev_disk) | ||
| 959 | DMWARN("%s: integrity not set: %s and %s profile mismatch", | ||
| 960 | dm_device_name(t->md), | ||
| 961 | prev_disk->disk_name, | ||
| 962 | template_disk->disk_name); | ||
| 963 | return NULL; | ||
| 964 | } | ||
| 965 | |||
| 966 | /* | ||
| 930 | * Register the mapped device for blk_integrity support if | 967 | * Register the mapped device for blk_integrity support if |
| 931 | * the underlying devices support it. | 968 | * the underlying devices have an integrity profile. But all devices |
| 969 | * may not have matching profiles (checking all devices isn't reliable | ||
| 970 | * during table load because this table may use other DM device(s) which | ||
| 971 | * must be resumed before they will have an initialized integity profile). | ||
| 972 | * Stacked DM devices force a 2 stage integrity profile validation: | ||
| 973 | * 1 - during load, validate all initialized integrity profiles match | ||
| 974 | * 2 - during resume, validate all integrity profiles match | ||
| 932 | */ | 975 | */ |
| 933 | static int dm_table_prealloc_integrity(struct dm_table *t, struct mapped_device *md) | 976 | static int dm_table_prealloc_integrity(struct dm_table *t, struct mapped_device *md) |
| 934 | { | 977 | { |
| 935 | struct list_head *devices = dm_table_get_devices(t); | 978 | struct gendisk *template_disk = NULL; |
| 936 | struct dm_dev_internal *dd; | ||
| 937 | 979 | ||
| 938 | list_for_each_entry(dd, devices, list) | 980 | template_disk = dm_table_get_integrity_disk(t, false); |
| 939 | if (bdev_get_integrity(dd->dm_dev.bdev)) { | 981 | if (!template_disk) |
| 940 | t->integrity_supported = 1; | 982 | return 0; |
| 941 | return blk_integrity_register(dm_disk(md), NULL); | ||
| 942 | } | ||
| 943 | 983 | ||
| 984 | if (!blk_integrity_is_initialized(dm_disk(md))) { | ||
| 985 | t->integrity_supported = 1; | ||
| 986 | return blk_integrity_register(dm_disk(md), NULL); | ||
| 987 | } | ||
| 988 | |||
| 989 | /* | ||
| 990 | * If DM device already has an initalized integrity | ||
| 991 | * profile the new profile should not conflict. | ||
| 992 | */ | ||
| 993 | if (blk_integrity_is_initialized(template_disk) && | ||
| 994 | blk_integrity_compare(dm_disk(md), template_disk) < 0) { | ||
| 995 | DMWARN("%s: conflict with existing integrity profile: " | ||
| 996 | "%s profile mismatch", | ||
| 997 | dm_device_name(t->md), | ||
| 998 | template_disk->disk_name); | ||
| 999 | return 1; | ||
| 1000 | } | ||
| 1001 | |||
| 1002 | /* Preserve existing initialized integrity profile */ | ||
| 1003 | t->integrity_supported = 1; | ||
| 944 | return 0; | 1004 | return 0; |
| 945 | } | 1005 | } |
| 946 | 1006 | ||
| @@ -1094,41 +1154,27 @@ combine_limits: | |||
| 1094 | 1154 | ||
| 1095 | /* | 1155 | /* |
| 1096 | * Set the integrity profile for this device if all devices used have | 1156 | * Set the integrity profile for this device if all devices used have |
| 1097 | * matching profiles. | 1157 | * matching profiles. We're quite deep in the resume path but still |
| 1158 | * don't know if all devices (particularly DM devices this device | ||
| 1159 | * may be stacked on) have matching profiles. Even if the profiles | ||
| 1160 | * don't match we have no way to fail (to resume) at this point. | ||
| 1098 | */ | 1161 | */ |
| 1099 | static void dm_table_set_integrity(struct dm_table *t) | 1162 | static void dm_table_set_integrity(struct dm_table *t) |
| 1100 | { | 1163 | { |
| 1101 | struct list_head *devices = dm_table_get_devices(t); | 1164 | struct gendisk *template_disk = NULL; |
| 1102 | struct dm_dev_internal *prev = NULL, *dd = NULL; | ||
| 1103 | 1165 | ||
| 1104 | if (!blk_get_integrity(dm_disk(t->md))) | 1166 | if (!blk_get_integrity(dm_disk(t->md))) |
| 1105 | return; | 1167 | return; |
| 1106 | 1168 | ||
| 1107 | list_for_each_entry(dd, devices, list) { | 1169 | template_disk = dm_table_get_integrity_disk(t, true); |
| 1108 | if (prev && | 1170 | if (!template_disk && |
| 1109 | blk_integrity_compare(prev->dm_dev.bdev->bd_disk, | 1171 | blk_integrity_is_initialized(dm_disk(t->md))) { |
| 1110 | dd->dm_dev.bdev->bd_disk) < 0) { | 1172 | DMWARN("%s: device no longer has a valid integrity profile", |
| 1111 | DMWARN("%s: integrity not set: %s and %s mismatch", | 1173 | dm_device_name(t->md)); |
| 1112 | dm_device_name(t->md), | 1174 | return; |
| 1113 | prev->dm_dev.bdev->bd_disk->disk_name, | ||
| 1114 | dd->dm_dev.bdev->bd_disk->disk_name); | ||
| 1115 | goto no_integrity; | ||
| 1116 | } | ||
| 1117 | prev = dd; | ||
| 1118 | } | 1175 | } |
| 1119 | |||
| 1120 | if (!prev || !bdev_get_integrity(prev->dm_dev.bdev)) | ||
| 1121 | goto no_integrity; | ||
| 1122 | |||
| 1123 | blk_integrity_register(dm_disk(t->md), | 1176 | blk_integrity_register(dm_disk(t->md), |
| 1124 | bdev_get_integrity(prev->dm_dev.bdev)); | 1177 | blk_get_integrity(template_disk)); |
| 1125 | |||
| 1126 | return; | ||
| 1127 | |||
| 1128 | no_integrity: | ||
| 1129 | blk_integrity_register(dm_disk(t->md), NULL); | ||
| 1130 | |||
| 1131 | return; | ||
| 1132 | } | 1178 | } |
| 1133 | 1179 | ||
| 1134 | void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, | 1180 | void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, |
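The new dm-table.c comments above describe a two-stage integrity-profile validation for stacked DM devices: at table load only the initialized member profiles must agree, while at resume every member must agree. A minimal userspace sketch of that rule follows; it is not kernel code — the types and values are invented stand-ins, with `initialized` playing the role of blk_integrity_is_initialized() and an integer `profile` standing in for the blk_integrity data.

```c
#include <stdio.h>
#include <stdbool.h>

struct disk_profile {
	const char *name;
	bool initialized;	/* profile has been filled in */
	int  profile;		/* 0 == no integrity support at all */
};

/* Return the template profile, or -1 when the set is unusable (simplified). */
static int pick_template(const struct disk_profile *d, int n, bool match_all)
{
	int tmpl = -1;

	for (int i = 0; i < n; i++) {
		if (!d[i].profile)
			return -1;		/* member lacks integrity entirely */
		if (!match_all && !d[i].initialized)
			continue;		/* stage 1: tolerate uninitialized */
		if (tmpl != -1 && d[i].profile != tmpl)
			return -1;		/* mismatch between members */
		tmpl = d[i].profile;
	}
	return tmpl;
}

int main(void)
{
	struct disk_profile members[] = {
		{ "sda",  true,  2 },
		{ "dm-1", false, 2 },	/* stacked DM device, not resumed yet */
	};

	/* Stage 1: table load -- uninitialized members are skipped */
	printf("load:   template %d\n", pick_template(members, 2, false));

	/* Stage 2: resume -- now every member must match */
	members[1].initialized = true;
	printf("resume: template %d\n", pick_template(members, 2, true));
	return 0;
}
```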
diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c index 339fdc670751..23078dabb6df 100644 --- a/drivers/md/faulty.c +++ b/drivers/md/faulty.c | |||
| @@ -30,7 +30,7 @@ | |||
| 30 | * | 30 | * |
| 31 | * Different modes can be active at a time, but only | 31 | * Different modes can be active at a time, but only |
| 32 | * one can be set at array creation. Others can be added later. | 32 | * one can be set at array creation. Others can be added later. |
| 33 | * A mode can be one-shot or recurrent with the recurrance being | 33 | * A mode can be one-shot or recurrent with the recurrence being |
| 34 | * once in every N requests. | 34 | * once in every N requests. |
| 35 | * The bottom 5 bits of the "layout" indicate the mode. The | 35 | * The bottom 5 bits of the "layout" indicate the mode. The |
| 36 | * remainder indicate a period, or 0 for one-shot. | 36 | * remainder indicate a period, or 0 for one-shot. |
diff --git a/drivers/md/md.c b/drivers/md/md.c index 8b66e04c2ea6..6e853c61d87e 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
| @@ -447,48 +447,59 @@ EXPORT_SYMBOL(md_flush_request); | |||
| 447 | 447 | ||
| 448 | /* Support for plugging. | 448 | /* Support for plugging. |
| 449 | * This mirrors the plugging support in request_queue, but does not | 449 | * This mirrors the plugging support in request_queue, but does not |
| 450 | * require having a whole queue | 450 | * require having a whole queue or request structures. |
| 451 | * We allocate an md_plug_cb for each md device and each thread it gets | ||
| 452 | * plugged on. This links tot the private plug_handle structure in the | ||
| 453 | * personality data where we keep a count of the number of outstanding | ||
| 454 | * plugs so other code can see if a plug is active. | ||
| 451 | */ | 455 | */ |
| 452 | static void plugger_work(struct work_struct *work) | 456 | struct md_plug_cb { |
| 453 | { | 457 | struct blk_plug_cb cb; |
| 454 | struct plug_handle *plug = | 458 | mddev_t *mddev; |
| 455 | container_of(work, struct plug_handle, unplug_work); | 459 | }; |
| 456 | plug->unplug_fn(plug); | ||
| 457 | } | ||
| 458 | static void plugger_timeout(unsigned long data) | ||
| 459 | { | ||
| 460 | struct plug_handle *plug = (void *)data; | ||
| 461 | kblockd_schedule_work(NULL, &plug->unplug_work); | ||
| 462 | } | ||
| 463 | void plugger_init(struct plug_handle *plug, | ||
| 464 | void (*unplug_fn)(struct plug_handle *)) | ||
| 465 | { | ||
| 466 | plug->unplug_flag = 0; | ||
| 467 | plug->unplug_fn = unplug_fn; | ||
| 468 | init_timer(&plug->unplug_timer); | ||
| 469 | plug->unplug_timer.function = plugger_timeout; | ||
| 470 | plug->unplug_timer.data = (unsigned long)plug; | ||
| 471 | INIT_WORK(&plug->unplug_work, plugger_work); | ||
| 472 | } | ||
| 473 | EXPORT_SYMBOL_GPL(plugger_init); | ||
| 474 | 460 | ||
| 475 | void plugger_set_plug(struct plug_handle *plug) | 461 | static void plugger_unplug(struct blk_plug_cb *cb) |
| 476 | { | 462 | { |
| 477 | if (!test_and_set_bit(PLUGGED_FLAG, &plug->unplug_flag)) | 463 | struct md_plug_cb *mdcb = container_of(cb, struct md_plug_cb, cb); |
| 478 | mod_timer(&plug->unplug_timer, jiffies + msecs_to_jiffies(3)+1); | 464 | if (atomic_dec_and_test(&mdcb->mddev->plug_cnt)) |
| 465 | md_wakeup_thread(mdcb->mddev->thread); | ||
| 466 | kfree(mdcb); | ||
| 479 | } | 467 | } |
| 480 | EXPORT_SYMBOL_GPL(plugger_set_plug); | ||
| 481 | 468 | ||
| 482 | int plugger_remove_plug(struct plug_handle *plug) | 469 | /* Check that an unplug wakeup will come shortly. |
| 470 | * If not, wakeup the md thread immediately | ||
| 471 | */ | ||
| 472 | int mddev_check_plugged(mddev_t *mddev) | ||
| 483 | { | 473 | { |
| 484 | if (test_and_clear_bit(PLUGGED_FLAG, &plug->unplug_flag)) { | 474 | struct blk_plug *plug = current->plug; |
| 485 | del_timer(&plug->unplug_timer); | 475 | struct md_plug_cb *mdcb; |
| 486 | return 1; | 476 | |
| 487 | } else | 477 | if (!plug) |
| 478 | return 0; | ||
| 479 | |||
| 480 | list_for_each_entry(mdcb, &plug->cb_list, cb.list) { | ||
| 481 | if (mdcb->cb.callback == plugger_unplug && | ||
| 482 | mdcb->mddev == mddev) { | ||
| 483 | /* Already on the list, move to top */ | ||
| 484 | if (mdcb != list_first_entry(&plug->cb_list, | ||
| 485 | struct md_plug_cb, | ||
| 486 | cb.list)) | ||
| 487 | list_move(&mdcb->cb.list, &plug->cb_list); | ||
| 488 | return 1; | ||
| 489 | } | ||
| 490 | } | ||
| 491 | /* Not currently on the callback list */ | ||
| 492 | mdcb = kmalloc(sizeof(*mdcb), GFP_ATOMIC); | ||
| 493 | if (!mdcb) | ||
| 488 | return 0; | 494 | return 0; |
| 489 | } | ||
| 490 | EXPORT_SYMBOL_GPL(plugger_remove_plug); | ||
| 491 | 495 | ||
| 496 | mdcb->mddev = mddev; | ||
| 497 | mdcb->cb.callback = plugger_unplug; | ||
| 498 | atomic_inc(&mddev->plug_cnt); | ||
| 499 | list_add(&mdcb->cb.list, &plug->cb_list); | ||
| 500 | return 1; | ||
| 501 | } | ||
| 502 | EXPORT_SYMBOL_GPL(mddev_check_plugged); | ||
| 492 | 503 | ||
| 493 | static inline mddev_t *mddev_get(mddev_t *mddev) | 504 | static inline mddev_t *mddev_get(mddev_t *mddev) |
| 494 | { | 505 | { |
| @@ -538,6 +549,7 @@ void mddev_init(mddev_t *mddev) | |||
| 538 | atomic_set(&mddev->active, 1); | 549 | atomic_set(&mddev->active, 1); |
| 539 | atomic_set(&mddev->openers, 0); | 550 | atomic_set(&mddev->openers, 0); |
| 540 | atomic_set(&mddev->active_io, 0); | 551 | atomic_set(&mddev->active_io, 0); |
| 552 | atomic_set(&mddev->plug_cnt, 0); | ||
| 541 | spin_lock_init(&mddev->write_lock); | 553 | spin_lock_init(&mddev->write_lock); |
| 542 | atomic_set(&mddev->flush_pending, 0); | 554 | atomic_set(&mddev->flush_pending, 0); |
| 543 | init_waitqueue_head(&mddev->sb_wait); | 555 | init_waitqueue_head(&mddev->sb_wait); |
| @@ -4723,7 +4735,6 @@ static void md_clean(mddev_t *mddev) | |||
| 4723 | mddev->bitmap_info.chunksize = 0; | 4735 | mddev->bitmap_info.chunksize = 0; |
| 4724 | mddev->bitmap_info.daemon_sleep = 0; | 4736 | mddev->bitmap_info.daemon_sleep = 0; |
| 4725 | mddev->bitmap_info.max_write_behind = 0; | 4737 | mddev->bitmap_info.max_write_behind = 0; |
| 4726 | mddev->plug = NULL; | ||
| 4727 | } | 4738 | } |
| 4728 | 4739 | ||
| 4729 | static void __md_stop_writes(mddev_t *mddev) | 4740 | static void __md_stop_writes(mddev_t *mddev) |
| @@ -6266,7 +6277,7 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev) | |||
| 6266 | * rt is a sector_t, so could be 32bit or 64bit. | 6277 | * rt is a sector_t, so could be 32bit or 64bit. |
| 6267 | * So we divide before multiply in case it is 32bit and close | 6278 | * So we divide before multiply in case it is 32bit and close |
| 6268 | * to the limit. | 6279 | * to the limit. |
| 6269 | * We scale the divisor (db) by 32 to avoid loosing precision | 6280 | * We scale the divisor (db) by 32 to avoid losing precision |
| 6270 | * near the end of resync when the number of remaining sectors | 6281 | * near the end of resync when the number of remaining sectors |
| 6271 | * is close to 'db'. | 6282 | * is close to 'db'. |
| 6272 | * We then divide rt by 32 after multiplying by db to compensate. | 6283 | * We then divide rt by 32 after multiplying by db to compensate. |
| @@ -6688,12 +6699,6 @@ int md_allow_write(mddev_t *mddev) | |||
| 6688 | } | 6699 | } |
| 6689 | EXPORT_SYMBOL_GPL(md_allow_write); | 6700 | EXPORT_SYMBOL_GPL(md_allow_write); |
| 6690 | 6701 | ||
| 6691 | void md_unplug(mddev_t *mddev) | ||
| 6692 | { | ||
| 6693 | if (mddev->plug) | ||
| 6694 | mddev->plug->unplug_fn(mddev->plug); | ||
| 6695 | } | ||
| 6696 | |||
| 6697 | #define SYNC_MARKS 10 | 6702 | #define SYNC_MARKS 10 |
| 6698 | #define SYNC_MARK_STEP (3*HZ) | 6703 | #define SYNC_MARK_STEP (3*HZ) |
| 6699 | void md_do_sync(mddev_t *mddev) | 6704 | void md_do_sync(mddev_t *mddev) |
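The md.c hunk above replaces md's private plug_handle timer with per-task block-plug callbacks: the first bio submitted under a plug registers an md_plug_cb and bumps mddev->plug_cnt, and the unplug callback drops the count and wakes the md thread. A small self-contained model of that flow is sketched below; the list and counter types are invented for illustration (single-threaded, no locking or atomics), and only the names plug_cnt and check_plugged mirror the patch.

```c
#include <stdio.h>
#include <stdlib.h>

struct plug_cb { struct plug_cb *next; void (*fn)(struct plug_cb *); };
struct plug    { struct plug_cb *list; };
struct mddev   { int plug_cnt; const char *name; };

struct md_plug_cb { struct plug_cb cb; struct mddev *mddev; };

static void md_unplug_cb(struct plug_cb *cb)
{
	struct md_plug_cb *mdcb = (struct md_plug_cb *)cb;

	if (--mdcb->mddev->plug_cnt == 0)
		printf("%s: unplugged, waking md thread\n", mdcb->mddev->name);
	free(mdcb);
}

/* Returns 1 if an unplug callback is (now) pending for this device. */
static int check_plugged(struct plug *plug, struct mddev *mddev)
{
	struct md_plug_cb *mdcb;

	if (!plug)
		return 0;				/* caller is not plugging */
	for (struct plug_cb *cb = plug->list; cb; cb = cb->next) {
		mdcb = (struct md_plug_cb *)cb;
		if (cb->fn == md_unplug_cb && mdcb->mddev == mddev)
			return 1;			/* already registered */
	}
	mdcb = malloc(sizeof(*mdcb));
	if (!mdcb)
		return 0;				/* act as if unplugged */
	mdcb->cb.fn = md_unplug_cb;
	mdcb->cb.next = plug->list;
	mdcb->mddev = mddev;
	mddev->plug_cnt++;				/* more bios expected soon */
	plug->list = &mdcb->cb;
	return 1;
}

static void run_unplug(struct plug *plug)
{
	while (plug->list) {
		struct plug_cb *cb = plug->list;

		plug->list = cb->next;
		cb->fn(cb);
	}
}

int main(void)
{
	struct mddev md = { 0, "md0" };
	struct plug plug = { NULL };

	/* Three bios under one plug: only the first registers a callback. */
	for (int i = 0; i < 3; i++) {
		int plugged = check_plugged(&plug, &md);
		printf("bio %d: plugged=%d plug_cnt=%d\n", i, plugged, md.plug_cnt);
	}

	run_unplug(&plug);		/* task leaves the block layer */
	return 0;
}
```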
diff --git a/drivers/md/md.h b/drivers/md/md.h index 12215d437fcc..0b1fd3f1d85b 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h | |||
| @@ -29,26 +29,6 @@ | |||
| 29 | typedef struct mddev_s mddev_t; | 29 | typedef struct mddev_s mddev_t; |
| 30 | typedef struct mdk_rdev_s mdk_rdev_t; | 30 | typedef struct mdk_rdev_s mdk_rdev_t; |
| 31 | 31 | ||
| 32 | /* generic plugging support - like that provided with request_queue, | ||
| 33 | * but does not require a request_queue | ||
| 34 | */ | ||
| 35 | struct plug_handle { | ||
| 36 | void (*unplug_fn)(struct plug_handle *); | ||
| 37 | struct timer_list unplug_timer; | ||
| 38 | struct work_struct unplug_work; | ||
| 39 | unsigned long unplug_flag; | ||
| 40 | }; | ||
| 41 | #define PLUGGED_FLAG 1 | ||
| 42 | void plugger_init(struct plug_handle *plug, | ||
| 43 | void (*unplug_fn)(struct plug_handle *)); | ||
| 44 | void plugger_set_plug(struct plug_handle *plug); | ||
| 45 | int plugger_remove_plug(struct plug_handle *plug); | ||
| 46 | static inline void plugger_flush(struct plug_handle *plug) | ||
| 47 | { | ||
| 48 | del_timer_sync(&plug->unplug_timer); | ||
| 49 | cancel_work_sync(&plug->unplug_work); | ||
| 50 | } | ||
| 51 | |||
| 52 | /* | 32 | /* |
| 53 | * MD's 'extended' device | 33 | * MD's 'extended' device |
| 54 | */ | 34 | */ |
| @@ -94,7 +74,7 @@ struct mdk_rdev_s | |||
| 94 | #define In_sync 2 /* device is in_sync with rest of array */ | 74 | #define In_sync 2 /* device is in_sync with rest of array */ |
| 95 | #define WriteMostly 4 /* Avoid reading if at all possible */ | 75 | #define WriteMostly 4 /* Avoid reading if at all possible */ |
| 96 | #define AutoDetected 7 /* added by auto-detect */ | 76 | #define AutoDetected 7 /* added by auto-detect */ |
| 97 | #define Blocked 8 /* An error occured on an externally | 77 | #define Blocked 8 /* An error occurred on an externally |
| 98 | * managed array, don't allow writes | 78 | * managed array, don't allow writes |
| 99 | * until it is cleared */ | 79 | * until it is cleared */ |
| 100 | wait_queue_head_t blocked_wait; | 80 | wait_queue_head_t blocked_wait; |
| @@ -199,6 +179,9 @@ struct mddev_s | |||
| 199 | int delta_disks, new_level, new_layout; | 179 | int delta_disks, new_level, new_layout; |
| 200 | int new_chunk_sectors; | 180 | int new_chunk_sectors; |
| 201 | 181 | ||
| 182 | atomic_t plug_cnt; /* If device is expecting | ||
| 183 | * more bios soon. | ||
| 184 | */ | ||
| 202 | struct mdk_thread_s *thread; /* management thread */ | 185 | struct mdk_thread_s *thread; /* management thread */ |
| 203 | struct mdk_thread_s *sync_thread; /* doing resync or reconstruct */ | 186 | struct mdk_thread_s *sync_thread; /* doing resync or reconstruct */ |
| 204 | sector_t curr_resync; /* last block scheduled */ | 187 | sector_t curr_resync; /* last block scheduled */ |
| @@ -336,7 +319,6 @@ struct mddev_s | |||
| 336 | struct list_head all_mddevs; | 319 | struct list_head all_mddevs; |
| 337 | 320 | ||
| 338 | struct attribute_group *to_remove; | 321 | struct attribute_group *to_remove; |
| 339 | struct plug_handle *plug; /* if used by personality */ | ||
| 340 | 322 | ||
| 341 | struct bio_set *bio_set; | 323 | struct bio_set *bio_set; |
| 342 | 324 | ||
| @@ -516,7 +498,6 @@ extern int md_integrity_register(mddev_t *mddev); | |||
| 516 | extern void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev); | 498 | extern void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev); |
| 517 | extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale); | 499 | extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale); |
| 518 | extern void restore_bitmap_write_access(struct file *file); | 500 | extern void restore_bitmap_write_access(struct file *file); |
| 519 | extern void md_unplug(mddev_t *mddev); | ||
| 520 | 501 | ||
| 521 | extern void mddev_init(mddev_t *mddev); | 502 | extern void mddev_init(mddev_t *mddev); |
| 522 | extern int md_run(mddev_t *mddev); | 503 | extern int md_run(mddev_t *mddev); |
| @@ -530,4 +511,5 @@ extern struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask, | |||
| 530 | mddev_t *mddev); | 511 | mddev_t *mddev); |
| 531 | extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, | 512 | extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, |
| 532 | mddev_t *mddev); | 513 | mddev_t *mddev); |
| 514 | extern int mddev_check_plugged(mddev_t *mddev); | ||
| 533 | #endif /* _MD_MD_H */ | 515 | #endif /* _MD_MD_H */ |
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index c2a21ae56d97..2b7a7ff401dc 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
| @@ -565,12 +565,6 @@ static void flush_pending_writes(conf_t *conf) | |||
| 565 | spin_unlock_irq(&conf->device_lock); | 565 | spin_unlock_irq(&conf->device_lock); |
| 566 | } | 566 | } |
| 567 | 567 | ||
| 568 | static void md_kick_device(mddev_t *mddev) | ||
| 569 | { | ||
| 570 | blk_flush_plug(current); | ||
| 571 | md_wakeup_thread(mddev->thread); | ||
| 572 | } | ||
| 573 | |||
| 574 | /* Barriers.... | 568 | /* Barriers.... |
| 575 | * Sometimes we need to suspend IO while we do something else, | 569 | * Sometimes we need to suspend IO while we do something else, |
| 576 | * either some resync/recovery, or reconfigure the array. | 570 | * either some resync/recovery, or reconfigure the array. |
| @@ -600,7 +594,7 @@ static void raise_barrier(conf_t *conf) | |||
| 600 | 594 | ||
| 601 | /* Wait until no block IO is waiting */ | 595 | /* Wait until no block IO is waiting */ |
| 602 | wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting, | 596 | wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting, |
| 603 | conf->resync_lock, md_kick_device(conf->mddev)); | 597 | conf->resync_lock, ); |
| 604 | 598 | ||
| 605 | /* block any new IO from starting */ | 599 | /* block any new IO from starting */ |
| 606 | conf->barrier++; | 600 | conf->barrier++; |
| @@ -608,7 +602,7 @@ static void raise_barrier(conf_t *conf) | |||
| 608 | /* Now wait for all pending IO to complete */ | 602 | /* Now wait for all pending IO to complete */ |
| 609 | wait_event_lock_irq(conf->wait_barrier, | 603 | wait_event_lock_irq(conf->wait_barrier, |
| 610 | !conf->nr_pending && conf->barrier < RESYNC_DEPTH, | 604 | !conf->nr_pending && conf->barrier < RESYNC_DEPTH, |
| 611 | conf->resync_lock, md_kick_device(conf->mddev)); | 605 | conf->resync_lock, ); |
| 612 | 606 | ||
| 613 | spin_unlock_irq(&conf->resync_lock); | 607 | spin_unlock_irq(&conf->resync_lock); |
| 614 | } | 608 | } |
| @@ -630,7 +624,7 @@ static void wait_barrier(conf_t *conf) | |||
| 630 | conf->nr_waiting++; | 624 | conf->nr_waiting++; |
| 631 | wait_event_lock_irq(conf->wait_barrier, !conf->barrier, | 625 | wait_event_lock_irq(conf->wait_barrier, !conf->barrier, |
| 632 | conf->resync_lock, | 626 | conf->resync_lock, |
| 633 | md_kick_device(conf->mddev)); | 627 | ); |
| 634 | conf->nr_waiting--; | 628 | conf->nr_waiting--; |
| 635 | } | 629 | } |
| 636 | conf->nr_pending++; | 630 | conf->nr_pending++; |
| @@ -666,8 +660,7 @@ static void freeze_array(conf_t *conf) | |||
| 666 | wait_event_lock_irq(conf->wait_barrier, | 660 | wait_event_lock_irq(conf->wait_barrier, |
| 667 | conf->nr_pending == conf->nr_queued+1, | 661 | conf->nr_pending == conf->nr_queued+1, |
| 668 | conf->resync_lock, | 662 | conf->resync_lock, |
| 669 | ({ flush_pending_writes(conf); | 663 | flush_pending_writes(conf)); |
| 670 | md_kick_device(conf->mddev); })); | ||
| 671 | spin_unlock_irq(&conf->resync_lock); | 664 | spin_unlock_irq(&conf->resync_lock); |
| 672 | } | 665 | } |
| 673 | static void unfreeze_array(conf_t *conf) | 666 | static void unfreeze_array(conf_t *conf) |
| @@ -729,6 +722,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
| 729 | const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); | 722 | const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); |
| 730 | const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA)); | 723 | const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA)); |
| 731 | mdk_rdev_t *blocked_rdev; | 724 | mdk_rdev_t *blocked_rdev; |
| 725 | int plugged; | ||
| 732 | 726 | ||
| 733 | /* | 727 | /* |
| 734 | * Register the new request and wait if the reconstruction | 728 | * Register the new request and wait if the reconstruction |
| @@ -820,6 +814,8 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
| 820 | * inc refcount on their rdev. Record them by setting | 814 | * inc refcount on their rdev. Record them by setting |
| 821 | * bios[x] to bio | 815 | * bios[x] to bio |
| 822 | */ | 816 | */ |
| 817 | plugged = mddev_check_plugged(mddev); | ||
| 818 | |||
| 823 | disks = conf->raid_disks; | 819 | disks = conf->raid_disks; |
| 824 | retry_write: | 820 | retry_write: |
| 825 | blocked_rdev = NULL; | 821 | blocked_rdev = NULL; |
| @@ -925,7 +921,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
| 925 | /* In case raid1d snuck in to freeze_array */ | 921 | /* In case raid1d snuck in to freeze_array */ |
| 926 | wake_up(&conf->wait_barrier); | 922 | wake_up(&conf->wait_barrier); |
| 927 | 923 | ||
| 928 | if (do_sync || !bitmap) | 924 | if (do_sync || !bitmap || !plugged) |
| 929 | md_wakeup_thread(mddev->thread); | 925 | md_wakeup_thread(mddev->thread); |
| 930 | 926 | ||
| 931 | return 0; | 927 | return 0; |
| @@ -1516,13 +1512,16 @@ static void raid1d(mddev_t *mddev) | |||
| 1516 | conf_t *conf = mddev->private; | 1512 | conf_t *conf = mddev->private; |
| 1517 | struct list_head *head = &conf->retry_list; | 1513 | struct list_head *head = &conf->retry_list; |
| 1518 | mdk_rdev_t *rdev; | 1514 | mdk_rdev_t *rdev; |
| 1515 | struct blk_plug plug; | ||
| 1519 | 1516 | ||
| 1520 | md_check_recovery(mddev); | 1517 | md_check_recovery(mddev); |
| 1521 | 1518 | ||
| 1519 | blk_start_plug(&plug); | ||
| 1522 | for (;;) { | 1520 | for (;;) { |
| 1523 | char b[BDEVNAME_SIZE]; | 1521 | char b[BDEVNAME_SIZE]; |
| 1524 | 1522 | ||
| 1525 | flush_pending_writes(conf); | 1523 | if (atomic_read(&mddev->plug_cnt) == 0) |
| 1524 | flush_pending_writes(conf); | ||
| 1526 | 1525 | ||
| 1527 | spin_lock_irqsave(&conf->device_lock, flags); | 1526 | spin_lock_irqsave(&conf->device_lock, flags); |
| 1528 | if (list_empty(head)) { | 1527 | if (list_empty(head)) { |
| @@ -1593,6 +1592,7 @@ static void raid1d(mddev_t *mddev) | |||
| 1593 | } | 1592 | } |
| 1594 | cond_resched(); | 1593 | cond_resched(); |
| 1595 | } | 1594 | } |
| 1595 | blk_finish_plug(&plug); | ||
| 1596 | } | 1596 | } |
| 1597 | 1597 | ||
| 1598 | 1598 | ||
| @@ -2039,7 +2039,6 @@ static int stop(mddev_t *mddev) | |||
| 2039 | 2039 | ||
| 2040 | md_unregister_thread(mddev->thread); | 2040 | md_unregister_thread(mddev->thread); |
| 2041 | mddev->thread = NULL; | 2041 | mddev->thread = NULL; |
| 2042 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ | ||
| 2043 | if (conf->r1bio_pool) | 2042 | if (conf->r1bio_pool) |
| 2044 | mempool_destroy(conf->r1bio_pool); | 2043 | mempool_destroy(conf->r1bio_pool); |
| 2045 | kfree(conf->mirrors); | 2044 | kfree(conf->mirrors); |
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index f7b62370b374..8e9462626ec5 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
| @@ -5,7 +5,7 @@ | |||
| 5 | * | 5 | * |
| 6 | * RAID-10 support for md. | 6 | * RAID-10 support for md. |
| 7 | * | 7 | * |
| 8 | * Base on code in raid1.c. See raid1.c for futher copyright information. | 8 | * Base on code in raid1.c. See raid1.c for further copyright information. |
| 9 | * | 9 | * |
| 10 | * | 10 | * |
| 11 | * This program is free software; you can redistribute it and/or modify | 11 | * This program is free software; you can redistribute it and/or modify |
| @@ -340,14 +340,14 @@ static void raid10_end_write_request(struct bio *bio, int error) | |||
| 340 | 340 | ||
| 341 | /* | 341 | /* |
| 342 | * RAID10 layout manager | 342 | * RAID10 layout manager |
| 343 | * Aswell as the chunksize and raid_disks count, there are two | 343 | * As well as the chunksize and raid_disks count, there are two |
| 344 | * parameters: near_copies and far_copies. | 344 | * parameters: near_copies and far_copies. |
| 345 | * near_copies * far_copies must be <= raid_disks. | 345 | * near_copies * far_copies must be <= raid_disks. |
| 346 | * Normally one of these will be 1. | 346 | * Normally one of these will be 1. |
| 347 | * If both are 1, we get raid0. | 347 | * If both are 1, we get raid0. |
| 348 | * If near_copies == raid_disks, we get raid1. | 348 | * If near_copies == raid_disks, we get raid1. |
| 349 | * | 349 | * |
| 350 | * Chunks are layed out in raid0 style with near_copies copies of the | 350 | * Chunks are laid out in raid0 style with near_copies copies of the |
| 351 | * first chunk, followed by near_copies copies of the next chunk and | 351 | * first chunk, followed by near_copies copies of the next chunk and |
| 352 | * so on. | 352 | * so on. |
| 353 | * If far_copies > 1, then after 1/far_copies of the array has been assigned | 353 | * If far_copies > 1, then after 1/far_copies of the array has been assigned |
| @@ -634,12 +634,6 @@ static void flush_pending_writes(conf_t *conf) | |||
| 634 | spin_unlock_irq(&conf->device_lock); | 634 | spin_unlock_irq(&conf->device_lock); |
| 635 | } | 635 | } |
| 636 | 636 | ||
| 637 | static void md_kick_device(mddev_t *mddev) | ||
| 638 | { | ||
| 639 | blk_flush_plug(current); | ||
| 640 | md_wakeup_thread(mddev->thread); | ||
| 641 | } | ||
| 642 | |||
| 643 | /* Barriers.... | 637 | /* Barriers.... |
| 644 | * Sometimes we need to suspend IO while we do something else, | 638 | * Sometimes we need to suspend IO while we do something else, |
| 645 | * either some resync/recovery, or reconfigure the array. | 639 | * either some resync/recovery, or reconfigure the array. |
| @@ -669,15 +663,15 @@ static void raise_barrier(conf_t *conf, int force) | |||
| 669 | 663 | ||
| 670 | /* Wait until no block IO is waiting (unless 'force') */ | 664 | /* Wait until no block IO is waiting (unless 'force') */ |
| 671 | wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting, | 665 | wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting, |
| 672 | conf->resync_lock, md_kick_device(conf->mddev)); | 666 | conf->resync_lock, ); |
| 673 | 667 | ||
| 674 | /* block any new IO from starting */ | 668 | /* block any new IO from starting */ |
| 675 | conf->barrier++; | 669 | conf->barrier++; |
| 676 | 670 | ||
| 677 | /* No wait for all pending IO to complete */ | 671 | /* Now wait for all pending IO to complete */ |
| 678 | wait_event_lock_irq(conf->wait_barrier, | 672 | wait_event_lock_irq(conf->wait_barrier, |
| 679 | !conf->nr_pending && conf->barrier < RESYNC_DEPTH, | 673 | !conf->nr_pending && conf->barrier < RESYNC_DEPTH, |
| 680 | conf->resync_lock, md_kick_device(conf->mddev)); | 674 | conf->resync_lock, ); |
| 681 | 675 | ||
| 682 | spin_unlock_irq(&conf->resync_lock); | 676 | spin_unlock_irq(&conf->resync_lock); |
| 683 | } | 677 | } |
| @@ -698,7 +692,7 @@ static void wait_barrier(conf_t *conf) | |||
| 698 | conf->nr_waiting++; | 692 | conf->nr_waiting++; |
| 699 | wait_event_lock_irq(conf->wait_barrier, !conf->barrier, | 693 | wait_event_lock_irq(conf->wait_barrier, !conf->barrier, |
| 700 | conf->resync_lock, | 694 | conf->resync_lock, |
| 701 | md_kick_device(conf->mddev)); | 695 | ); |
| 702 | conf->nr_waiting--; | 696 | conf->nr_waiting--; |
| 703 | } | 697 | } |
| 704 | conf->nr_pending++; | 698 | conf->nr_pending++; |
| @@ -734,8 +728,8 @@ static void freeze_array(conf_t *conf) | |||
| 734 | wait_event_lock_irq(conf->wait_barrier, | 728 | wait_event_lock_irq(conf->wait_barrier, |
| 735 | conf->nr_pending == conf->nr_queued+1, | 729 | conf->nr_pending == conf->nr_queued+1, |
| 736 | conf->resync_lock, | 730 | conf->resync_lock, |
| 737 | ({ flush_pending_writes(conf); | 731 | flush_pending_writes(conf)); |
| 738 | md_kick_device(conf->mddev); })); | 732 | |
| 739 | spin_unlock_irq(&conf->resync_lock); | 733 | spin_unlock_irq(&conf->resync_lock); |
| 740 | } | 734 | } |
| 741 | 735 | ||
| @@ -762,6 +756,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
| 762 | const unsigned long do_fua = (bio->bi_rw & REQ_FUA); | 756 | const unsigned long do_fua = (bio->bi_rw & REQ_FUA); |
| 763 | unsigned long flags; | 757 | unsigned long flags; |
| 764 | mdk_rdev_t *blocked_rdev; | 758 | mdk_rdev_t *blocked_rdev; |
| 759 | int plugged; | ||
| 765 | 760 | ||
| 766 | if (unlikely(bio->bi_rw & REQ_FLUSH)) { | 761 | if (unlikely(bio->bi_rw & REQ_FLUSH)) { |
| 767 | md_flush_request(mddev, bio); | 762 | md_flush_request(mddev, bio); |
| @@ -870,6 +865,8 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
| 870 | * inc refcount on their rdev. Record them by setting | 865 | * inc refcount on their rdev. Record them by setting |
| 871 | * bios[x] to bio | 866 | * bios[x] to bio |
| 872 | */ | 867 | */ |
| 868 | plugged = mddev_check_plugged(mddev); | ||
| 869 | |||
| 873 | raid10_find_phys(conf, r10_bio); | 870 | raid10_find_phys(conf, r10_bio); |
| 874 | retry_write: | 871 | retry_write: |
| 875 | blocked_rdev = NULL; | 872 | blocked_rdev = NULL; |
| @@ -946,9 +943,8 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
| 946 | /* In case raid10d snuck in to freeze_array */ | 943 | /* In case raid10d snuck in to freeze_array */ |
| 947 | wake_up(&conf->wait_barrier); | 944 | wake_up(&conf->wait_barrier); |
| 948 | 945 | ||
| 949 | if (do_sync || !mddev->bitmap) | 946 | if (do_sync || !mddev->bitmap || !plugged) |
| 950 | md_wakeup_thread(mddev->thread); | 947 | md_wakeup_thread(mddev->thread); |
| 951 | |||
| 952 | return 0; | 948 | return 0; |
| 953 | } | 949 | } |
| 954 | 950 | ||
| @@ -1640,9 +1636,11 @@ static void raid10d(mddev_t *mddev) | |||
| 1640 | conf_t *conf = mddev->private; | 1636 | conf_t *conf = mddev->private; |
| 1641 | struct list_head *head = &conf->retry_list; | 1637 | struct list_head *head = &conf->retry_list; |
| 1642 | mdk_rdev_t *rdev; | 1638 | mdk_rdev_t *rdev; |
| 1639 | struct blk_plug plug; | ||
| 1643 | 1640 | ||
| 1644 | md_check_recovery(mddev); | 1641 | md_check_recovery(mddev); |
| 1645 | 1642 | ||
| 1643 | blk_start_plug(&plug); | ||
| 1646 | for (;;) { | 1644 | for (;;) { |
| 1647 | char b[BDEVNAME_SIZE]; | 1645 | char b[BDEVNAME_SIZE]; |
| 1648 | 1646 | ||
| @@ -1716,6 +1714,7 @@ static void raid10d(mddev_t *mddev) | |||
| 1716 | } | 1714 | } |
| 1717 | cond_resched(); | 1715 | cond_resched(); |
| 1718 | } | 1716 | } |
| 1717 | blk_finish_plug(&plug); | ||
| 1719 | } | 1718 | } |
| 1720 | 1719 | ||
| 1721 | 1720 | ||
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index 2316ac2e8e21..944b1104d3b4 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h | |||
| @@ -17,8 +17,8 @@ struct r10_private_data_s { | |||
| 17 | spinlock_t device_lock; | 17 | spinlock_t device_lock; |
| 18 | 18 | ||
| 19 | /* geometry */ | 19 | /* geometry */ |
| 20 | int near_copies; /* number of copies layed out raid0 style */ | 20 | int near_copies; /* number of copies laid out raid0 style */ |
| 21 | int far_copies; /* number of copies layed out | 21 | int far_copies; /* number of copies laid out |
| 22 | * at large strides across drives | 22 | * at large strides across drives |
| 23 | */ | 23 | */ |
| 24 | int far_offset; /* far_copies are offset by 1 stripe | 24 | int far_offset; /* far_copies are offset by 1 stripe |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index e867ee42b152..f301e6ae220c 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
| @@ -27,12 +27,12 @@ | |||
| 27 | * | 27 | * |
| 28 | * We group bitmap updates into batches. Each batch has a number. | 28 | * We group bitmap updates into batches. Each batch has a number. |
| 29 | * We may write out several batches at once, but that isn't very important. | 29 | * We may write out several batches at once, but that isn't very important. |
| 30 | * conf->bm_write is the number of the last batch successfully written. | 30 | * conf->seq_write is the number of the last batch successfully written. |
| 31 | * conf->bm_flush is the number of the last batch that was closed to | 31 | * conf->seq_flush is the number of the last batch that was closed to |
| 32 | * new additions. | 32 | * new additions. |
| 33 | * When we discover that we will need to write to any block in a stripe | 33 | * When we discover that we will need to write to any block in a stripe |
| 34 | * (in add_stripe_bio) we update the in-memory bitmap and record in sh->bm_seq | 34 | * (in add_stripe_bio) we update the in-memory bitmap and record in sh->bm_seq |
| 35 | * the number of the batch it will be in. This is bm_flush+1. | 35 | * the number of the batch it will be in. This is seq_flush+1. |
| 36 | * When we are ready to do a write, if that batch hasn't been written yet, | 36 | * When we are ready to do a write, if that batch hasn't been written yet, |
| 37 | * we plug the array and queue the stripe for later. | 37 | * we plug the array and queue the stripe for later. |
| 38 | * When an unplug happens, we increment bm_flush, thus closing the current | 38 | * When an unplug happens, we increment bm_flush, thus closing the current |
| @@ -199,14 +199,12 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh) | |||
| 199 | BUG_ON(!list_empty(&sh->lru)); | 199 | BUG_ON(!list_empty(&sh->lru)); |
| 200 | BUG_ON(atomic_read(&conf->active_stripes)==0); | 200 | BUG_ON(atomic_read(&conf->active_stripes)==0); |
| 201 | if (test_bit(STRIPE_HANDLE, &sh->state)) { | 201 | if (test_bit(STRIPE_HANDLE, &sh->state)) { |
| 202 | if (test_bit(STRIPE_DELAYED, &sh->state)) { | 202 | if (test_bit(STRIPE_DELAYED, &sh->state)) |
| 203 | list_add_tail(&sh->lru, &conf->delayed_list); | 203 | list_add_tail(&sh->lru, &conf->delayed_list); |
| 204 | plugger_set_plug(&conf->plug); | 204 | else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && |
| 205 | } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && | 205 | sh->bm_seq - conf->seq_write > 0) |
| 206 | sh->bm_seq - conf->seq_write > 0) { | ||
| 207 | list_add_tail(&sh->lru, &conf->bitmap_list); | 206 | list_add_tail(&sh->lru, &conf->bitmap_list); |
| 208 | plugger_set_plug(&conf->plug); | 207 | else { |
| 209 | } else { | ||
| 210 | clear_bit(STRIPE_BIT_DELAY, &sh->state); | 208 | clear_bit(STRIPE_BIT_DELAY, &sh->state); |
| 211 | list_add_tail(&sh->lru, &conf->handle_list); | 209 | list_add_tail(&sh->lru, &conf->handle_list); |
| 212 | } | 210 | } |
| @@ -461,7 +459,7 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector, | |||
| 461 | < (conf->max_nr_stripes *3/4) | 459 | < (conf->max_nr_stripes *3/4) |
| 462 | || !conf->inactive_blocked), | 460 | || !conf->inactive_blocked), |
| 463 | conf->device_lock, | 461 | conf->device_lock, |
| 464 | md_raid5_kick_device(conf)); | 462 | ); |
| 465 | conf->inactive_blocked = 0; | 463 | conf->inactive_blocked = 0; |
| 466 | } else | 464 | } else |
| 467 | init_stripe(sh, sector, previous); | 465 | init_stripe(sh, sector, previous); |
| @@ -1470,7 +1468,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize) | |||
| 1470 | wait_event_lock_irq(conf->wait_for_stripe, | 1468 | wait_event_lock_irq(conf->wait_for_stripe, |
| 1471 | !list_empty(&conf->inactive_list), | 1469 | !list_empty(&conf->inactive_list), |
| 1472 | conf->device_lock, | 1470 | conf->device_lock, |
| 1473 | blk_flush_plug(current)); | 1471 | ); |
| 1474 | osh = get_free_stripe(conf); | 1472 | osh = get_free_stripe(conf); |
| 1475 | spin_unlock_irq(&conf->device_lock); | 1473 | spin_unlock_irq(&conf->device_lock); |
| 1476 | atomic_set(&nsh->count, 1); | 1474 | atomic_set(&nsh->count, 1); |
| @@ -3623,8 +3621,7 @@ static void raid5_activate_delayed(raid5_conf_t *conf) | |||
| 3623 | atomic_inc(&conf->preread_active_stripes); | 3621 | atomic_inc(&conf->preread_active_stripes); |
| 3624 | list_add_tail(&sh->lru, &conf->hold_list); | 3622 | list_add_tail(&sh->lru, &conf->hold_list); |
| 3625 | } | 3623 | } |
| 3626 | } else | 3624 | } |
| 3627 | plugger_set_plug(&conf->plug); | ||
| 3628 | } | 3625 | } |
| 3629 | 3626 | ||
| 3630 | static void activate_bit_delay(raid5_conf_t *conf) | 3627 | static void activate_bit_delay(raid5_conf_t *conf) |
| @@ -3641,21 +3638,6 @@ static void activate_bit_delay(raid5_conf_t *conf) | |||
| 3641 | } | 3638 | } |
| 3642 | } | 3639 | } |
| 3643 | 3640 | ||
| 3644 | void md_raid5_kick_device(raid5_conf_t *conf) | ||
| 3645 | { | ||
| 3646 | blk_flush_plug(current); | ||
| 3647 | raid5_activate_delayed(conf); | ||
| 3648 | md_wakeup_thread(conf->mddev->thread); | ||
| 3649 | } | ||
| 3650 | EXPORT_SYMBOL_GPL(md_raid5_kick_device); | ||
| 3651 | |||
| 3652 | static void raid5_unplug(struct plug_handle *plug) | ||
| 3653 | { | ||
| 3654 | raid5_conf_t *conf = container_of(plug, raid5_conf_t, plug); | ||
| 3655 | |||
| 3656 | md_raid5_kick_device(conf); | ||
| 3657 | } | ||
| 3658 | |||
| 3659 | int md_raid5_congested(mddev_t *mddev, int bits) | 3641 | int md_raid5_congested(mddev_t *mddev, int bits) |
| 3660 | { | 3642 | { |
| 3661 | raid5_conf_t *conf = mddev->private; | 3643 | raid5_conf_t *conf = mddev->private; |
| @@ -3945,6 +3927,7 @@ static int make_request(mddev_t *mddev, struct bio * bi) | |||
| 3945 | struct stripe_head *sh; | 3927 | struct stripe_head *sh; |
| 3946 | const int rw = bio_data_dir(bi); | 3928 | const int rw = bio_data_dir(bi); |
| 3947 | int remaining; | 3929 | int remaining; |
| 3930 | int plugged; | ||
| 3948 | 3931 | ||
| 3949 | if (unlikely(bi->bi_rw & REQ_FLUSH)) { | 3932 | if (unlikely(bi->bi_rw & REQ_FLUSH)) { |
| 3950 | md_flush_request(mddev, bi); | 3933 | md_flush_request(mddev, bi); |
| @@ -3963,6 +3946,7 @@ static int make_request(mddev_t *mddev, struct bio * bi) | |||
| 3963 | bi->bi_next = NULL; | 3946 | bi->bi_next = NULL; |
| 3964 | bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ | 3947 | bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ |
| 3965 | 3948 | ||
| 3949 | plugged = mddev_check_plugged(mddev); | ||
| 3966 | for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { | 3950 | for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { |
| 3967 | DEFINE_WAIT(w); | 3951 | DEFINE_WAIT(w); |
| 3968 | int disks, data_disks; | 3952 | int disks, data_disks; |
| @@ -4057,7 +4041,7 @@ static int make_request(mddev_t *mddev, struct bio * bi) | |||
| 4057 | * add failed due to overlap. Flush everything | 4041 | * add failed due to overlap. Flush everything |
| 4058 | * and wait a while | 4042 | * and wait a while |
| 4059 | */ | 4043 | */ |
| 4060 | md_raid5_kick_device(conf); | 4044 | md_wakeup_thread(mddev->thread); |
| 4061 | release_stripe(sh); | 4045 | release_stripe(sh); |
| 4062 | schedule(); | 4046 | schedule(); |
| 4063 | goto retry; | 4047 | goto retry; |
| @@ -4077,6 +4061,9 @@ static int make_request(mddev_t *mddev, struct bio * bi) | |||
| 4077 | } | 4061 | } |
| 4078 | 4062 | ||
| 4079 | } | 4063 | } |
| 4064 | if (!plugged) | ||
| 4065 | md_wakeup_thread(mddev->thread); | ||
| 4066 | |||
| 4080 | spin_lock_irq(&conf->device_lock); | 4067 | spin_lock_irq(&conf->device_lock); |
| 4081 | remaining = raid5_dec_bi_phys_segments(bi); | 4068 | remaining = raid5_dec_bi_phys_segments(bi); |
| 4082 | spin_unlock_irq(&conf->device_lock); | 4069 | spin_unlock_irq(&conf->device_lock); |
| @@ -4478,24 +4465,30 @@ static void raid5d(mddev_t *mddev) | |||
| 4478 | struct stripe_head *sh; | 4465 | struct stripe_head *sh; |
| 4479 | raid5_conf_t *conf = mddev->private; | 4466 | raid5_conf_t *conf = mddev->private; |
| 4480 | int handled; | 4467 | int handled; |
| 4468 | struct blk_plug plug; | ||
| 4481 | 4469 | ||
| 4482 | pr_debug("+++ raid5d active\n"); | 4470 | pr_debug("+++ raid5d active\n"); |
| 4483 | 4471 | ||
| 4484 | md_check_recovery(mddev); | 4472 | md_check_recovery(mddev); |
| 4485 | 4473 | ||
| 4474 | blk_start_plug(&plug); | ||
| 4486 | handled = 0; | 4475 | handled = 0; |
| 4487 | spin_lock_irq(&conf->device_lock); | 4476 | spin_lock_irq(&conf->device_lock); |
| 4488 | while (1) { | 4477 | while (1) { |
| 4489 | struct bio *bio; | 4478 | struct bio *bio; |
| 4490 | 4479 | ||
| 4491 | if (conf->seq_flush != conf->seq_write) { | 4480 | if (atomic_read(&mddev->plug_cnt) == 0 && |
| 4492 | int seq = conf->seq_flush; | 4481 | !list_empty(&conf->bitmap_list)) { |
| 4482 | /* Now is a good time to flush some bitmap updates */ | ||
| 4483 | conf->seq_flush++; | ||
| 4493 | spin_unlock_irq(&conf->device_lock); | 4484 | spin_unlock_irq(&conf->device_lock); |
| 4494 | bitmap_unplug(mddev->bitmap); | 4485 | bitmap_unplug(mddev->bitmap); |
| 4495 | spin_lock_irq(&conf->device_lock); | 4486 | spin_lock_irq(&conf->device_lock); |
| 4496 | conf->seq_write = seq; | 4487 | conf->seq_write = conf->seq_flush; |
| 4497 | activate_bit_delay(conf); | 4488 | activate_bit_delay(conf); |
| 4498 | } | 4489 | } |
| 4490 | if (atomic_read(&mddev->plug_cnt) == 0) | ||
| 4491 | raid5_activate_delayed(conf); | ||
| 4499 | 4492 | ||
| 4500 | while ((bio = remove_bio_from_retry(conf))) { | 4493 | while ((bio = remove_bio_from_retry(conf))) { |
| 4501 | int ok; | 4494 | int ok; |
| @@ -4525,6 +4518,7 @@ static void raid5d(mddev_t *mddev) | |||
| 4525 | spin_unlock_irq(&conf->device_lock); | 4518 | spin_unlock_irq(&conf->device_lock); |
| 4526 | 4519 | ||
| 4527 | async_tx_issue_pending_all(); | 4520 | async_tx_issue_pending_all(); |
| 4521 | blk_finish_plug(&plug); | ||
| 4528 | 4522 | ||
| 4529 | pr_debug("--- raid5d inactive\n"); | 4523 | pr_debug("--- raid5d inactive\n"); |
| 4530 | } | 4524 | } |
| @@ -5141,8 +5135,6 @@ static int run(mddev_t *mddev) | |||
| 5141 | mdname(mddev)); | 5135 | mdname(mddev)); |
| 5142 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); | 5136 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); |
| 5143 | 5137 | ||
| 5144 | plugger_init(&conf->plug, raid5_unplug); | ||
| 5145 | mddev->plug = &conf->plug; | ||
| 5146 | if (mddev->queue) { | 5138 | if (mddev->queue) { |
| 5147 | int chunk_size; | 5139 | int chunk_size; |
| 5148 | /* read-ahead size must cover two whole stripes, which | 5140 | /* read-ahead size must cover two whole stripes, which |
| @@ -5192,7 +5184,6 @@ static int stop(mddev_t *mddev) | |||
| 5192 | mddev->thread = NULL; | 5184 | mddev->thread = NULL; |
| 5193 | if (mddev->queue) | 5185 | if (mddev->queue) |
| 5194 | mddev->queue->backing_dev_info.congested_fn = NULL; | 5186 | mddev->queue->backing_dev_info.congested_fn = NULL; |
| 5195 | plugger_flush(&conf->plug); /* the unplug fn references 'conf'*/ | ||
| 5196 | free_conf(conf); | 5187 | free_conf(conf); |
| 5197 | mddev->private = NULL; | 5188 | mddev->private = NULL; |
| 5198 | mddev->to_remove = &raid5_attrs_group; | 5189 | mddev->to_remove = &raid5_attrs_group; |
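The reworded raid5.c comment above ties bitmap batching to the renamed seq_write/seq_flush counters: a dirtying write joins the batch numbered seq_flush+1, the daemon closes that batch and writes the bitmap, and only stripes whose batch is at or before seq_write may proceed. A toy model of that counter discipline follows, with the stripe queue reduced to an array of bm_seq values; it is an assumption-laden illustration, not kernel code.

```c
#include <stdio.h>

#define NSTRIPES 4

struct conf {
	int seq_flush;	/* last batch closed to new additions */
	int seq_write;	/* last batch known to be on disk */
};

int main(void)
{
	struct conf conf = { 0, 0 };
	int bm_seq[NSTRIPES];

	/* add_stripe_bio: a dirtying write joins the currently open batch */
	for (int i = 0; i < NSTRIPES; i++) {
		bm_seq[i] = conf.seq_flush + 1;
		printf("stripe %d queued in batch %d\n", i, bm_seq[i]);
	}

	/* daemon: close the batch, write the bitmap, then release stripes */
	conf.seq_flush++;			/* batch 1 takes no new additions */
	/* (bitmap_unplug() would write the bitmap out here) */
	conf.seq_write = conf.seq_flush;	/* batch 1 is now on disk */

	for (int i = 0; i < NSTRIPES; i++)
		if (bm_seq[i] - conf.seq_write <= 0)
			printf("stripe %d may be written\n", i);
	return 0;
}
```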
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 8d563a4f022a..3ca77a2613ba 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h | |||
| @@ -400,8 +400,6 @@ struct raid5_private_data { | |||
| 400 | * Cleared when a sync completes. | 400 | * Cleared when a sync completes. |
| 401 | */ | 401 | */ |
| 402 | 402 | ||
| 403 | struct plug_handle plug; | ||
| 404 | |||
| 405 | /* per cpu variables */ | 403 | /* per cpu variables */ |
| 406 | struct raid5_percpu { | 404 | struct raid5_percpu { |
| 407 | struct page *spare_page; /* Used when checking P/Q in raid6 */ | 405 | struct page *spare_page; /* Used when checking P/Q in raid6 */ |
