author     Linus Torvalds <torvalds@linux-foundation.org>   2012-08-02 14:34:40 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2012-08-02 14:34:40 -0400
commit     25aa6a7ae46c6a041c46a2d314b9ab7c4f2baa41 (patch)
tree       b99c627c269e38450d5d0f9713862d2ed06d6e5e
parent     c8924234bd9c06fe86bae648c472d56cb10640a5 (diff)
parent     d9f691c365a83ce2530f0e46b947365c2db44ea0 (diff)
Merge tag 'md-3.6' of git://neil.brown.name/md
Pull additional md update from NeilBrown:
"This contains a few patches that depend on plugging changes in the
block layer so needed to wait for those.
It also contains a Kconfig fix for the new RAID10 support in dm-raid."
* tag 'md-3.6' of git://neil.brown.name/md:
md/dm-raid: DM_RAID should select MD_RAID10
md/raid1: submit IO from originating thread instead of md thread.
raid5: raid5d handle stripe in batch way
raid5: make_request use batch stripe release
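
Both the raid1 and raid5 patches below are built on the block layer's per-task
plugging callbacks (blk_check_plugged()), which is why they had to wait for the
plugging changes. As a rough sketch of the pattern — with hypothetical names
(my_dev, my_plug_cb, my_unplug, my_queue_write), assuming the 3.6-era API where
blk_check_plugged() allocates and registers one zeroed callback per plug and
later invokes it with from_schedule set if the task is about to sleep:

#include <linux/blkdev.h>       /* blk_check_plugged(), struct blk_plug_cb */
#include <linux/bio.h>          /* struct bio_list */
#include <linux/slab.h>         /* kfree() */

struct my_dev;                  /* hypothetical driver context */

struct my_plug_cb {
        struct blk_plug_cb cb;  /* embedded block-layer callback */
        struct bio_list pending; /* starts empty: allocation is zeroed */
};

static void my_unplug(struct blk_plug_cb *cb, bool from_schedule)
{
        struct my_plug_cb *plug = container_of(cb, struct my_plug_cb, cb);
        struct bio *bio = bio_list_get(&plug->pending);

        /* runs once per plug: at blk_finish_plug(), or from the
         * scheduler (from_schedule == true) if the task sleeps */
        while (bio) {
                struct bio *next = bio->bi_next;

                bio->bi_next = NULL;
                generic_make_request(bio);
                bio = next;
        }
        kfree(plug);            /* the callback owns its allocation */
}

static void my_queue_write(struct my_dev *dev, struct bio *bio)
{
        /* first call per plug allocates and registers the callback;
         * returns NULL if the task has no active plug */
        struct blk_plug_cb *cb =
                blk_check_plugged(my_unplug, dev, sizeof(struct my_plug_cb));

        if (cb)
                bio_list_add(&container_of(cb, struct my_plug_cb, cb)->pending,
                             bio);
        else
                generic_make_request(bio);      /* unplugged: submit now */
}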
-rw-r--r--  drivers/md/Kconfig  |   5
-rw-r--r--  drivers/md/bitmap.c |   2
-rw-r--r--  drivers/md/raid1.c  |  57
-rw-r--r--  drivers/md/raid5.c  | 107
-rw-r--r--  drivers/md/raid5.h  |   1
5 files changed, 150 insertions, 22 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 1eee45b69b71..d949b781f6f8 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -268,13 +268,14 @@ config DM_MIRROR
 	  needed for live data migration tools such as 'pvmove'.
 
 config DM_RAID
-	tristate "RAID 1/4/5/6 target"
+	tristate "RAID 1/4/5/6/10 target"
 	depends on BLK_DEV_DM
 	select MD_RAID1
+	select MD_RAID10
 	select MD_RAID456
 	select BLK_DEV_MD
 	---help---
-	  A dm target that supports RAID1, RAID4, RAID5 and RAID6 mappings
+	  A dm target that supports RAID1, RAID10, RAID4, RAID5 and RAID6 mappings
 
 	  A RAID-5 set of N drives with a capacity of C MB per drive provides
 	  the capacity of C * (N - 1) MB, and protects against a failure
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 15dbe03117e4..94e7f6ba2e11 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -1305,7 +1305,7 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect
 			prepare_to_wait(&bitmap->overflow_wait, &__wait,
 					TASK_UNINTERRUPTIBLE);
 			spin_unlock_irq(&bitmap->counts.lock);
-			io_schedule();
+			schedule();
 			finish_wait(&bitmap->overflow_wait, &__wait);
 			continue;
 		}
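
For orientation, the io_schedule()/schedule() line above sits inside the
kernel's standard open-coded waitqueue loop. A minimal sketch of that pattern,
with a hypothetical lock, condition, and waitqueue (the real code waits on
bitmap->overflow_wait under bitmap->counts.lock):

#include <linux/wait.h>
#include <linux/spinlock.h>
#include <linux/sched.h>

/* hypothetical state standing in for the bitmap counters above */
static DECLARE_WAIT_QUEUE_HEAD(my_waitq);
static DEFINE_SPINLOCK(my_lock);
static int my_condition;

static void my_wait_for_room(void)
{
        DEFINE_WAIT(__wait);

        spin_lock_irq(&my_lock);
        while (!my_condition) {
                /* queue ourselves before dropping the lock so a
                 * wake_up() between unlock and schedule() is not lost */
                prepare_to_wait(&my_waitq, &__wait, TASK_UNINTERRUPTIBLE);
                spin_unlock_irq(&my_lock);
                schedule();     /* sleeps until wake_up(&my_waitq) */
                finish_wait(&my_waitq, &__wait);
                spin_lock_irq(&my_lock);
        }
        spin_unlock_irq(&my_lock);
}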
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 9f7f8bee8442..611b5f797618 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -944,6 +944,44 @@ do_sync_io:
 	pr_debug("%dB behind alloc failed, doing sync I/O\n", bio->bi_size);
 }
 
+struct raid1_plug_cb {
+	struct blk_plug_cb cb;
+	struct bio_list pending;
+	int pending_cnt;
+};
+
+static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
+{
+	struct raid1_plug_cb *plug = container_of(cb, struct raid1_plug_cb,
+						  cb);
+	struct mddev *mddev = plug->cb.data;
+	struct r1conf *conf = mddev->private;
+	struct bio *bio;
+
+	if (from_schedule) {
+		spin_lock_irq(&conf->device_lock);
+		bio_list_merge(&conf->pending_bio_list, &plug->pending);
+		conf->pending_count += plug->pending_cnt;
+		spin_unlock_irq(&conf->device_lock);
+		md_wakeup_thread(mddev->thread);
+		kfree(plug);
+		return;
+	}
+
+	/* we aren't scheduling, so we can do the write-out directly. */
+	bio = bio_list_get(&plug->pending);
+	bitmap_unplug(mddev->bitmap);
+	wake_up(&conf->wait_barrier);
+
+	while (bio) { /* submit pending writes */
+		struct bio *next = bio->bi_next;
+		bio->bi_next = NULL;
+		generic_make_request(bio);
+		bio = next;
+	}
+	kfree(plug);
+}
+
 static void make_request(struct mddev *mddev, struct bio * bio)
 {
 	struct r1conf *conf = mddev->private;
@@ -957,6 +995,8 @@ static void make_request(struct mddev *mddev, struct bio * bio)
 	const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
 	const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
 	struct md_rdev *blocked_rdev;
+	struct blk_plug_cb *cb;
+	struct raid1_plug_cb *plug = NULL;
 	int first_clone;
 	int sectors_handled;
 	int max_sectors;
@@ -1259,11 +1299,22 @@ read_again:
 		mbio->bi_private = r1_bio;
 
 		atomic_inc(&r1_bio->remaining);
+
+		cb = blk_check_plugged(raid1_unplug, mddev, sizeof(*plug));
+		if (cb)
+			plug = container_of(cb, struct raid1_plug_cb, cb);
+		else
+			plug = NULL;
 		spin_lock_irqsave(&conf->device_lock, flags);
-		bio_list_add(&conf->pending_bio_list, mbio);
-		conf->pending_count++;
+		if (plug) {
+			bio_list_add(&plug->pending, mbio);
+			plug->pending_cnt++;
+		} else {
+			bio_list_add(&conf->pending_bio_list, mbio);
+			conf->pending_count++;
+		}
 		spin_unlock_irqrestore(&conf->device_lock, flags);
-		if (!mddev_check_plugged(mddev))
+		if (!plug)
 			md_wakeup_thread(mddev->thread);
 	}
 	/* Mustn't call r1_bio_write_done before this next test,
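
To see when raid1_unplug() runs with each from_schedule value, here is a
sketch of a submitting task's plug scope (illustrative only; submit_my_writes()
is a hypothetical helper):

static void my_submit_batch(struct mddev *mddev)
{
        struct blk_plug plug;

        blk_start_plug(&plug);
        /* each write reaches raid1's make_request(); its blk_check_plugged()
         * finds this plug, so mbios accumulate in plug->pending */
        submit_my_writes(mddev);        /* hypothetical: issues several bios */
        blk_finish_plug(&plug);         /* runs raid1_unplug(cb, false) */
}

If the task blocks while plugged, the scheduler flushes the plug instead and
the callback runs with from_schedule == true, which is why that path just
splices the bios back onto conf->pending_bio_list and wakes the md thread
rather than issuing I/O from inside the scheduler.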
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 87a2d0bdedd1..adda94df5eb2 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -484,7 +484,8 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
 	} else {
 		if (atomic_read(&sh->count)) {
 			BUG_ON(!list_empty(&sh->lru)
-			       && !test_bit(STRIPE_EXPANDING, &sh->state));
+			       && !test_bit(STRIPE_EXPANDING, &sh->state)
+			       && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state));
 		} else {
 			if (!test_bit(STRIPE_HANDLE, &sh->state))
 				atomic_inc(&conf->active_stripes);
@@ -4010,6 +4011,62 @@ static struct stripe_head *__get_priority_stripe(struct r5conf *conf)
 	return sh;
 }
 
+struct raid5_plug_cb {
+	struct blk_plug_cb cb;
+	struct list_head list;
+};
+
+static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
+{
+	struct raid5_plug_cb *cb = container_of(
+		blk_cb, struct raid5_plug_cb, cb);
+	struct stripe_head *sh;
+	struct mddev *mddev = cb->cb.data;
+	struct r5conf *conf = mddev->private;
+
+	if (cb->list.next && !list_empty(&cb->list)) {
+		spin_lock_irq(&conf->device_lock);
+		while (!list_empty(&cb->list)) {
+			sh = list_first_entry(&cb->list, struct stripe_head, lru);
+			list_del_init(&sh->lru);
+			/*
+			 * avoid race release_stripe_plug() sees
+			 * STRIPE_ON_UNPLUG_LIST clear but the stripe
+			 * is still in our list
+			 */
+			smp_mb__before_clear_bit();
+			clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state);
+			__release_stripe(conf, sh);
+		}
+		spin_unlock_irq(&conf->device_lock);
+	}
+	kfree(cb);
+}
+
+static void release_stripe_plug(struct mddev *mddev,
+				struct stripe_head *sh)
+{
+	struct blk_plug_cb *blk_cb = blk_check_plugged(
+		raid5_unplug, mddev,
+		sizeof(struct raid5_plug_cb));
+	struct raid5_plug_cb *cb;
+
+	if (!blk_cb) {
+		release_stripe(sh);
+		return;
+	}
+
+	cb = container_of(blk_cb, struct raid5_plug_cb, cb);
+
+	if (cb->list.next == NULL)
+		INIT_LIST_HEAD(&cb->list);
+
+	if (!test_and_set_bit(STRIPE_ON_UNPLUG_LIST, &sh->state))
+		list_add_tail(&sh->lru, &cb->list);
+	else
+		release_stripe(sh);
+}
+
 static void make_request(struct mddev *mddev, struct bio * bi)
 {
 	struct r5conf *conf = mddev->private;
@@ -4138,8 +4195,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 			if ((bi->bi_rw & REQ_NOIDLE) &&
 			    !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
 				atomic_inc(&conf->preread_active_stripes);
-			mddev_check_plugged(mddev);
-			release_stripe(sh);
+			release_stripe_plug(mddev, sh);
 		} else {
 			/* cannot get stripe for read-ahead, just give-up */
 			clear_bit(BIO_UPTODATE, &bi->bi_flags);
@@ -4537,6 +4593,30 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
 	return handled;
 }
 
+#define MAX_STRIPE_BATCH 8
+static int handle_active_stripes(struct r5conf *conf)
+{
+	struct stripe_head *batch[MAX_STRIPE_BATCH], *sh;
+	int i, batch_size = 0;
+
+	while (batch_size < MAX_STRIPE_BATCH &&
+	       (sh = __get_priority_stripe(conf)) != NULL)
+		batch[batch_size++] = sh;
+
+	if (batch_size == 0)
+		return batch_size;
+	spin_unlock_irq(&conf->device_lock);
+
+	for (i = 0; i < batch_size; i++)
+		handle_stripe(batch[i]);
+
+	cond_resched();
+
+	spin_lock_irq(&conf->device_lock);
+	for (i = 0; i < batch_size; i++)
+		__release_stripe(conf, batch[i]);
+	return batch_size;
+}
 
 /*
  * This is our raid5 kernel thread.
@@ -4547,7 +4627,6 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
  */
 static void raid5d(struct mddev *mddev)
 {
-	struct stripe_head *sh;
 	struct r5conf *conf = mddev->private;
 	int handled;
 	struct blk_plug plug;
@@ -4561,6 +4640,7 @@ static void raid5d(struct mddev *mddev)
 	spin_lock_irq(&conf->device_lock);
 	while (1) {
 		struct bio *bio;
+		int batch_size;
 
 		if (
 		    !list_empty(&conf->bitmap_list)) {
@@ -4584,21 +4664,16 @@ static void raid5d(struct mddev *mddev)
 			handled++;
 		}
 
-		sh = __get_priority_stripe(conf);
-
-		if (!sh)
+		batch_size = handle_active_stripes(conf);
+		if (!batch_size)
 			break;
-		spin_unlock_irq(&conf->device_lock);
-
-		handled++;
-		handle_stripe(sh);
-		release_stripe(sh);
-		cond_resched();
+		handled += batch_size;
 
-		if (mddev->flags & ~(1<<MD_CHANGE_PENDING))
+		if (mddev->flags & ~(1<<MD_CHANGE_PENDING)) {
+			spin_unlock_irq(&conf->device_lock);
 			md_check_recovery(mddev);
-
-		spin_lock_irq(&conf->device_lock);
+			spin_lock_irq(&conf->device_lock);
+		}
 	}
 	pr_debug("%d stripes handled\n", handled);
 
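
handle_active_stripes() above is an instance of a common drop-the-lock
batching shape: gather up to MAX_STRIPE_BATCH stripes while holding
conf->device_lock, release the lock once for the expensive handle_stripe()
calls, then retake it to release the whole batch. A generic sketch of that
shape, with hypothetical my_* names standing in for __get_priority_stripe(),
handle_stripe() and __release_stripe():

#define MY_BATCH 8              /* mirrors MAX_STRIPE_BATCH above */

struct my_conf {
        spinlock_t lock;
        /* ... queue of items ... */
};

struct my_item;

struct my_item *my_dequeue(struct my_conf *conf);       /* needs conf->lock */
void my_process(struct my_item *item);                  /* heavy work */
void my_release(struct my_conf *conf, struct my_item *item);

/* called with conf->lock held; returns how many items it handled */
static int my_handle_batch(struct my_conf *conf)
{
        struct my_item *batch[MY_BATCH];
        int i, n = 0;

        while (n < MY_BATCH && (batch[n] = my_dequeue(conf)) != NULL)
                n++;
        if (!n)
                return 0;

        spin_unlock_irq(&conf->lock);   /* heavy work without the lock */
        for (i = 0; i < n; i++)
                my_process(batch[i]);
        cond_resched();
        spin_lock_irq(&conf->lock);     /* retake before releasing the batch */

        for (i = 0; i < n; i++)
                my_release(conf, batch[i]);
        return n;
}

Relatedly, the smp_mb__before_clear_bit() in raid5_unplug() orders
list_del_init() before clearing STRIPE_ON_UNPLUG_LIST, so a concurrent
release_stripe_plug() cannot see the bit clear while the stripe is still on
the plug's list — the race the in-code comment describes.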
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 61dbb615c30b..a9fc24901eda 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -321,6 +321,7 @@ enum {
 	STRIPE_BIOFILL_RUN,
 	STRIPE_COMPUTE_RUN,
 	STRIPE_OPS_REQ_PENDING,
+	STRIPE_ON_UNPLUG_LIST,
 };
 
 /*