author	Linus Torvalds <torvalds@linux-foundation.org>	2012-08-02 14:34:40 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-08-02 14:34:40 -0400
commit	25aa6a7ae46c6a041c46a2d314b9ab7c4f2baa41 (patch)
tree	b99c627c269e38450d5d0f9713862d2ed06d6e5e
parent	c8924234bd9c06fe86bae648c472d56cb10640a5 (diff)
parent	d9f691c365a83ce2530f0e46b947365c2db44ea0 (diff)
Merge tag 'md-3.6' of git://neil.brown.name/md
Pull additional md update from NeilBrown:
 "This contains a few patches that depend on plugging changes in the
  block layer so needed to wait for those.

  It also contains a Kconfig fix for the new RAID10 support in dm-raid."

* tag 'md-3.6' of git://neil.brown.name/md:
  md/dm-raid: DM_RAID should select MD_RAID10
  md/raid1: submit IO from originating thread instead of md thread.
  raid5: raid5d handle stripe in batch way
  raid5: make_request use batch stripe release
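For context, both the raid1 and raid5 patches in this pull are built on the per-task plug
callbacks added to the block layer for 3.6 (blk_check_plugged()). The sketch below shows the
rough shape of that pattern as the hunks further down use it; it is a simplified illustration
only, and the my_plug_cb/my_unplug/my_queue_write names are hypothetical, not part of the patch.

/*
 * Minimal sketch of the blk_check_plugged() pattern (~3.6 block-layer API,
 * <linux/blkdev.h>, <linux/bio.h>).  The real users are raid1_unplug() and
 * raid5_unplug() in the hunks below; names prefixed my_ are illustrative only.
 */
struct my_plug_cb {
	struct blk_plug_cb	cb;		/* embedded first so container_of() works */
	struct bio_list		pending;	/* bios queued while the task is plugged */
};

/* Invoked by the block layer when the submitting task's plug is released. */
static void my_unplug(struct blk_plug_cb *cb, bool from_schedule)
{
	struct my_plug_cb *plug = container_of(cb, struct my_plug_cb, cb);
	struct bio *bio = bio_list_get(&plug->pending);

	/*
	 * Simplification: the real raid1_unplug() checks from_schedule and
	 * hands the list back to the md thread instead of submitting here
	 * when the unplug came from schedule().
	 */
	while (bio) {
		struct bio *next = bio->bi_next;

		bio->bi_next = NULL;
		generic_make_request(bio);
		bio = next;
	}
	kfree(plug);		/* the callback owns its own allocation */
}

static void my_queue_write(struct mddev *mddev, struct bio *bio)
{
	/*
	 * blk_check_plugged() returns the callback already attached to the
	 * current task's plug, attaches a fresh zeroed one if needed, or
	 * returns NULL when no plug is active.
	 */
	struct blk_plug_cb *cb = blk_check_plugged(my_unplug, mddev,
						   sizeof(struct my_plug_cb));

	if (cb) {
		struct my_plug_cb *plug = container_of(cb, struct my_plug_cb, cb);

		bio_list_add(&plug->pending, bio);	/* batch until unplug */
	} else {
		generic_make_request(bio);		/* no plug: submit now */
	}
}

The effect in raid1 is that writes queued while the submitting task holds a plug are written
out by that task at unplug time (or handed to the md thread if the unplug happens from
schedule()), instead of always being deferred to the md thread.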
-rw-r--r--	drivers/md/Kconfig	5
-rw-r--r--	drivers/md/bitmap.c	2
-rw-r--r--	drivers/md/raid1.c	57
-rw-r--r--	drivers/md/raid5.c	107
-rw-r--r--	drivers/md/raid5.h	1
5 files changed, 150 insertions(+), 22 deletions(-)
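The two raid5 patches change raid5d() from taking device_lock once per stripe to draining
stripes in batches of up to eight, doing the heavy handle_stripe() work with the lock dropped.
A simplified, self-contained sketch of that batching shape follows; all my_* types and helpers
are stand-ins (the real helper is handle_active_stripes() in the raid5.c hunks below).

/*
 * Illustrative batching loop only; assumes <linux/list.h>, <linux/spinlock.h>
 * and <linux/sched.h>.  Every my_* name is hypothetical.
 */
#define MY_BATCH	8

struct my_item {
	struct list_head	lru;
};

struct my_queue {
	spinlock_t		lock;
	struct list_head	pending;	/* items awaiting work */
	struct list_head	done;		/* items already handled */
};

static void my_handle(struct my_item *item)
{
	/* heavy per-item work goes here (handle_stripe() in raid5) */
}

static void my_release(struct my_queue *q, struct my_item *item)
{
	/* hand the item back under the lock (__release_stripe() in raid5) */
	list_add_tail(&item->lru, &q->done);
}

/* Called with q->lock held; returns how many items were handled. */
static int my_drain_batch(struct my_queue *q)
{
	struct my_item *batch[MY_BATCH];
	int i, n = 0;

	while (n < MY_BATCH && !list_empty(&q->pending)) {
		batch[n] = list_first_entry(&q->pending, struct my_item, lru);
		list_del_init(&batch[n]->lru);
		n++;
	}
	if (!n)
		return 0;

	spin_unlock_irq(&q->lock);
	for (i = 0; i < n; i++)
		my_handle(batch[i]);	/* no lock held for the heavy work */
	cond_resched();
	spin_lock_irq(&q->lock);

	for (i = 0; i < n; i++)
		my_release(q, batch[i]);
	return n;
}

The point of the batching is fewer round-trips on device_lock and one cond_resched() per batch
rather than per stripe; the make_request side gets the matching treatment by collecting stripes
on a plug list (release_stripe_plug()) and releasing them in one go at unplug time.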
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 1eee45b69b71..d949b781f6f8 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -268,13 +268,14 @@ config DM_MIRROR
 	  needed for live data migration tools such as 'pvmove'.
 
 config DM_RAID
-	tristate "RAID 1/4/5/6 target"
+	tristate "RAID 1/4/5/6/10 target"
 	depends on BLK_DEV_DM
 	select MD_RAID1
+	select MD_RAID10
 	select MD_RAID456
 	select BLK_DEV_MD
 	---help---
-	  A dm target that supports RAID1, RAID4, RAID5 and RAID6 mappings
+	  A dm target that supports RAID1, RAID10, RAID4, RAID5 and RAID6 mappings
 
 	  A RAID-5 set of N drives with a capacity of C MB per drive provides
 	  the capacity of C * (N - 1) MB, and protects against a failure
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 15dbe03117e4..94e7f6ba2e11 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -1305,7 +1305,7 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect
 			prepare_to_wait(&bitmap->overflow_wait, &__wait,
 					TASK_UNINTERRUPTIBLE);
 			spin_unlock_irq(&bitmap->counts.lock);
-			io_schedule();
+			schedule();
 			finish_wait(&bitmap->overflow_wait, &__wait);
 			continue;
 		}
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 9f7f8bee8442..611b5f797618 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -944,6 +944,44 @@ do_sync_io:
 	pr_debug("%dB behind alloc failed, doing sync I/O\n", bio->bi_size);
 }
 
+struct raid1_plug_cb {
+	struct blk_plug_cb	cb;
+	struct bio_list		pending;
+	int			pending_cnt;
+};
+
+static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
+{
+	struct raid1_plug_cb *plug = container_of(cb, struct raid1_plug_cb,
+						  cb);
+	struct mddev *mddev = plug->cb.data;
+	struct r1conf *conf = mddev->private;
+	struct bio *bio;
+
+	if (from_schedule) {
+		spin_lock_irq(&conf->device_lock);
+		bio_list_merge(&conf->pending_bio_list, &plug->pending);
+		conf->pending_count += plug->pending_cnt;
+		spin_unlock_irq(&conf->device_lock);
+		md_wakeup_thread(mddev->thread);
+		kfree(plug);
+		return;
+	}
+
+	/* we aren't scheduling, so we can do the write-out directly. */
+	bio = bio_list_get(&plug->pending);
+	bitmap_unplug(mddev->bitmap);
+	wake_up(&conf->wait_barrier);
+
+	while (bio) { /* submit pending writes */
+		struct bio *next = bio->bi_next;
+		bio->bi_next = NULL;
+		generic_make_request(bio);
+		bio = next;
+	}
+	kfree(plug);
+}
+
 static void make_request(struct mddev *mddev, struct bio * bio)
 {
 	struct r1conf *conf = mddev->private;
@@ -957,6 +995,8 @@ static void make_request(struct mddev *mddev, struct bio * bio)
 	const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
 	const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
 	struct md_rdev *blocked_rdev;
+	struct blk_plug_cb *cb;
+	struct raid1_plug_cb *plug = NULL;
 	int first_clone;
 	int sectors_handled;
 	int max_sectors;
@@ -1259,11 +1299,22 @@ read_again:
 		mbio->bi_private = r1_bio;
 
 		atomic_inc(&r1_bio->remaining);
+
+		cb = blk_check_plugged(raid1_unplug, mddev, sizeof(*plug));
+		if (cb)
+			plug = container_of(cb, struct raid1_plug_cb, cb);
+		else
+			plug = NULL;
 		spin_lock_irqsave(&conf->device_lock, flags);
-		bio_list_add(&conf->pending_bio_list, mbio);
-		conf->pending_count++;
+		if (plug) {
+			bio_list_add(&plug->pending, mbio);
+			plug->pending_cnt++;
+		} else {
+			bio_list_add(&conf->pending_bio_list, mbio);
+			conf->pending_count++;
+		}
 		spin_unlock_irqrestore(&conf->device_lock, flags);
-		if (!mddev_check_plugged(mddev))
+		if (!plug)
 			md_wakeup_thread(mddev->thread);
 	}
 	/* Mustn't call r1_bio_write_done before this next test,
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 87a2d0bdedd1..adda94df5eb2 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -484,7 +484,8 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
 	} else {
 		if (atomic_read(&sh->count)) {
 			BUG_ON(!list_empty(&sh->lru)
-			    && !test_bit(STRIPE_EXPANDING, &sh->state));
+			    && !test_bit(STRIPE_EXPANDING, &sh->state)
+			    && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state));
 		} else {
 			if (!test_bit(STRIPE_HANDLE, &sh->state))
 				atomic_inc(&conf->active_stripes);
@@ -4010,6 +4011,62 @@ static struct stripe_head *__get_priority_stripe(struct r5conf *conf)
 	return sh;
 }
 
+struct raid5_plug_cb {
+	struct blk_plug_cb	cb;
+	struct list_head	list;
+};
+
+static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
+{
+	struct raid5_plug_cb *cb = container_of(
+		blk_cb, struct raid5_plug_cb, cb);
+	struct stripe_head *sh;
+	struct mddev *mddev = cb->cb.data;
+	struct r5conf *conf = mddev->private;
+
+	if (cb->list.next && !list_empty(&cb->list)) {
+		spin_lock_irq(&conf->device_lock);
+		while (!list_empty(&cb->list)) {
+			sh = list_first_entry(&cb->list, struct stripe_head, lru);
+			list_del_init(&sh->lru);
+			/*
+			 * avoid race release_stripe_plug() sees
+			 * STRIPE_ON_UNPLUG_LIST clear but the stripe
+			 * is still in our list
+			 */
+			smp_mb__before_clear_bit();
+			clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state);
+			__release_stripe(conf, sh);
+		}
+		spin_unlock_irq(&conf->device_lock);
+	}
+	kfree(cb);
+}
+
+static void release_stripe_plug(struct mddev *mddev,
+				struct stripe_head *sh)
+{
+	struct blk_plug_cb *blk_cb = blk_check_plugged(
+		raid5_unplug, mddev,
+		sizeof(struct raid5_plug_cb));
+	struct raid5_plug_cb *cb;
+
+	if (!blk_cb) {
+		release_stripe(sh);
+		return;
+	}
+
+	cb = container_of(blk_cb, struct raid5_plug_cb, cb);
+
+	if (cb->list.next == NULL)
+		INIT_LIST_HEAD(&cb->list);
+
+	if (!test_and_set_bit(STRIPE_ON_UNPLUG_LIST, &sh->state))
+		list_add_tail(&sh->lru, &cb->list);
+	else
+		release_stripe(sh);
+}
+
 static void make_request(struct mddev *mddev, struct bio * bi)
 {
 	struct r5conf *conf = mddev->private;
@@ -4138,8 +4195,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 			if ((bi->bi_rw & REQ_NOIDLE) &&
 			    !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
 				atomic_inc(&conf->preread_active_stripes);
-			mddev_check_plugged(mddev);
-			release_stripe(sh);
+			release_stripe_plug(mddev, sh);
 		} else {
 			/* cannot get stripe for read-ahead, just give-up */
 			clear_bit(BIO_UPTODATE, &bi->bi_flags);
@@ -4537,6 +4593,30 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
 	return handled;
 }
 
+#define MAX_STRIPE_BATCH 8
+static int handle_active_stripes(struct r5conf *conf)
+{
+	struct stripe_head *batch[MAX_STRIPE_BATCH], *sh;
+	int i, batch_size = 0;
+
+	while (batch_size < MAX_STRIPE_BATCH &&
+			(sh = __get_priority_stripe(conf)) != NULL)
+		batch[batch_size++] = sh;
+
+	if (batch_size == 0)
+		return batch_size;
+	spin_unlock_irq(&conf->device_lock);
+
+	for (i = 0; i < batch_size; i++)
+		handle_stripe(batch[i]);
+
+	cond_resched();
+
+	spin_lock_irq(&conf->device_lock);
+	for (i = 0; i < batch_size; i++)
+		__release_stripe(conf, batch[i]);
+	return batch_size;
+}
 
 /*
  * This is our raid5 kernel thread.
@@ -4547,7 +4627,6 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
  */
 static void raid5d(struct mddev *mddev)
 {
-	struct stripe_head *sh;
 	struct r5conf *conf = mddev->private;
 	int handled;
 	struct blk_plug plug;
@@ -4561,6 +4640,7 @@ static void raid5d(struct mddev *mddev)
 	spin_lock_irq(&conf->device_lock);
 	while (1) {
 		struct bio *bio;
+		int batch_size;
 
 		if (
 		    !list_empty(&conf->bitmap_list)) {
@@ -4584,21 +4664,16 @@ static void raid5d(struct mddev *mddev)
 			handled++;
 		}
 
-		sh = __get_priority_stripe(conf);
-
-		if (!sh)
+		batch_size = handle_active_stripes(conf);
+		if (!batch_size)
 			break;
-		spin_unlock_irq(&conf->device_lock);
-
-		handled++;
-		handle_stripe(sh);
-		release_stripe(sh);
-		cond_resched();
+		handled += batch_size;
 
-		if (mddev->flags & ~(1<<MD_CHANGE_PENDING))
+		if (mddev->flags & ~(1<<MD_CHANGE_PENDING)) {
+			spin_unlock_irq(&conf->device_lock);
 			md_check_recovery(mddev);
-
-		spin_lock_irq(&conf->device_lock);
+			spin_lock_irq(&conf->device_lock);
+		}
 	}
 	pr_debug("%d stripes handled\n", handled);
 
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 61dbb615c30b..a9fc24901eda 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -321,6 +321,7 @@ enum {
 	STRIPE_BIOFILL_RUN,
 	STRIPE_COMPUTE_RUN,
 	STRIPE_OPS_REQ_PENDING,
+	STRIPE_ON_UNPLUG_LIST,
 };
 
 /*