-rw-r--r--  drivers/md/Kconfig  |   5
-rw-r--r--  drivers/md/bitmap.c |   2
-rw-r--r--  drivers/md/raid1.c  |  57
-rw-r--r--  drivers/md/raid5.c  | 107
-rw-r--r--  drivers/md/raid5.h  |   1
5 files changed, 150 insertions(+), 22 deletions(-)
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 1eee45b69b71..d949b781f6f8 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -268,13 +268,14 @@ config DM_MIRROR
           needed for live data migration tools such as 'pvmove'.
 
 config DM_RAID
-        tristate "RAID 1/4/5/6 target"
+        tristate "RAID 1/4/5/6/10 target"
         depends on BLK_DEV_DM
         select MD_RAID1
+        select MD_RAID10
         select MD_RAID456
         select BLK_DEV_MD
         ---help---
-          A dm target that supports RAID1, RAID4, RAID5 and RAID6 mappings
+          A dm target that supports RAID1, RAID10, RAID4, RAID5 and RAID6 mappings
 
           A RAID-5 set of N drives with a capacity of C MB per drive provides
           the capacity of C * (N - 1) MB, and protects against a failure
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 15dbe03117e4..94e7f6ba2e11 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -1305,7 +1305,7 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect
                         prepare_to_wait(&bitmap->overflow_wait, &__wait,
                                         TASK_UNINTERRUPTIBLE);
                         spin_unlock_irq(&bitmap->counts.lock);
-                        io_schedule();
+                        schedule();
                         finish_wait(&bitmap->overflow_wait, &__wait);
                         continue;
                 }
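The hunk above swaps io_schedule() for schedule() inside the standard prepare_to_wait()/finish_wait() sleep sequence. The two differ in ways that matter for this series: io_schedule() charges the sleep to iowait accounting and flushes the task's blk_plug with from_schedule=false (synchronous flush), while schedule() flushes it with from_schedule=true, which the new unplug callbacks below treat as "defer to the md thread". A minimal sketch of the wait pattern itself, with a hypothetical lock and waitqueue standing in for the bitmap's counts.lock and overflow_wait:

#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/wait.h>

/* Hypothetical state, standing in for bitmap->counts.lock and
 * bitmap->overflow_wait; not part of the patch. */
static DEFINE_SPINLOCK(counts_lock);
static DECLARE_WAIT_QUEUE_HEAD(overflow_wait);

/* Called with counts_lock held; returns with it held again. */
static void wait_for_counter_room(void)
{
        DEFINE_WAIT(wait);

        /* Register on the waitqueue *before* dropping the lock, so a
         * wake_up(&overflow_wait) issued in the window between the
         * unlock and schedule() is not lost. */
        prepare_to_wait(&overflow_wait, &wait, TASK_UNINTERRUPTIBLE);
        spin_unlock_irq(&counts_lock);
        schedule();                     /* sleep until woken */
        finish_wait(&overflow_wait, &wait);
        spin_lock_irq(&counts_lock);
}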
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 9f7f8bee8442..611b5f797618 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -944,6 +944,44 @@ do_sync_io:
         pr_debug("%dB behind alloc failed, doing sync I/O\n", bio->bi_size);
 }
 
+struct raid1_plug_cb {
+        struct blk_plug_cb      cb;
+        struct bio_list         pending;
+        int                     pending_cnt;
+};
+
+static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
+{
+        struct raid1_plug_cb *plug = container_of(cb, struct raid1_plug_cb,
+                                                  cb);
+        struct mddev *mddev = plug->cb.data;
+        struct r1conf *conf = mddev->private;
+        struct bio *bio;
+
+        if (from_schedule) {
+                spin_lock_irq(&conf->device_lock);
+                bio_list_merge(&conf->pending_bio_list, &plug->pending);
+                conf->pending_count += plug->pending_cnt;
+                spin_unlock_irq(&conf->device_lock);
+                md_wakeup_thread(mddev->thread);
+                kfree(plug);
+                return;
+        }
+
+        /* we aren't scheduling, so we can do the write-out directly. */
+        bio = bio_list_get(&plug->pending);
+        bitmap_unplug(mddev->bitmap);
+        wake_up(&conf->wait_barrier);
+
+        while (bio) { /* submit pending writes */
+                struct bio *next = bio->bi_next;
+                bio->bi_next = NULL;
+                generic_make_request(bio);
+                bio = next;
+        }
+        kfree(plug);
+}
+
 static void make_request(struct mddev *mddev, struct bio * bio)
 {
         struct r1conf *conf = mddev->private;
@@ -957,6 +995,8 @@ static void make_request(struct mddev *mddev, struct bio * bio)
         const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
         const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
         struct md_rdev *blocked_rdev;
+        struct blk_plug_cb *cb;
+        struct raid1_plug_cb *plug = NULL;
         int first_clone;
         int sectors_handled;
         int max_sectors;
@@ -1259,11 +1299,22 @@ read_again:
                 mbio->bi_private = r1_bio;
 
                 atomic_inc(&r1_bio->remaining);
+
+                cb = blk_check_plugged(raid1_unplug, mddev, sizeof(*plug));
+                if (cb)
+                        plug = container_of(cb, struct raid1_plug_cb, cb);
+                else
+                        plug = NULL;
                 spin_lock_irqsave(&conf->device_lock, flags);
-                bio_list_add(&conf->pending_bio_list, mbio);
-                conf->pending_count++;
+                if (plug) {
+                        bio_list_add(&plug->pending, mbio);
+                        plug->pending_cnt++;
+                } else {
+                        bio_list_add(&conf->pending_bio_list, mbio);
+                        conf->pending_count++;
+                }
                 spin_unlock_irqrestore(&conf->device_lock, flags);
-                if (!mddev_check_plugged(mddev))
+                if (!plug)
                         md_wakeup_thread(mddev->thread);
         }
         /* Mustn't call r1_bio_write_done before this next test,
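The raid1.c change is built on blk_check_plugged(), which looks up (or, on first use, allocates zero-initialized) a per-task callback hanging off the current blk_plug and invokes it when the plug is flushed. The embedded blk_plug_cb must come first, because blk_check_plugged() allocates the enclosing structure itself. A condensed sketch of the pattern; my_plug_cb, my_unplug, and my_dev are illustrative names, not part of the patch:

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/slab.h>

struct my_plug_cb {
        struct blk_plug_cb cb;  /* must be first: blk_check_plugged()
                                 * allocates this whole struct and
                                 * returns a pointer to its start */
        struct bio_list pending;        /* zeroed == valid empty list */
};

static void my_unplug(struct blk_plug_cb *cb, bool from_schedule)
{
        struct my_plug_cb *plug = container_of(cb, struct my_plug_cb, cb);
        struct bio *bio = bio_list_get(&plug->pending);

        /* from_schedule == true means the plug was flushed because the
         * task blocked; a real driver (raid1 above) hands the list to a
         * worker thread there rather than submitting from that context.
         * This sketch submits directly either way, for brevity. */
        while (bio) {
                struct bio *next = bio->bi_next;
                bio->bi_next = NULL;
                generic_make_request(bio);
                bio = next;
        }
        kfree(plug);            /* the callback owns the allocation */
}

static void my_queue_bio(void *my_dev, struct bio *bio)
{
        struct blk_plug_cb *cb;

        /* Returns the callback already registered for (my_unplug,
         * my_dev) on the current task's plug, allocates a zeroed one on
         * first use, or returns NULL if the task is not plugged. */
        cb = blk_check_plugged(my_unplug, my_dev, sizeof(struct my_plug_cb));
        if (cb) {
                struct my_plug_cb *plug =
                        container_of(cb, struct my_plug_cb, cb);
                bio_list_add(&plug->pending, bio);      /* defer to unplug */
        } else {
                generic_make_request(bio);      /* no plug: submit now */
        }
}

The payoff in make_request() above: while the submitting task is plugged, writes accumulate in the per-task raid1_plug_cb with no md-thread wakeup per bio; the whole batch is handed over (or submitted) in one go at unplug time.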
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 87a2d0bdedd1..adda94df5eb2 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -484,7 +484,8 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
         } else {
                 if (atomic_read(&sh->count)) {
                         BUG_ON(!list_empty(&sh->lru)
-                                && !test_bit(STRIPE_EXPANDING, &sh->state));
+                                && !test_bit(STRIPE_EXPANDING, &sh->state)
+                                && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state));
                 } else {
                         if (!test_bit(STRIPE_HANDLE, &sh->state))
                                 atomic_inc(&conf->active_stripes);
@@ -4010,6 +4011,62 @@ static struct stripe_head *__get_priority_stripe(struct r5conf *conf)
         return sh;
 }
 
+struct raid5_plug_cb {
+        struct blk_plug_cb      cb;
+        struct list_head        list;
+};
+
+static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
+{
+        struct raid5_plug_cb *cb = container_of(
+                blk_cb, struct raid5_plug_cb, cb);
+        struct stripe_head *sh;
+        struct mddev *mddev = cb->cb.data;
+        struct r5conf *conf = mddev->private;
+
+        if (cb->list.next && !list_empty(&cb->list)) {
+                spin_lock_irq(&conf->device_lock);
+                while (!list_empty(&cb->list)) {
+                        sh = list_first_entry(&cb->list, struct stripe_head, lru);
+                        list_del_init(&sh->lru);
+                        /*
+                         * avoid race release_stripe_plug() sees
+                         * STRIPE_ON_UNPLUG_LIST clear but the stripe
+                         * is still in our list
+                         */
+                        smp_mb__before_clear_bit();
+                        clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state);
+                        __release_stripe(conf, sh);
+                }
+                spin_unlock_irq(&conf->device_lock);
+        }
+        kfree(cb);
+}
+
+static void release_stripe_plug(struct mddev *mddev,
+                                struct stripe_head *sh)
+{
+        struct blk_plug_cb *blk_cb = blk_check_plugged(
+                raid5_unplug, mddev,
+                sizeof(struct raid5_plug_cb));
+        struct raid5_plug_cb *cb;
+
+        if (!blk_cb) {
+                release_stripe(sh);
+                return;
+        }
+
+        cb = container_of(blk_cb, struct raid5_plug_cb, cb);
+
+        if (cb->list.next == NULL)
+                INIT_LIST_HEAD(&cb->list);
+
+        if (!test_and_set_bit(STRIPE_ON_UNPLUG_LIST, &sh->state))
+                list_add_tail(&sh->lru, &cb->list);
+        else
+                release_stripe(sh);
+}
+
 static void make_request(struct mddev *mddev, struct bio * bi)
 {
         struct r5conf *conf = mddev->private;
@@ -4138,8 +4195,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
                         if ((bi->bi_rw & REQ_NOIDLE) &&
                             !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
                                 atomic_inc(&conf->preread_active_stripes);
-                        mddev_check_plugged(mddev);
-                        release_stripe(sh);
+                        release_stripe_plug(mddev, sh);
                 } else {
                         /* cannot get stripe for read-ahead, just give-up */
                         clear_bit(BIO_UPTODATE, &bi->bi_flags);
@@ -4537,6 +4593,30 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
         return handled;
 }
 
+#define MAX_STRIPE_BATCH 8
+static int handle_active_stripes(struct r5conf *conf)
+{
+        struct stripe_head *batch[MAX_STRIPE_BATCH], *sh;
+        int i, batch_size = 0;
+
+        while (batch_size < MAX_STRIPE_BATCH &&
+                        (sh = __get_priority_stripe(conf)) != NULL)
+                batch[batch_size++] = sh;
+
+        if (batch_size == 0)
+                return batch_size;
+        spin_unlock_irq(&conf->device_lock);
+
+        for (i = 0; i < batch_size; i++)
+                handle_stripe(batch[i]);
+
+        cond_resched();
+
+        spin_lock_irq(&conf->device_lock);
+        for (i = 0; i < batch_size; i++)
+                __release_stripe(conf, batch[i]);
+        return batch_size;
+}
 
 /*
  * This is our raid5 kernel thread.
@@ -4547,7 +4627,6 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
  */
 static void raid5d(struct mddev *mddev)
 {
-        struct stripe_head *sh;
         struct r5conf *conf = mddev->private;
         int handled;
         struct blk_plug plug;
@@ -4561,6 +4640,7 @@ static void raid5d(struct mddev *mddev)
         spin_lock_irq(&conf->device_lock);
         while (1) {
                 struct bio *bio;
+                int batch_size;
 
                 if (
                     !list_empty(&conf->bitmap_list)) {
@@ -4584,21 +4664,16 @@ static void raid5d(struct mddev *mddev)
                         handled++;
                 }
 
-                sh = __get_priority_stripe(conf);
-
-                if (!sh)
+                batch_size = handle_active_stripes(conf);
+                if (!batch_size)
                         break;
-                spin_unlock_irq(&conf->device_lock);
-
-                handled++;
-                handle_stripe(sh);
-                release_stripe(sh);
-                cond_resched();
+                handled += batch_size;
 
-                if (mddev->flags & ~(1<<MD_CHANGE_PENDING))
+                if (mddev->flags & ~(1<<MD_CHANGE_PENDING)) {
+                        spin_unlock_irq(&conf->device_lock);
                         md_check_recovery(mddev);
-
-                spin_lock_irq(&conf->device_lock);
+                        spin_lock_irq(&conf->device_lock);
+                }
         }
         pr_debug("%d stripes handled\n", handled);
 
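Besides reusing the blk_check_plugged() mechanism from raid1 (release_stripe_plug() parks stripes on a per-task list instead of releasing them one at a time), the raid5.c changes convert raid5d() to drain stripes through handle_active_stripes(), amortizing one device_lock round-trip over up to MAX_STRIPE_BATCH stripes rather than cycling the lock per stripe. A generic sketch of that batching idiom, with a hypothetical work_item queue standing in for __get_priority_stripe()/handle_stripe():

#include <linux/list.h>
#include <linux/sched.h>
#include <linux/spinlock.h>

#define MAX_BATCH 8     /* mirrors MAX_STRIPE_BATCH above */

/* Hypothetical work item and queue; stand-ins for stripe_head and the
 * r5conf handle lists. */
struct work_item {
        struct list_head node;
};

static DEFINE_SPINLOCK(queue_lock);
static LIST_HEAD(work_queue);

static void process(struct work_item *w)
{
        /* expensive per-item work, done without the lock held */
}

/* Caller holds queue_lock; returns the number of items handled. */
static int handle_batch(void)
{
        struct work_item *batch[MAX_BATCH];
        int i, n = 0;

        /* Dequeue up to MAX_BATCH items while the lock is held. */
        while (n < MAX_BATCH && !list_empty(&work_queue)) {
                batch[n] = list_first_entry(&work_queue,
                                            struct work_item, node);
                list_del_init(&batch[n]->node);
                n++;
        }
        if (!n)
                return 0;

        /* One unlock/lock round-trip amortized over the whole batch,
         * instead of one per item as the old raid5d() loop did. */
        spin_unlock_irq(&queue_lock);
        for (i = 0; i < n; i++)
                process(batch[i]);
        cond_resched();
        spin_lock_irq(&queue_lock);

        return n;
}

In the patch itself there is one extra step this sketch omits: after re-acquiring the lock, each stripe is put back onto the conf lists via __release_stripe(). The small fixed cap keeps the batch array on the stack and bounds how much work accumulates between lock refills.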
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 61dbb615c30b..a9fc24901eda 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -321,6 +321,7 @@ enum {
         STRIPE_BIOFILL_RUN,
         STRIPE_COMPUTE_RUN,
         STRIPE_OPS_REQ_PENDING,
+        STRIPE_ON_UNPLUG_LIST,
 };
 
 /*
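The new STRIPE_ON_UNPLUG_LIST flag acts as an atomic membership guard: only the caller that wins the test_and_set_bit() in release_stripe_plug() may queue the stripe (so sh->lru is on at most one plug list), and raid5_unplug() publishes the cleared bit only after the stripe is unlinked, ordered by smp_mb__before_clear_bit(). A stripped-down sketch of the idiom, with a hypothetical ON_LIST bit and item type:

#include <linux/bitops.h>
#include <linux/list.h>

#define ON_LIST 0       /* hypothetical flag bit, like STRIPE_ON_UNPLUG_LIST */

struct item {
        unsigned long state;
        struct list_head lru;
};

/* Producer: only the 0->1 transition owner may queue the item. */
static bool try_queue(struct item *it, struct list_head *plug_list)
{
        if (test_and_set_bit(ON_LIST, &it->state))
                return false;   /* already queued elsewhere */
        list_add_tail(&it->lru, plug_list);
        return true;
}

/* Consumer, under the relevant lock: unlink first, then publish the
 * cleared bit. The barrier keeps the list_del_init() store visible
 * before the bit reads as clear, matching the race comment in
 * raid5_unplug() above. */
static void dequeue(struct item *it)
{
        list_del_init(&it->lru);
        smp_mb__before_clear_bit();
        clear_bit(ON_LIST, &it->state);
}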
