author	Shaohua Li <shli@kernel.org>	2014-04-08 23:25:47 -0400
committer	NeilBrown <neilb@suse.de>	2014-04-09 00:42:38 -0400
commit	27c0f68f0745218cec70f19ba7560c8c5fc3f817 (patch)
tree	c8f8fcefca0ced4a5e990a94094c2c2b70ee2c55 /drivers/md
parent	e2f23b606b94f28a8febd5aa715df697d80b018e (diff)
raid5: make_request does less prepare wait
On a NUMA machine, the prepare_to_wait/finish_wait calls in make_request expose a lot of contention for sequential workloads (or workloads with large request sizes). For such workloads each bio covers several stripes, so we can do prepare_to_wait/finish_wait once for the whole bio instead of once per stripe. This removes the lock contention entirely for such workloads. Random workloads might show similar contention, but I haven't seen it yet, maybe because my storage is still not fast enough.

Signed-off-by: Shaohua Li <shli@fusionio.com>
Signed-off-by: NeilBrown <neilb@suse.de>
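The pattern the patch moves to, shown below as a simplified sketch (the wait queue wq and the helpers stripe_needs_wait()/handle_one_stripe() are hypothetical stand-ins, not the real raid5 symbols): arm the wait entry once before the per-stripe loop, re-arm it only after an actual schedule(), and call finish_wait() once after the loop.

/* Simplified sketch of the hoisted wait pattern; wq, stripe_needs_wait()
 * and handle_one_stripe() are hypothetical stand-ins, not raid5 code. */
static void submit_stripes(wait_queue_head_t *wq, sector_t first,
			   sector_t last)
{
	DEFINE_WAIT(w);
	bool do_prepare;
	sector_t sector;

	/* Arm the wait entry once for the whole bio... */
	prepare_to_wait(wq, &w, TASK_UNINTERRUPTIBLE);
	for (sector = first; sector < last; sector += STRIPE_SECTORS) {
		do_prepare = false;
retry:
		/* ...and re-arm it only after we actually slept. */
		if (do_prepare)
			prepare_to_wait(wq, &w, TASK_UNINTERRUPTIBLE);
		if (stripe_needs_wait(sector)) {
			schedule();
			do_prepare = true;
			goto retry;
		}
		handle_one_stripe(sector);
	}
	/* One finish_wait() for the whole bio instead of one per stripe. */
	finish_wait(wq, &w);
}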
Diffstat (limited to 'drivers/md')
-rw-r--r--	drivers/md/raid5.c | 19
1 file changed, 14 insertions(+), 5 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 16f5c21963db..a904a2c80fc8 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4552,6 +4552,8 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 	struct stripe_head *sh;
 	const int rw = bio_data_dir(bi);
 	int remaining;
+	DEFINE_WAIT(w);
+	bool do_prepare;
 
 	if (unlikely(bi->bi_rw & REQ_FLUSH)) {
 		md_flush_request(mddev, bi);
@@ -4575,15 +4577,18 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 	bi->bi_next = NULL;
 	bi->bi_phys_segments = 1;	/* over-loaded to count active stripes */
 
+	prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
 	for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
-		DEFINE_WAIT(w);
 		int previous;
 		int seq;
 
+		do_prepare = false;
 	retry:
 		seq = read_seqcount_begin(&conf->gen_lock);
 		previous = 0;
-		prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
+		if (do_prepare)
+			prepare_to_wait(&conf->wait_for_overlap, &w,
+				TASK_UNINTERRUPTIBLE);
 		if (unlikely(conf->reshape_progress != MaxSector)) {
 			/* spinlock is needed as reshape_progress may be
 			 * 64bit on a 32bit platform, and so it might be
@@ -4604,6 +4609,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 				    : logical_sector >= conf->reshape_safe) {
 					spin_unlock_irq(&conf->device_lock);
 					schedule();
+					do_prepare = true;
 					goto retry;
 				}
 			}
@@ -4640,6 +4646,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 				if (must_retry) {
 					release_stripe(sh);
 					schedule();
+					do_prepare = true;
 					goto retry;
 				}
 			}
@@ -4663,8 +4670,10 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 				prepare_to_wait(&conf->wait_for_overlap,
 						&w, TASK_INTERRUPTIBLE);
 				if (logical_sector >= mddev->suspend_lo &&
-				    logical_sector < mddev->suspend_hi)
+				    logical_sector < mddev->suspend_hi) {
 					schedule();
+					do_prepare = true;
+				}
 				goto retry;
 			}
 
@@ -4677,9 +4686,9 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 				md_wakeup_thread(mddev->thread);
 				release_stripe(sh);
 				schedule();
+				do_prepare = true;
 				goto retry;
 			}
-			finish_wait(&conf->wait_for_overlap, &w);
 			set_bit(STRIPE_HANDLE, &sh->state);
 			clear_bit(STRIPE_DELAYED, &sh->state);
 			if ((bi->bi_rw & REQ_SYNC) &&
@@ -4689,10 +4698,10 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 		} else {
 			/* cannot get stripe for read-ahead, just give-up */
 			clear_bit(BIO_UPTODATE, &bi->bi_flags);
-			finish_wait(&conf->wait_for_overlap, &w);
 			break;
 		}
 	}
+	finish_wait(&conf->wait_for_overlap, &w);
 
 	remaining = raid5_dec_bi_active_stripes(bi);
 	if (remaining == 0) {