diff options
author | Shaohua Li <shli@kernel.org> | 2014-04-08 23:25:47 -0400 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2014-04-09 00:42:38 -0400 |
commit | 27c0f68f0745218cec70f19ba7560c8c5fc3f817 (patch) | |
tree | c8f8fcefca0ced4a5e990a94094c2c2b70ee2c55 /drivers/md | |
parent | e2f23b606b94f28a8febd5aa715df697d80b018e (diff) |
raid5: make_request does less prepare wait
In NUMA machine, prepare_to_wait/finish_wait in make_request exposes a
lot of contention for sequential workload (or big request size
workload). For such workload, each bio includes several stripes. So we
can just do prepare_to_wait/finish_wait once for the whole bio instead
of every stripe. This reduces the lock contention completely for such
workload. Random workloads might have similar lock contention too,
but I didn't see it yet, maybe because my storage is still not fast
enough.
Signed-off-by: Shaohua Li <shli@fusionio.com>
Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/raid5.c | 19 |
1 files changed, 14 insertions, 5 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 16f5c21963db..a904a2c80fc8 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -4552,6 +4552,8 @@ static void make_request(struct mddev *mddev, struct bio * bi) | |||
4552 | struct stripe_head *sh; | 4552 | struct stripe_head *sh; |
4553 | const int rw = bio_data_dir(bi); | 4553 | const int rw = bio_data_dir(bi); |
4554 | int remaining; | 4554 | int remaining; |
4555 | DEFINE_WAIT(w); | ||
4556 | bool do_prepare; | ||
4555 | 4557 | ||
4556 | if (unlikely(bi->bi_rw & REQ_FLUSH)) { | 4558 | if (unlikely(bi->bi_rw & REQ_FLUSH)) { |
4557 | md_flush_request(mddev, bi); | 4559 | md_flush_request(mddev, bi); |
@@ -4575,15 +4577,18 @@ static void make_request(struct mddev *mddev, struct bio * bi) | |||
4575 | bi->bi_next = NULL; | 4577 | bi->bi_next = NULL; |
4576 | bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ | 4578 | bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ |
4577 | 4579 | ||
4580 | prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE); | ||
4578 | for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { | 4581 | for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { |
4579 | DEFINE_WAIT(w); | ||
4580 | int previous; | 4582 | int previous; |
4581 | int seq; | 4583 | int seq; |
4582 | 4584 | ||
4585 | do_prepare = false; | ||
4583 | retry: | 4586 | retry: |
4584 | seq = read_seqcount_begin(&conf->gen_lock); | 4587 | seq = read_seqcount_begin(&conf->gen_lock); |
4585 | previous = 0; | 4588 | previous = 0; |
4586 | prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE); | 4589 | if (do_prepare) |
4590 | prepare_to_wait(&conf->wait_for_overlap, &w, | ||
4591 | TASK_UNINTERRUPTIBLE); | ||
4587 | if (unlikely(conf->reshape_progress != MaxSector)) { | 4592 | if (unlikely(conf->reshape_progress != MaxSector)) { |
4588 | /* spinlock is needed as reshape_progress may be | 4593 | /* spinlock is needed as reshape_progress may be |
4589 | * 64bit on a 32bit platform, and so it might be | 4594 | * 64bit on a 32bit platform, and so it might be |
@@ -4604,6 +4609,7 @@ static void make_request(struct mddev *mddev, struct bio * bi) | |||
4604 | : logical_sector >= conf->reshape_safe) { | 4609 | : logical_sector >= conf->reshape_safe) { |
4605 | spin_unlock_irq(&conf->device_lock); | 4610 | spin_unlock_irq(&conf->device_lock); |
4606 | schedule(); | 4611 | schedule(); |
4612 | do_prepare = true; | ||
4607 | goto retry; | 4613 | goto retry; |
4608 | } | 4614 | } |
4609 | } | 4615 | } |
@@ -4640,6 +4646,7 @@ static void make_request(struct mddev *mddev, struct bio * bi) | |||
4640 | if (must_retry) { | 4646 | if (must_retry) { |
4641 | release_stripe(sh); | 4647 | release_stripe(sh); |
4642 | schedule(); | 4648 | schedule(); |
4649 | do_prepare = true; | ||
4643 | goto retry; | 4650 | goto retry; |
4644 | } | 4651 | } |
4645 | } | 4652 | } |
@@ -4663,8 +4670,10 @@ static void make_request(struct mddev *mddev, struct bio * bi) | |||
4663 | prepare_to_wait(&conf->wait_for_overlap, | 4670 | prepare_to_wait(&conf->wait_for_overlap, |
4664 | &w, TASK_INTERRUPTIBLE); | 4671 | &w, TASK_INTERRUPTIBLE); |
4665 | if (logical_sector >= mddev->suspend_lo && | 4672 | if (logical_sector >= mddev->suspend_lo && |
4666 | logical_sector < mddev->suspend_hi) | 4673 | logical_sector < mddev->suspend_hi) { |
4667 | schedule(); | 4674 | schedule(); |
4675 | do_prepare = true; | ||
4676 | } | ||
4668 | goto retry; | 4677 | goto retry; |
4669 | } | 4678 | } |
4670 | 4679 | ||
@@ -4677,9 +4686,9 @@ static void make_request(struct mddev *mddev, struct bio * bi) | |||
4677 | md_wakeup_thread(mddev->thread); | 4686 | md_wakeup_thread(mddev->thread); |
4678 | release_stripe(sh); | 4687 | release_stripe(sh); |
4679 | schedule(); | 4688 | schedule(); |
4689 | do_prepare = true; | ||
4680 | goto retry; | 4690 | goto retry; |
4681 | } | 4691 | } |
4682 | finish_wait(&conf->wait_for_overlap, &w); | ||
4683 | set_bit(STRIPE_HANDLE, &sh->state); | 4692 | set_bit(STRIPE_HANDLE, &sh->state); |
4684 | clear_bit(STRIPE_DELAYED, &sh->state); | 4693 | clear_bit(STRIPE_DELAYED, &sh->state); |
4685 | if ((bi->bi_rw & REQ_SYNC) && | 4694 | if ((bi->bi_rw & REQ_SYNC) && |
@@ -4689,10 +4698,10 @@ static void make_request(struct mddev *mddev, struct bio * bi) | |||
4689 | } else { | 4698 | } else { |
4690 | /* cannot get stripe for read-ahead, just give-up */ | 4699 | /* cannot get stripe for read-ahead, just give-up */ |
4691 | clear_bit(BIO_UPTODATE, &bi->bi_flags); | 4700 | clear_bit(BIO_UPTODATE, &bi->bi_flags); |
4692 | finish_wait(&conf->wait_for_overlap, &w); | ||
4693 | break; | 4701 | break; |
4694 | } | 4702 | } |
4695 | } | 4703 | } |
4704 | finish_wait(&conf->wait_for_overlap, &w); | ||
4696 | 4705 | ||
4697 | remaining = raid5_dec_bi_active_stripes(bi); | 4706 | remaining = raid5_dec_bi_active_stripes(bi); |
4698 | if (remaining == 0) { | 4707 | if (remaining == 0) { |