aboutsummaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
authorShaohua Li <shaohua.li@intel.com>2011-08-11 04:39:04 -0400
committerJens Axboe <jaxboe@fusionio.com>2011-08-11 04:39:04 -0400
commitbcf30e75b773b60379338768677a1301ef602ff9 (patch)
tree2e3e657fe4c5bbf65ffb16198e4a416429c8c173 /block
parentc09c47caedc9854d59378d6e34c989e51cfdd2b4 (diff)
block: improve rq_affinity placement
This patch reverts commit 35ae66e0a09ab70ed (block: Make rq_affinity = 1 work as expected). The purpose is to avoid an unnecessary IPI. Let's take an example. My test box has CPUs 0-7 in one socket. Say a request is added from CPU 1 and blk_complete_request() occurs on CPU 7. Without the reverted patch, the softirq will be done on CPU 7. With it, an IPI will be directed to CPU 0, and the softirq will be done on CPU 0. In this case, doing the softirq on CPU 0 or on CPU 7 makes no difference from a cache-sharing point of view, and we can avoid an IPI by doing it on CPU 7. An immediate concern is that this is just like QUEUE_FLAG_SAME_FORCE, but it actually is not. blk_complete_request() runs in the interrupt handler, and current I/O controllers don't support multiple interrupts (I checked several LSI cards and AHCI), so only one CPU can run blk_complete_request(). This is still quite different from QUEUE_FLAG_SAME_FORCE. Since only one CPU runs the softirq, the only difference with the patch below is that the softirq does not always run on the first CPU of a group. Signed-off-by: Shaohua Li <shaohua.li@intel.com> Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
Diffstat (limited to 'block')
-rw-r--r--block/blk-softirq.c16
1 files changed, 13 insertions, 3 deletions
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index 487addc85bb5..58340d0cb23a 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -103,7 +103,7 @@ static struct notifier_block __cpuinitdata blk_cpu_notifier = {
103 103
104void __blk_complete_request(struct request *req) 104void __blk_complete_request(struct request *req)
105{ 105{
106 int ccpu, cpu; 106 int ccpu, cpu, group_cpu = NR_CPUS;
107 struct request_queue *q = req->q; 107 struct request_queue *q = req->q;
108 unsigned long flags; 108 unsigned long flags;
109 109
@@ -117,12 +117,22 @@ void __blk_complete_request(struct request *req)
117 */ 117 */
118 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1) { 118 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1) {
119 ccpu = req->cpu; 119 ccpu = req->cpu;
120 if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) 120 if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) {
121 ccpu = blk_cpu_to_group(ccpu); 121 ccpu = blk_cpu_to_group(ccpu);
122 group_cpu = blk_cpu_to_group(cpu);
123 }
122 } else 124 } else
123 ccpu = cpu; 125 ccpu = cpu;
124 126
125 if (ccpu == cpu) { 127 /*
128 * If current CPU and requested CPU are in the same group, running
129 * softirq in current CPU. One might concern this is just like
130 * QUEUE_FLAG_SAME_FORCE, but actually not. blk_complete_request() is
131 * running in interrupt handler, and currently I/O controller doesn't
132 * support multiple interrupts, so current CPU is unique actually. This
133 * avoids IPI sending from current CPU to the first CPU of a group.
134 */
135 if (ccpu == cpu || ccpu == group_cpu) {
126 struct list_head *list; 136 struct list_head *list;
127do_local: 137do_local:
128 list = &__get_cpu_var(blk_cpu_done); 138 list = &__get_cpu_var(blk_cpu_done);