author	Jens Axboe <jens.axboe@oracle.com>	2008-09-13 14:26:01 -0400
committer	Jens Axboe <jens.axboe@oracle.com>	2008-10-09 02:56:09 -0400
commit	c7c22e4d5c1fdebfac4dba76de7d0338c2b0d832 (patch)
tree	ecc3d2517b3471ccc35d4cb4e3b48d4b57205061 /block/blk-core.c
parent	18887ad910e56066233a07fd3cfb2fa11338b782 (diff)
block: add support for IO CPU affinity
This patch adds support for controlling the IO completion CPU of either all requests on a queue, or on a per-request basis. We export a sysfs variable (rq_affinity) which, if set, migrates completions of requests to the CPU that originally submitted them. A bio helper (bio_set_completion_cpu()) is also added, so that queuers can ask for completion on a specific CPU.

In testing, this has been shown to cut the system time by as much as 20-40% on synthetic workloads where CPU affinity is desired.

This requires a little help from the architecture, so it'll only work as designed for archs that are using the new generic smp helper infrastructure.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
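As a hedged illustration of the per-request path described above: the helper name bio_set_completion_cpu() comes from this commit, but its exact signature and the submitter wrapped around it here are assumptions, not code from this patch. The queue-wide behaviour is instead toggled through the rq_affinity sysfs variable under the queue's sysfs directory. A queuer might pin a bio's completion to the submitting CPU roughly like this:

/*
 * Sketch only: a hypothetical submitter asking for completion on the
 * CPU that issues the IO. Assumes bio_set_completion_cpu(bio, cpu)
 * records the CPU in bio->bi_comp_cpu and flags the bio BIO_CPU_AFFINE,
 * which __make_request() then copies into req->cpu (see the hunks below).
 */
static void submit_cpu_affine_read(struct block_device *bdev,
				   struct page *page, bio_end_io_t *done)
{
	struct bio *bio = bio_alloc(GFP_NOIO, 1);

	bio->bi_bdev = bdev;
	bio->bi_sector = 0;
	bio->bi_end_io = done;
	bio_add_page(bio, page, PAGE_SIZE, 0);

	/* ask the block layer to complete this bio on the submitting CPU */
	bio_set_completion_cpu(bio, smp_processor_id());

	submit_bio(READ, bio);
}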
Diffstat (limited to 'block/blk-core.c')
-rw-r--r--	block/blk-core.c	46
1 file changed, 23 insertions(+), 23 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 9c6f818d0c33..5484838f46e7 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -110,7 +110,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
 	memset(rq, 0, sizeof(*rq));
 
 	INIT_LIST_HEAD(&rq->queuelist);
-	INIT_LIST_HEAD(&rq->donelist);
+	rq->cpu = -1;
 	rq->q = q;
 	rq->sector = rq->hard_sector = (sector_t) -1;
 	INIT_HLIST_NODE(&rq->hash);
@@ -322,6 +322,21 @@ void blk_unplug(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_unplug);
 
+static void blk_invoke_request_fn(struct request_queue *q)
+{
+	/*
+	 * one level of recursion is ok and is much faster than kicking
+	 * the unplug handling
+	 */
+	if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
+		q->request_fn(q);
+		queue_flag_clear(QUEUE_FLAG_REENTER, q);
+	} else {
+		queue_flag_set(QUEUE_FLAG_PLUGGED, q);
+		kblockd_schedule_work(q, &q->unplug_work);
+	}
+}
+
 /**
  * blk_start_queue - restart a previously stopped queue
  * @q: The &struct request_queue in question
@@ -336,18 +351,7 @@ void blk_start_queue(struct request_queue *q)
 	WARN_ON(!irqs_disabled());
 
 	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
-
-	/*
-	 * one level of recursion is ok and is much faster than kicking
-	 * the unplug handling
-	 */
-	if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
-		q->request_fn(q);
-		queue_flag_clear(QUEUE_FLAG_REENTER, q);
-	} else {
-		blk_plug_device(q);
-		kblockd_schedule_work(q, &q->unplug_work);
-	}
+	blk_invoke_request_fn(q);
 }
 EXPORT_SYMBOL(blk_start_queue);
 
@@ -405,15 +409,8 @@ void __blk_run_queue(struct request_queue *q)
 	 * Only recurse once to avoid overrunning the stack, let the unplug
 	 * handling reinvoke the handler shortly if we already got there.
 	 */
-	if (!elv_queue_empty(q)) {
-		if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
-			q->request_fn(q);
-			queue_flag_clear(QUEUE_FLAG_REENTER, q);
-		} else {
-			blk_plug_device(q);
-			kblockd_schedule_work(q, &q->unplug_work);
-		}
-	}
+	if (!elv_queue_empty(q))
+		blk_invoke_request_fn(q);
 }
 EXPORT_SYMBOL(__blk_run_queue);
 
@@ -1056,6 +1053,7 @@ EXPORT_SYMBOL(blk_put_request);
 
 void init_request_from_bio(struct request *req, struct bio *bio)
 {
+	req->cpu = bio->bi_comp_cpu;
 	req->cmd_type = REQ_TYPE_FS;
 
 	/*
@@ -1198,13 +1196,15 @@ get_rq:
 	init_request_from_bio(req, bio);
 
 	spin_lock_irq(q->queue_lock);
+	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
+	    bio_flagged(bio, BIO_CPU_AFFINE))
+		req->cpu = blk_cpu_to_group(smp_processor_id());
 	if (elv_queue_empty(q))
 		blk_plug_device(q);
 	add_request(q, req);
 out:
 	if (sync)
 		__generic_unplug_device(q);
-
 	spin_unlock_irq(q->queue_lock);
 	return 0;
 