aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlan D. Brunelle <Alan.Brunelle@hp.com>2010-01-29 03:04:08 -0500
committerJens Axboe <jens.axboe@oracle.com>2010-01-29 03:04:08 -0500
commit488991e28e55b4fbca8067edf0259f69d1a6f92c (patch)
treefea5e0aca42e338137cc050e66aaeb5f539e3d21
parent47483e25205f1f8d79784f0f7c733941bc080ec0 (diff)
block: Added in stricter no merge semantics for block I/O
Updated 'nomerges' tunable to accept a value of '2' - indicating that _no_
merges at all are to be attempted (not even the simple one-hit cache).

The following table illustrates the additional benefit - 5 minute runs of
a random I/O load were applied to a dozen devices on a 16-way x86_64 system.

nomerges        Throughput      %System         Improvement (tput / %sys)
--------        ------------    -----------     -------------------------
0               12.45 MB/sec    0.669365609
1               12.50 MB/sec    0.641519199     0.40% / 2.71%
2               12.52 MB/sec    0.639849750     0.56% / 2.96%

Signed-off-by: Alan D. Brunelle <alan.brunelle@hp.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
-rw-r--r--Documentation/ABI/testing/sysfs-block14
-rw-r--r--Documentation/block/queue-sysfs.txt10
-rw-r--r--block/blk-sysfs.c11
-rw-r--r--block/elevator.c11
-rw-r--r--include/linux/blkdev.h3
5 files changed, 39 insertions, 10 deletions
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index d2f90334bb93..4873c759d535 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -128,3 +128,17 @@ Description:
128 preferred request size for workloads where sustained 128 preferred request size for workloads where sustained
129 throughput is desired. If no optimal I/O size is 129 throughput is desired. If no optimal I/O size is
130 reported this file contains 0. 130 reported this file contains 0.
131
132What: /sys/block/<disk>/queue/nomerges
133Date: January 2010
134Contact:
135Description:
136 Standard I/O elevator operations include attempts to
137 merge contiguous I/Os. For known random I/O loads these
138 attempts will always fail and result in extra cycles
139 being spent in the kernel. This allows one to turn off
140 this behavior in one of two ways: When set to 1, complex
141 merge checks are disabled, but the simple one-shot merges
142 with the previous I/O request are enabled. When set to 2,
143 all merge tries are disabled. The default value is 0 -
144 which enables all types of merge tries.
diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt
index e164403f60e1..f65274081c8d 100644
--- a/Documentation/block/queue-sysfs.txt
+++ b/Documentation/block/queue-sysfs.txt
@@ -25,11 +25,11 @@ size allowed by the hardware.
25 25
26nomerges (RW) 26nomerges (RW)
27------------- 27-------------
28This enables the user to disable the lookup logic involved with IO merging 28This enables the user to disable the lookup logic involved with IO
29requests in the block layer. Merging may still occur through a direct 29merging requests in the block layer. By default (0) all merges are
301-hit cache, since that comes for (almost) free. The IO scheduler will not 30enabled. When set to 1 only simple one-hit merges will be tried. When
31waste cycles doing tree/hash lookups for merges if nomerges is 1. Defaults 31set to 2 no merge algorithms will be tried (including one-hit or more
32to 0, enabling all merges. 32complex tree/hash lookups).
33 33
34nr_requests (RW) 34nr_requests (RW)
35---------------- 35----------------
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 8606c9543fdd..e85442415db3 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -189,7 +189,8 @@ static ssize_t queue_nonrot_store(struct request_queue *q, const char *page,
189 189
190static ssize_t queue_nomerges_show(struct request_queue *q, char *page) 190static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
191{ 191{
192 return queue_var_show(blk_queue_nomerges(q), page); 192 return queue_var_show((blk_queue_nomerges(q) << 1) |
193 blk_queue_noxmerges(q), page);
193} 194}
194 195
195static ssize_t queue_nomerges_store(struct request_queue *q, const char *page, 196static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
@@ -199,10 +200,12 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
199 ssize_t ret = queue_var_store(&nm, page, count); 200 ssize_t ret = queue_var_store(&nm, page, count);
200 201
201 spin_lock_irq(q->queue_lock); 202 spin_lock_irq(q->queue_lock);
202 if (nm) 203 queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
204 queue_flag_clear(QUEUE_FLAG_NOXMERGES, q);
205 if (nm == 2)
203 queue_flag_set(QUEUE_FLAG_NOMERGES, q); 206 queue_flag_set(QUEUE_FLAG_NOMERGES, q);
204 else 207 else if (nm)
205 queue_flag_clear(QUEUE_FLAG_NOMERGES, q); 208 queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
206 spin_unlock_irq(q->queue_lock); 209 spin_unlock_irq(q->queue_lock);
207 210
208 return ret; 211 return ret;
diff --git a/block/elevator.c b/block/elevator.c
index 9ad5ccc4c5ee..ee3a883840f2 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -474,6 +474,15 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
474 int ret; 474 int ret;
475 475
476 /* 476 /*
477 * Levels of merges:
478 * nomerges: No merges at all attempted
479 * noxmerges: Only simple one-hit cache try
480 * merges: All merge tries attempted
481 */
482 if (blk_queue_nomerges(q))
483 return ELEVATOR_NO_MERGE;
484
485 /*
477 * First try one-hit cache. 486 * First try one-hit cache.
478 */ 487 */
479 if (q->last_merge) { 488 if (q->last_merge) {
@@ -484,7 +493,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
484 } 493 }
485 } 494 }
486 495
487 if (blk_queue_nomerges(q)) 496 if (blk_queue_noxmerges(q))
488 return ELEVATOR_NO_MERGE; 497 return ELEVATOR_NO_MERGE;
489 498
490 /* 499 /*
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ffb13ad35716..f71f5c58620c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -463,6 +463,7 @@ struct request_queue
463#define QUEUE_FLAG_IO_STAT 15 /* do IO stats */ 463#define QUEUE_FLAG_IO_STAT 15 /* do IO stats */
464#define QUEUE_FLAG_CQ 16 /* hardware does queuing */ 464#define QUEUE_FLAG_CQ 16 /* hardware does queuing */
465#define QUEUE_FLAG_DISCARD 17 /* supports DISCARD */ 465#define QUEUE_FLAG_DISCARD 17 /* supports DISCARD */
466#define QUEUE_FLAG_NOXMERGES 18 /* No extended merges */
466 467
467#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ 468#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
468 (1 << QUEUE_FLAG_CLUSTER) | \ 469 (1 << QUEUE_FLAG_CLUSTER) | \
@@ -589,6 +590,8 @@ enum {
589#define blk_queue_queuing(q) test_bit(QUEUE_FLAG_CQ, &(q)->queue_flags) 590#define blk_queue_queuing(q) test_bit(QUEUE_FLAG_CQ, &(q)->queue_flags)
590#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) 591#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
591#define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) 592#define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
593#define blk_queue_noxmerges(q) \
594 test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
592#define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) 595#define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
593#define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) 596#define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
594#define blk_queue_flushing(q) ((q)->ordseq) 597#define blk_queue_flushing(q) ((q)->ordseq)