author     Alan D. Brunelle <Alan.Brunelle@hp.com>    2010-01-29 03:04:08 -0500
committer  Jens Axboe <jens.axboe@oracle.com>         2010-01-29 03:04:08 -0500
commit     488991e28e55b4fbca8067edf0259f69d1a6f92c
tree       fea5e0aca42e338137cc050e66aaeb5f539e3d21
parent     47483e25205f1f8d79784f0f7c733941bc080ec0

block: Added in stricter no merge semantics for block I/O
Updated 'nomerges' tunable to accept a value of '2' - indicating that _no_
merges at all are to be attempted (not even the simple one-hit cache).
The following table illustrates the additional benefit - 5 minute runs of
a random I/O load were applied to a dozen devices on a 16-way x86_64 system.

nomerges   Throughput     %System       Improvement (tput / %sys)
--------   ------------   -----------   --------------------------
   0       12.45 MB/sec   0.669365609
   1       12.50 MB/sec   0.641519199   0.40% / 2.71%
   2       12.52 MB/sec   0.639849750   0.56% / 2.96%
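
For illustration only (not part of this patch): a minimal userspace C sketch of
setting the tunable, assuming a device named sda; the sysfs path is the one
documented in the ABI entry below.

/*
 * Usage sketch (illustrative): write "2" to the nomerges attribute to
 * disable all merge attempts. The device name "sda" is an assumption;
 * substitute the target disk. Needs write access to sysfs.
 */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/block/sda/queue/nomerges";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return 1;
	}
	/* 0 = all merges, 1 = one-hit cache only, 2 = no merges at all */
	fprintf(f, "2\n");
	return fclose(f) ? 1 : 0;
}
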
Signed-off-by: Alan D. Brunelle <alan.brunelle@hp.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
 Documentation/ABI/testing/sysfs-block |   14
 Documentation/block/queue-sysfs.txt   |   10
 block/blk-sysfs.c                     |   11
 block/elevator.c                      |   11
 include/linux/blkdev.h                |    3
 5 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index d2f90334bb93..4873c759d535 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -128,3 +128,17 @@ Description:
 		preferred request size for workloads where sustained
 		throughput is desired. If no optimal I/O size is
 		reported this file contains 0.
+
+What:		/sys/block/<disk>/queue/nomerges
+Date:		January 2010
+Contact:
+Description:
+		Standard I/O elevator operations include attempts to
+		merge contiguous I/Os. For known random I/O loads these
+		attempts will always fail and result in extra cycles
+		being spent in the kernel. This allows one to turn off
+		this behavior in one of two ways: When set to 1, complex
+		merge checks are disabled, but the simple one-shot merges
+		with the previous I/O request are enabled. When set to 2,
+		all merge tries are disabled. The default value is 0 -
+		which enables all types of merge tries.

diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt
index e164403f60e1..f65274081c8d 100644
--- a/Documentation/block/queue-sysfs.txt
+++ b/Documentation/block/queue-sysfs.txt
@@ -25,11 +25,11 @@ size allowed by the hardware.
 
 nomerges (RW)
 -------------
-This enables the user to disable the lookup logic involved with IO merging
-requests in the block layer. Merging may still occur through a direct
-1-hit cache, since that comes for (almost) free. The IO scheduler will not
-waste cycles doing tree/hash lookups for merges if nomerges is 1. Defaults
-to 0, enabling all merges.
+This enables the user to disable the lookup logic involved with IO
+merging requests in the block layer. By default (0) all merges are
+enabled. When set to 1 only simple one-hit merges will be tried. When
+set to 2 no merge algorithms will be tried (including one-hit or more
+complex tree/hash lookups).
 
 nr_requests (RW)
 ----------------

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 8606c9543fdd..e85442415db3 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -189,7 +189,8 @@ static ssize_t queue_nonrot_store(struct request_queue *q, const char *page,
 
 static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
 {
-	return queue_var_show(blk_queue_nomerges(q), page);
+	return queue_var_show((blk_queue_nomerges(q) << 1) |
+			       blk_queue_noxmerges(q), page);
 }
 
 static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
@@ -199,10 +200,12 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
 	ssize_t ret = queue_var_store(&nm, page, count);
 
 	spin_lock_irq(q->queue_lock);
-	if (nm)
+	queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
+	queue_flag_clear(QUEUE_FLAG_NOXMERGES, q);
+	if (nm == 2)
 		queue_flag_set(QUEUE_FLAG_NOMERGES, q);
-	else
-		queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
+	else if (nm)
+		queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
 	spin_unlock_irq(q->queue_lock);
 
 	return ret;
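
Side note (illustrative, not kernel code): the show/store pair above encodes the
two queue flags into the single user-visible value; a standalone sketch of that
mapping, compiled and run in userspace:

#include <stdio.h>

int main(void)
{
	/*
	 * (QUEUE_FLAG_NOMERGES, QUEUE_FLAG_NOXMERGES) states reachable via
	 * queue_nomerges_store(): both clear, only NOXMERGES, only NOMERGES.
	 */
	const int states[][2] = { { 0, 0 }, { 0, 1 }, { 1, 0 } };

	for (int i = 0; i < 3; i++) {
		int nomerges  = states[i][0];
		int noxmerges = states[i][1];

		/* mirrors (blk_queue_nomerges(q) << 1) | blk_queue_noxmerges(q) */
		printf("NOMERGES=%d NOXMERGES=%d -> nomerges sysfs value %d\n",
		       nomerges, noxmerges, (nomerges << 1) | noxmerges);
	}
	return 0;
}
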

diff --git a/block/elevator.c b/block/elevator.c
index 9ad5ccc4c5ee..ee3a883840f2 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -474,6 +474,15 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
 	int ret;
 
 	/*
+	 * Levels of merges:
+	 * 	nomerges:  No merges at all attempted
+	 * 	noxmerges: Only simple one-hit cache try
+	 * 	merges:	   All merge tries attempted
+	 */
+	if (blk_queue_nomerges(q))
+		return ELEVATOR_NO_MERGE;
+
+	/*
 	 * First try one-hit cache.
 	 */
 	if (q->last_merge) {
@@ -484,7 +493,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
 		}
 	}
 
-	if (blk_queue_nomerges(q))
+	if (blk_queue_noxmerges(q))
 		return ELEVATOR_NO_MERGE;
 
 	/*
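
Side note (illustrative, not the kernel function): a condensed model of which
merge paths elv_merge() still attempts for each setting after this change:

#include <stdio.h>

/*
 * Illustrative only: maps the sysfs nomerges value to the merge work
 * elv_merge() will still attempt after this patch.
 */
static const char *merge_paths_tried(int nomerges)
{
	switch (nomerges) {
	case 2:		/* QUEUE_FLAG_NOMERGES: bail out immediately */
		return "none (ELEVATOR_NO_MERGE returned up front)";
	case 1:		/* QUEUE_FLAG_NOXMERGES: stop after the one-hit cache */
		return "one-hit cache only";
	default:	/* no flags set: full merge machinery */
		return "one-hit cache, then elevator tree/hash lookups";
	}
}

int main(void)
{
	for (int nm = 0; nm <= 2; nm++)
		printf("nomerges=%d: %s\n", nm, merge_paths_tried(nm));
	return 0;
}
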

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ffb13ad35716..f71f5c58620c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -463,6 +463,7 @@ struct request_queue
 #define QUEUE_FLAG_IO_STAT     15	/* do IO stats */
 #define QUEUE_FLAG_CQ          16	/* hardware does queuing */
 #define QUEUE_FLAG_DISCARD     17	/* supports DISCARD */
+#define QUEUE_FLAG_NOXMERGES   18	/* No extended merges */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_CLUSTER) |		\
@@ -589,6 +590,8 @@ enum {
 #define blk_queue_queuing(q)	test_bit(QUEUE_FLAG_CQ, &(q)->queue_flags)
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
 #define blk_queue_nomerges(q)	test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
+#define blk_queue_noxmerges(q)	\
+	test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
 #define blk_queue_nonrot(q)	test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
 #define blk_queue_io_stat(q)	test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
 #define blk_queue_flushing(q)	((q)->ordseq)