From fb2dce862d9f9a68e6b9374579056ec9eca02a63 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 5 Aug 2008 18:01:53 +0100 Subject: Add 'discard' request handling Some block devices benefit from a hint that they can forget the contents of certain sectors. Add basic support for this to the block core, along with a 'blkdev_issue_discard()' helper function which issues such requests. The caller doesn't get to provide an end_io functio, since blkdev_issue_discard() will automatically split the request up into multiple bios if appropriate. Neither does the function wait for completion -- it's expected that callers won't care about when, or even _if_, the request completes. It's only a hint to the device anyway. By definition, the file system doesn't _care_ about these sectors any more. [With feedback from OGAWA Hirofumi and Jens Axboe Signed-off-by: Jens Axboe --- block/blk-settings.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'block/blk-settings.c') diff --git a/block/blk-settings.c b/block/blk-settings.c index dfc77012843f..539d873c820d 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -32,6 +32,23 @@ void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn) } EXPORT_SYMBOL(blk_queue_prep_rq); +/** + * blk_queue_set_discard - set a discard_sectors function for queue + * @q: queue + * @dfn: prepare_discard function + * + * It's possible for a queue to register a discard callback which is used + * to transform a discard request into the appropriate type for the + * hardware. If none is registered, then discard requests are failed + * with %EOPNOTSUPP. + * + */ +void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn) +{ + q->prepare_discard_fn = dfn; +} +EXPORT_SYMBOL(blk_queue_set_discard); + /** * blk_queue_merge_bvec - set a merge_bvec function for queue * @q: queue -- cgit v1.2.2 From 710027a48ede75428cc68eaa8ae2269b1e356e2c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 19 Aug 2008 20:13:11 +0200 Subject: Add some block/ source files to the kernel-api docbook. Fix kernel-doc notation in them as needed. Fix changed function parameter names. Fix typos/spellos. In comments, change REQ_SPECIAL to REQ_TYPE_SPECIAL and REQ_BLOCK_PC to REQ_TYPE_BLOCK_PC. Signed-off-by: Randy Dunlap Signed-off-by: Jens Axboe --- block/blk-settings.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'block/blk-settings.c') diff --git a/block/blk-settings.c b/block/blk-settings.c index 539d873c820d..d70692badcdb 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -144,7 +144,7 @@ EXPORT_SYMBOL(blk_queue_make_request); * Different hardware can have different requirements as to what pages * it can do I/O directly to. A low level driver can call * blk_queue_bounce_limit to have lower memory pages allocated as bounce - * buffers for doing I/O to pages residing above @page. + * buffers for doing I/O to pages residing above @dma_addr. **/ void blk_queue_bounce_limit(struct request_queue *q, u64 dma_addr) { @@ -229,7 +229,7 @@ EXPORT_SYMBOL(blk_queue_max_phys_segments); * Description: * Enables a low level driver to set an upper limit on the number of * hw data segments in a request. This would be the largest number of - * address/length pairs the host adapter can actually give as once + * address/length pairs the host adapter can actually give at once * to the device. **/ void blk_queue_max_hw_segments(struct request_queue *q, @@ -410,7 +410,7 @@ EXPORT_SYMBOL(blk_queue_segment_boundary); * @mask: alignment mask * * description: - * set required memory and length aligment for direct dma transactions. + * set required memory and length alignment for direct dma transactions. * this is used when buiding direct io requests for the queue. * **/ @@ -426,7 +426,7 @@ EXPORT_SYMBOL(blk_queue_dma_alignment); * @mask: alignment mask * * description: - * update required memory and length aligment for direct dma transactions. + * update required memory and length alignment for direct dma transactions. * If the requested alignment is larger than the current alignment, then * the current queue alignment is updated to the new value, otherwise it * is left alone. The design of this is to allow multiple objects -- cgit v1.2.2 From c7c22e4d5c1fdebfac4dba76de7d0338c2b0d832 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 13 Sep 2008 20:26:01 +0200 Subject: block: add support for IO CPU affinity This patch adds support for controlling the IO completion CPU of either all requests on a queue, or on a per-request basis. We export a sysfs variable (rq_affinity) which, if set, migrates completions of requests to the CPU that originally submitted it. A bio helper (bio_set_completion_cpu()) is also added, so that queuers can ask for completion on that specific CPU. In testing, this has been show to cut the system time by as much as 20-40% on synthetic workloads where CPU affinity is desired. This requires a little help from the architecture, so it'll only work as designed for archs that are using the new generic smp helper infrastructure. Signed-off-by: Jens Axboe --- block/blk-settings.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block/blk-settings.c') diff --git a/block/blk-settings.c b/block/blk-settings.c index d70692badcdb..a60e959a12c4 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -443,7 +443,7 @@ void blk_queue_update_dma_alignment(struct request_queue *q, int mask) } EXPORT_SYMBOL(blk_queue_update_dma_alignment); -static int __init blk_settings_init(void) +int __init blk_settings_init(void) { blk_max_low_pfn = max_low_pfn - 1; blk_max_pfn = max_pfn - 1; -- cgit v1.2.2 From aeb3d3a81e81c6323a17fe914e91eb228b3f1aa1 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Thu, 28 Aug 2008 09:27:42 +0200 Subject: block: kmalloc args reversed, small function definition fixes Noticed by sparse: block/blk-softirq.c:156:12: warning: symbol 'blk_softirq_init' was not declared. Should it be static? block/genhd.c:583:28: warning: function 'bdget_disk' with external linkage has definition block/genhd.c:659:17: warning: incorrect type in argument 1 (different base types) block/genhd.c:659:17: expected unsigned int [unsigned] [usertype] size block/genhd.c:659:17: got restricted gfp_t block/genhd.c:659:29: warning: incorrect type in argument 2 (different base types) block/genhd.c:659:29: expected restricted gfp_t [usertype] flags block/genhd.c:659:29: got unsigned int block: kmalloc args reversed Signed-off-by: Harvey Harrison Signed-off-by: Jens Axboe --- block/blk-settings.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block/blk-settings.c') diff --git a/block/blk-settings.c b/block/blk-settings.c index a60e959a12c4..d70692badcdb 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -443,7 +443,7 @@ void blk_queue_update_dma_alignment(struct request_queue *q, int mask) } EXPORT_SYMBOL(blk_queue_update_dma_alignment); -int __init blk_settings_init(void) +static int __init blk_settings_init(void) { blk_max_low_pfn = max_low_pfn - 1; blk_max_pfn = max_pfn - 1; -- cgit v1.2.2 From 242f9dcb8ba6f68fcd217a119a7648a4f69290e9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 14 Sep 2008 05:55:09 -0700 Subject: block: unify request timeout handling Right now SCSI and others do their own command timeout handling. Move those bits to the block layer. Instead of having a timer per command, we try to be a bit more clever and simply have one per-queue. This avoids the overhead of having to tear down and setup a timer for each command, so it will result in a lot less timer fiddling. Signed-off-by: Mike Anderson Signed-off-by: Jens Axboe --- block/blk-settings.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'block/blk-settings.c') diff --git a/block/blk-settings.c b/block/blk-settings.c index d70692badcdb..1d0330d0b40a 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -77,6 +77,18 @@ void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn) } EXPORT_SYMBOL(blk_queue_softirq_done); +void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout) +{ + q->rq_timeout = timeout; +} +EXPORT_SYMBOL_GPL(blk_queue_rq_timeout); + +void blk_queue_rq_timed_out(struct request_queue *q, rq_timed_out_fn *fn) +{ + q->rq_timed_out_fn = fn; +} +EXPORT_SYMBOL_GPL(blk_queue_rq_timed_out); + /** * blk_queue_make_request - define an alternate make_request function for a device * @q: the request queue for the device to be affected -- cgit v1.2.2 From ef9e3facdf1fe1228721a7c295a76d1b7a0e57ec Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Wed, 1 Oct 2008 16:12:15 +0200 Subject: block: add lld busy state exporting interface This patch adds an new interface, blk_lld_busy(), to check lld's busy state from the block layer. blk_lld_busy() calls down into low-level drivers for the checking if the drivers set q->lld_busy_fn() using blk_queue_lld_busy(). This resolves a performance problem on request stacking devices below. Some drivers like scsi mid layer stop dispatching request when they detect busy state on its low-level device like host/target/device. It allows other requests to stay in the I/O scheduler's queue for a chance of merging. Request stacking drivers like request-based dm should follow the same logic. However, there is no generic interface for the stacked device to check if the underlying device(s) are busy. If the request stacking driver dispatches and submits requests to the busy underlying device, the requests will stay in the underlying device's queue without a chance of merging. This causes performance problem on burst I/O load. With this patch, busy state of the underlying device is exported via q->lld_busy_fn(). So the request stacking driver can check it and stop dispatching requests if busy. The underlying device driver must return the busy state appropriately: 1: when the device driver can't process requests immediately. 0: when the device driver can process requests immediately, including abnormal situations where the device driver needs to kill all requests. Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Cc: Andrew Morton Signed-off-by: Jens Axboe --- block/blk-settings.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'block/blk-settings.c') diff --git a/block/blk-settings.c b/block/blk-settings.c index 1d0330d0b40a..b21dcdb64151 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -89,6 +89,12 @@ void blk_queue_rq_timed_out(struct request_queue *q, rq_timed_out_fn *fn) } EXPORT_SYMBOL_GPL(blk_queue_rq_timed_out); +void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn) +{ + q->lld_busy_fn = fn; +} +EXPORT_SYMBOL_GPL(blk_queue_lld_busy); + /** * blk_queue_make_request - define an alternate make_request function for a device * @q: the request queue for the device to be affected -- cgit v1.2.2 From 713ada9ba94f2ad874cffd074b83e3dc681ca82f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Oct 2008 13:44:57 +0200 Subject: block: move q->unplug_work initialization modprobe loop; rmmod loop effectively creates a blk_queue and destroys it which results in q->unplug_work being canceled without it ever being initialized. Therefore, move the initialization of q->unplug_work from blk_queue_make_request() to blk_alloc_queue*(). Reported-by: Alexey Dobriyan Signed-off-by: Peter Zijlstra Signed-off-by: Jens Axboe --- block/blk-settings.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'block/blk-settings.c') diff --git a/block/blk-settings.c b/block/blk-settings.c index b21dcdb64151..41392fbe19ff 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -141,8 +141,6 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn) if (q->unplug_delay == 0) q->unplug_delay = 1; - INIT_WORK(&q->unplug_work, blk_unplug_work); - q->unplug_timer.function = blk_unplug_timeout; q->unplug_timer.data = (unsigned long)q; -- cgit v1.2.2