From ae1b1539622fb46e51b4d13b3f9e5f4c713f86ae Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 25 Jan 2011 12:43:54 +0100 Subject: block: reimplement FLUSH/FUA to support merge The current FLUSH/FUA support has evolved from the implementation which had to perform queue draining. As such, sequencing is done queue-wide one flush request after another. However, with the draining requirement gone, there's no reason to keep the queue-wide sequential approach. This patch reimplements FLUSH/FUA support such that each FLUSH/FUA request is sequenced individually. The actual FLUSH execution is double buffered and whenever a request wants to execute one for either PRE or POSTFLUSH, it queues on the pending queue. Once certain conditions are met, a flush request is issued and on its completion all pending requests proceed to the next sequence. This allows arbitrary merging of different type of flushes. How they are merged can be primarily controlled and tuned by adjusting the above said 'conditions' used to determine when to issue the next flush. This is inspired by Darrick's patches to merge multiple zero-data flushes which helps workloads with highly concurrent fsync requests. * As flush requests are never put on the IO scheduler, request fields used for flush share space with rq->rb_node. rq->completion_data is moved out of the union. This increases the request size by one pointer. As rq->elevator_private* are used only by the iosched too, it is possible to reduce the request size further. However, to do that, we need to modify request allocation path such that iosched data is not allocated for flush requests. * FLUSH/FUA processing happens on insertion now instead of dispatch. - Comments updated as per Vivek and Mike. Signed-off-by: Tejun Heo Cc: "Darrick J. 
Wong" Cc: Shaohua Li Cc: Christoph Hellwig Cc: Vivek Goyal Cc: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 36ab42c9bb99..6d7e9afd08c3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -99,13 +99,18 @@ struct request { /* * The rb_node is only used inside the io scheduler, requests * are pruned when moved to the dispatch queue. So let the - * completion_data share space with the rb_node. + * flush fields share space with the rb_node. */ union { struct rb_node rb_node; /* sort/lookup */ - void *completion_data; + struct { + unsigned int seq; + struct list_head list; + } flush; }; + void *completion_data; + /* * Three pointers are available for the IO schedulers, if they need * more they have to dynamically allocate it. @@ -362,11 +367,12 @@ struct request_queue * for flush operations */ unsigned int flush_flags; - unsigned int flush_seq; - int flush_err; + unsigned int flush_pending_idx:1; + unsigned int flush_running_idx:1; + unsigned long flush_pending_since; + struct list_head flush_queue[2]; + struct list_head flush_data_in_flight; struct request flush_rq; - struct request *orig_flush_rq; - struct list_head pending_flushes; struct mutex sysfs_lock; -- cgit v1.2.2 From c186794dbb466b45cf40f942f2d09d6d5b4b0e42 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 11 Feb 2011 11:08:00 +0100 Subject: block: share request flush fields with elevator_private Flush requests are never put on the IO scheduler. Convert request structure's elevator_private* into an array and have the flush fields share a union with it. Reclaim the space lost in 'struct request' by moving 'completion_data' back in the union with 'rb_node'. 
Signed-off-by: Mike Snitzer Acked-by: Vivek Goyal Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6d7e9afd08c3..12bb426949e9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -99,25 +99,26 @@ struct request { /* * The rb_node is only used inside the io scheduler, requests * are pruned when moved to the dispatch queue. So let the - * flush fields share space with the rb_node. + * completion_data share space with the rb_node. */ union { struct rb_node rb_node; /* sort/lookup */ - struct { - unsigned int seq; - struct list_head list; - } flush; + void *completion_data; }; - void *completion_data; - /* * Three pointers are available for the IO schedulers, if they need - * more they have to dynamically allocate it. + * more they have to dynamically allocate it. Flush requests are + * never put on the IO scheduler. So let the flush fields share + * space with the three elevator_private pointers. */ - void *elevator_private; - void *elevator_private2; - void *elevator_private3; + union { + void *elevator_private[3]; + struct { + unsigned int seq; + struct list_head list; + } flush; + }; struct gendisk *rq_disk; unsigned long start_time; -- cgit v1.2.2 From 450adcbe518ab3a3953d8475309525d22de77cba Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Tue, 1 Mar 2011 13:40:54 -0500 Subject: blk-throttle: Do not use kblockd workqueue for throtl work o Dominik Klein reported a system hang issue while doing some blkio throttling testing. https://lkml.org/lkml/2011/2/24/173 o Some tracing revealed that CFQ was not dispatching any more jobs as queue unplug was not happening. And queue unplug was not happening because unplug work was not being called as there was one throttling work on same cpu which as not finished yet. 
And throttling work had not finished as it was trying to dispatch a bio to CFQ but all the request descriptors were consumed, so it was put to sleep. o So basically it is a cyclic dependency between CFQ unplug work and throtl dispatch work. Tejun suggested using a separate workqueue for such cases. o This patch uses a separate workqueue for throttle related work and does not rely on kblockd workqueue anymore. Cc: stable@kernel.org Reported-by: Dominik Klein Signed-off-by: Vivek Goyal Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4d18ff34670a..dd8cd0f47e3a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1088,7 +1088,6 @@ static inline void put_dev_sector(Sector p) struct work_struct; int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); -int kblockd_schedule_delayed_work(struct request_queue *q, struct delayed_work *dwork, unsigned long delay); #ifdef CONFIG_BLK_CGROUP /* @@ -1136,7 +1135,6 @@ static inline uint64_t rq_io_start_time_ns(struct request *req) extern int blk_throtl_init(struct request_queue *q); extern void blk_throtl_exit(struct request_queue *q); extern int blk_throtl_bio(struct request_queue *q, struct bio **bio); -extern void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay); extern void throtl_shutdown_timer_wq(struct request_queue *q); #else /* CONFIG_BLK_DEV_THROTTLING */ static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio) @@ -1146,7 +1144,6 @@ static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio) static inline int blk_throtl_init(struct request_queue *q) { return 0; } static inline int blk_throtl_exit(struct request_queue *q) { return 0; } -static inline void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay) {} static inline void 
throtl_shutdown_timer_wq(struct request_queue *q) {} #endif /* CONFIG_BLK_DEV_THROTTLING */ -- cgit v1.2.2 From 1654e7411a1ad4999fe7890ef51d2a2bbb1fcf76 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 2 Mar 2011 08:48:05 -0500 Subject: block: add @force_kblockd to __blk_run_queue() __blk_run_queue() automatically either calls q->request_fn() directly or schedules kblockd depending on whether the function is recursed. blk-flush implementation needs to be able to explicitly choose kblockd. Add @force_kblockd. All the current users are converted to specify %false for the parameter and this patch doesn't introduce any behavior change. stable: This is prerequisite for fixing ide oops caused by the new blk-flush implementation. Signed-off-by: Tejun Heo Cc: Jan Beulich Cc: James Bottomley Cc: stable@kernel.org Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index dd8cd0f47e3a..d5063e1b5555 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -699,7 +699,7 @@ extern void blk_start_queue(struct request_queue *q); extern void blk_stop_queue(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); extern void __blk_stop_queue(struct request_queue *q); -extern void __blk_run_queue(struct request_queue *); +extern void __blk_run_queue(struct request_queue *q, bool force_kblockd); extern void blk_run_queue(struct request_queue *); extern int blk_rq_map_user(struct request_queue *, struct request *, struct rq_map_data *, void __user *, unsigned long, -- cgit v1.2.2 From da527770007fce8e4541947d47918248286da875 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 2 Mar 2011 19:05:33 -0500 Subject: block: Move blk_throtl_exit() call to blk_cleanup_queue() Move blk_throtl_exit() in blk_cleanup_queue() as blk_throtl_exit() is written in such a way that it needs queue lock. 
In blk_release_queue() there is no guarantee that ->queue_lock is still around. Initially blk_throtl_exit() was in blk_cleanup_queue() but Ingo reported one problem. https://lkml.org/lkml/2010/10/23/86 And a quick fix moved blk_throtl_exit() to blk_release_queue(). commit 7ad58c028652753814054f4e3ac58f925e7343f4 Author: Jens Axboe Date: Sat Oct 23 20:40:26 2010 +0200 block: fix use-after-free bug in blk throttle code This patch reverts the above change and does not try to shut down the throtl work in blk_sync_queue(). By avoiding the call to throtl_shutdown_timer_wq() from blk_sync_queue(), we should also avoid the problem reported by Ingo. blk_sync_queue() seems to be used only by the md driver and it seems to be using it to make sure q->unplug_fn is not called as md registers its own unplug functions and it is about to free up the data structures used by unplug_fn(). Block throttle does not call back into unplug_fn() or into md. So there is no need to cancel blk throttle work. In fact I think cancelling block throttle work is bad because it might happen that some bios are throttled and scheduled to be dispatched later with the help of pending work and if work is cancelled, these bios might never be dispatched. Block layer also uses blk_sync_queue() during blk_cleanup_queue() and blk_release_queue() time. That should be safe as we are also calling blk_throtl_exit() which should make sure all the throttling related data structures are cleaned up. 
Signed-off-by: Vivek Goyal Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e3ee74fc5903..23fb92506c31 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1144,7 +1144,6 @@ extern int blk_throtl_init(struct request_queue *q); extern void blk_throtl_exit(struct request_queue *q); extern int blk_throtl_bio(struct request_queue *q, struct bio **bio); extern void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay); -extern void throtl_shutdown_timer_wq(struct request_queue *q); #else /* CONFIG_BLK_DEV_THROTTLING */ static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio) { @@ -1154,7 +1153,6 @@ static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio) static inline int blk_throtl_init(struct request_queue *q) { return 0; } static inline int blk_throtl_exit(struct request_queue *q) { return 0; } static inline void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay) {} -static inline void throtl_shutdown_timer_wq(struct request_queue *q) {} #endif /* CONFIG_BLK_DEV_THROTTLING */ #define MODULE_ALIAS_BLOCKDEV(major,minor) \ -- cgit v1.2.2 From 3cca6dc1c81e2407928dc4c6105252146fd3924f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 2 Mar 2011 11:08:00 -0500 Subject: block: add API for delaying work/request_fn a little bit Currently we use plugging for that, but as plugging is going away, we need an alternative mechanism. 
Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e3ee74fc5903..f55b2a8b6610 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -300,6 +300,11 @@ struct request_queue unsigned long unplug_delay; /* After this many jiffies */ struct work_struct unplug_work; + /* + * Delayed queue handling + */ + struct delayed_work delay_work; + struct backing_dev_info backing_dev_info; /* @@ -677,6 +682,7 @@ extern int blk_insert_cloned_request(struct request_queue *q, extern void blk_plug_device(struct request_queue *); extern void blk_plug_device_unlocked(struct request_queue *); extern int blk_remove_plug(struct request_queue *); +extern void blk_delay_queue(struct request_queue *, unsigned long); extern void blk_recount_segments(struct request_queue *, struct bio *); extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, unsigned int, void __user *); -- cgit v1.2.2 From 73c101011926c5832e6e141682180c4debe2cf45 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 8 Mar 2011 13:19:51 +0100 Subject: block: initial patch for on-stack per-task plugging This patch adds support for creating a queuing context outside of the queue itself. This enables us to batch up pieces of IO before grabbing the block device queue lock and submitting them to the IO scheduler. The context is created on the stack of the process and assigned in the task structure, so that we can auto-unplug it if we hit a schedule event. The current queue plugging happens implicitly if IO is submitted to an empty device, yet callers have to remember to unplug that IO when they are going to wait for it. This is an ugly API and has caused bugs in the past. Additionally, it requires hacks in the vm (->sync_page() callback) to handle that logic. 
By switching to an explicit plugging scheme we make the API a lot nicer and can get rid of the ->sync_page() hack in the vm. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f55b2a8b6610..5873037eeb91 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -871,6 +871,31 @@ struct request_queue *blk_alloc_queue(gfp_t); struct request_queue *blk_alloc_queue_node(gfp_t, int); extern void blk_put_queue(struct request_queue *); +struct blk_plug { + unsigned long magic; + struct list_head list; + unsigned int should_sort; +}; + +extern void blk_start_plug(struct blk_plug *); +extern void blk_finish_plug(struct blk_plug *); +extern void __blk_flush_plug(struct task_struct *, struct blk_plug *); + +static inline void blk_flush_plug(struct task_struct *tsk) +{ + struct blk_plug *plug = tsk->plug; + + if (unlikely(plug)) + __blk_flush_plug(tsk, plug); +} + +static inline bool blk_needs_flush_plug(struct task_struct *tsk) +{ + struct blk_plug *plug = tsk->plug; + + return plug && !list_empty(&plug->list); +} + /* * tag stuff */ @@ -1294,6 +1319,23 @@ static inline long nr_blockdev_pages(void) return 0; } +static inline void blk_start_plug(struct list_head *list) +{ +} + +static inline void blk_finish_plug(struct list_head *list) +{ +} + +static inline void blk_flush_plug(struct task_struct *tsk) +{ +} + +static inline bool blk_needs_flush_plug(struct task_struct *tsk) +{ + return false; +} + #endif /* CONFIG_BLOCK */ #endif -- cgit v1.2.2 From 7eaceaccab5f40bbfda044629a6298616aeaed50 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 10 Mar 2011 08:52:07 +0100 Subject: block: remove per-queue plugging Code has been converted over to the new explicit on-stack plugging, and delay users have been converted to use the new API for that. 
So lets kill off the old plugging along with aops->sync_page(). Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 31 +++++++------------------------ 1 file changed, 7 insertions(+), 24 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5873037eeb91..64ab2a1bb167 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -196,7 +196,6 @@ typedef void (request_fn_proc) (struct request_queue *q); typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); typedef int (prep_rq_fn) (struct request_queue *, struct request *); typedef void (unprep_rq_fn) (struct request_queue *, struct request *); -typedef void (unplug_fn) (struct request_queue *); struct bio_vec; struct bvec_merge_data { @@ -279,7 +278,6 @@ struct request_queue make_request_fn *make_request_fn; prep_rq_fn *prep_rq_fn; unprep_rq_fn *unprep_rq_fn; - unplug_fn *unplug_fn; merge_bvec_fn *merge_bvec_fn; softirq_done_fn *softirq_done_fn; rq_timed_out_fn *rq_timed_out_fn; @@ -292,14 +290,6 @@ struct request_queue sector_t end_sector; struct request *boundary_rq; - /* - * Auto-unplugging state - */ - struct timer_list unplug_timer; - int unplug_thresh; /* After this many requests */ - unsigned long unplug_delay; /* After this many jiffies */ - struct work_struct unplug_work; - /* * Delayed queue handling */ @@ -399,14 +389,13 @@ struct request_queue #define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */ #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ -#define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ -#define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ -#define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */ -#define QUEUE_FLAG_NOMERGES 10 /* disable merge attempts */ -#define QUEUE_FLAG_SAME_COMP 11 /* force complete on same CPU */ -#define QUEUE_FLAG_FAIL_IO 12 /* fake timeout */ -#define QUEUE_FLAG_STACKABLE 13 /* 
supports request stacking */ -#define QUEUE_FLAG_NONROT 14 /* non-rotational device (SSD) */ +#define QUEUE_FLAG_ELVSWITCH 7 /* don't use elevator, just do FIFO */ +#define QUEUE_FLAG_BIDI 8 /* queue supports bidi requests */ +#define QUEUE_FLAG_NOMERGES 9 /* disable merge attempts */ +#define QUEUE_FLAG_SAME_COMP 10 /* force complete on same CPU */ +#define QUEUE_FLAG_FAIL_IO 11 /* fake timeout */ +#define QUEUE_FLAG_STACKABLE 12 /* supports request stacking */ +#define QUEUE_FLAG_NONROT 13 /* non-rotational device (SSD) */ #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ #define QUEUE_FLAG_IO_STAT 15 /* do IO stats */ #define QUEUE_FLAG_DISCARD 16 /* supports DISCARD */ @@ -484,7 +473,6 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) __clear_bit(flag, &q->queue_flags); } -#define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) @@ -679,9 +667,6 @@ extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, extern void blk_rq_unprep_clone(struct request *rq); extern int blk_insert_cloned_request(struct request_queue *q, struct request *rq); -extern void blk_plug_device(struct request_queue *); -extern void blk_plug_device_unlocked(struct request_queue *); -extern int blk_remove_plug(struct request_queue *); extern void blk_delay_queue(struct request_queue *, unsigned long); extern void blk_recount_segments(struct request_queue *, struct bio *); extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, @@ -726,7 +711,6 @@ extern int blk_execute_rq(struct request_queue *, struct gendisk *, struct request *, int); extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, struct request *, int, rq_end_io_fn *); -extern void 
blk_unplug(struct request_queue *q); static inline struct request_queue *bdev_get_queue(struct block_device *bdev) { @@ -863,7 +847,6 @@ extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bd extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); extern void blk_dump_rq_flags(struct request *, char *); -extern void generic_unplug_device(struct request_queue *); extern long nr_blockdev_pages(void); int blk_get_queue(struct request_queue *); -- cgit v1.2.2 From 1f940bdfc0d03265d178d9dfd840d854819f797d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 11 Mar 2011 20:17:08 +0100 Subject: block: fixup plugging stubs for !CONFIG_BLOCK They used an older prototype, fix it up. Reported-by: Randy Dunlap Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 91fa428fa2c1..16a902f099ac 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1297,15 +1297,18 @@ static inline long nr_blockdev_pages(void) return 0; } -static inline void blk_start_plug(struct list_head *list) +struct blk_plug { +}; + +static inline void blk_start_plug(struct blk_plug *plug) { } -static inline void blk_finish_plug(struct list_head *list) +static inline void blk_finish_plug(struct blk_plug *plug) { } -static inline void blk_flush_plug(struct task_struct *tsk) +static inline void blk_flush_plug(struct task_struct *task) { } -- cgit v1.2.2 From a63a5cf84dac7a23a57c800eea5734701e7d3c04 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 1 Apr 2011 21:02:31 +0200 Subject: dm: improve block integrity support The current block integrity (DIF/DIX) support in DM is verifying that all devices' integrity profiles match during DM device resume (which is past the point of no return). To some degree that is unavoidable (stacked DM devices force this late checking). 
But for most DM devices (which aren't stacking on other DM devices) the ideal time to verify all integrity profiles match is during table load. Introduce the notion of an "initialized" integrity profile: a profile that was blk_integrity_register()'d with a non-NULL 'blk_integrity' template. Add blk_integrity_is_initialized() to allow checking if a profile was initialized. Update DM integrity support to: - check all devices with _initialized_ integrity profiles match during table load; uninitialized profiles (e.g. for underlying DM device(s) of a stacked DM device) are ignored. - disallow a table load that would result in an integrity profile that conflicts with a DM device's existing (in-use) integrity profile - avoid clearing an existing integrity profile - validate all integrity profiles match during resume; but if they don't all we can do is report the mismatch (during resume we're past the point of no return) Signed-off-by: Mike Snitzer Cc: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 16a902f099ac..32176cc8e715 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1206,6 +1206,7 @@ struct blk_integrity { struct kobject kobj; }; +extern bool blk_integrity_is_initialized(struct gendisk *); extern int blk_integrity_register(struct gendisk *, struct blk_integrity *); extern void blk_integrity_unregister(struct gendisk *); extern int blk_integrity_compare(struct gendisk *, struct gendisk *); @@ -1262,6 +1263,7 @@ queue_max_integrity_segments(struct request_queue *q) #define queue_max_integrity_segments(a) (0) #define blk_integrity_merge_rq(a, b, c) (0) #define blk_integrity_merge_bio(a, b, c) (0) +#define blk_integrity_is_initialized(a) (0) #endif /* CONFIG_BLK_DEV_INTEGRITY */ -- cgit v1.2.2 From f75664570d8b75469cc468f23c2b27220984983b Mon Sep 17 00:00:00 2001 From: Jens Axboe 
Date: Tue, 12 Apr 2011 10:17:31 +0200 Subject: block: add callback function for unplug notification MD would like to know when a queue is unplugged, so it can flush it's bitmap writes. Add such a callback. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 32176cc8e715..c07ffafac5d4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -196,6 +196,7 @@ typedef void (request_fn_proc) (struct request_queue *q); typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); typedef int (prep_rq_fn) (struct request_queue *, struct request *); typedef void (unprep_rq_fn) (struct request_queue *, struct request *); +typedef void (unplugged_fn) (struct request_queue *); struct bio_vec; struct bvec_merge_data { @@ -283,6 +284,7 @@ struct request_queue rq_timed_out_fn *rq_timed_out_fn; dma_drain_needed_fn *dma_drain_needed; lld_busy_fn *lld_busy_fn; + unplugged_fn *unplugged_fn; /* * Dispatch queue sorting @@ -841,6 +843,7 @@ extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); +extern void blk_queue_unplugged(struct request_queue *, unplugged_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -- cgit v1.2.2 From 88b996cd0652280cc9b9fc70008fda15f14175e1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 15 Apr 2011 15:20:10 +0200 Subject: block: cleanup the block plug helper functions It's a bit of a mess currently. 
task->plug is being cleared and reset in __blk_finish_plug(), and blk_finish_plug() is testing for a NULL plug which cannot happen even from schedule() anymore since it uses blk_needs_flush_plug() to determine whether to call into this function at all. So get rid of some of the cruft. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c07ffafac5d4..ffe48ff318f9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -865,14 +865,14 @@ struct blk_plug { extern void blk_start_plug(struct blk_plug *); extern void blk_finish_plug(struct blk_plug *); -extern void __blk_flush_plug(struct task_struct *, struct blk_plug *); +extern void blk_flush_plug_list(struct blk_plug *); static inline void blk_flush_plug(struct task_struct *tsk) { struct blk_plug *plug = tsk->plug; - if (unlikely(plug)) - __blk_flush_plug(tsk, plug); + if (plug) + blk_flush_plug_list(plug); } static inline bool blk_needs_flush_plug(struct task_struct *tsk) -- cgit v1.2.2 From f6603783f9f099bf7a83b3f6c689bbbf74f0e96e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 15 Apr 2011 15:49:07 +0200 Subject: block: only force kblockd unplugging from the schedule() path For the explicit unplugging, we'd prefer to kick things off immediately and not pay the penalty of the latency to switch to kblockd. So let blk_finish_plug() do the run inline, while the implicit-on-schedule-out unplug will punt to kblockd. 
Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ffe48ff318f9..1c76506fcf11 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -865,14 +865,14 @@ struct blk_plug { extern void blk_start_plug(struct blk_plug *); extern void blk_finish_plug(struct blk_plug *); -extern void blk_flush_plug_list(struct blk_plug *); +extern void blk_flush_plug_list(struct blk_plug *, bool); static inline void blk_flush_plug(struct task_struct *tsk) { struct blk_plug *plug = tsk->plug; if (plug) - blk_flush_plug_list(plug); + blk_flush_plug_list(plug, true); } static inline bool blk_needs_flush_plug(struct task_struct *tsk) -- cgit v1.2.2 From a237c1c5bc5dc5c76a21be922dca4826f3eca8ca Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 16 Apr 2011 13:27:55 +0200 Subject: block: let io_schedule() flush the plug inline Linus correctly observes that the most important dispatch cases are now done from kblockd, this isn't ideal for latency reasons. The original reason for switching dispatches out-of-line was to avoid too deep a stack, so by _only_ letting the "accidental" flush directly in schedule() be guarded by offload to kblockd, we should be able to get the best of both worlds. So add a blk_schedule_flush_plug() that offloads to kblockd, and only use that from the schedule() path. 
Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1c76506fcf11..ec0357d8c4a5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -871,6 +871,14 @@ static inline void blk_flush_plug(struct task_struct *tsk) { struct blk_plug *plug = tsk->plug; + if (plug) + blk_flush_plug_list(plug, false); +} + +static inline void blk_schedule_flush_plug(struct task_struct *tsk) +{ + struct blk_plug *plug = tsk->plug; + if (plug) blk_flush_plug_list(plug, true); } @@ -1317,6 +1325,11 @@ static inline void blk_flush_plug(struct task_struct *task) { } +static inline void blk_schedule_flush_plug(struct task_struct *task) +{ +} + + static inline bool blk_needs_flush_plug(struct task_struct *tsk) { return false; -- cgit v1.2.2 From 048c9374a749a27f16493cea033fa4a8ff492356 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 18 Apr 2011 09:52:22 +0200 Subject: block: Enhance new plugging support to support general callbacks md/raid requires an unplug callback, but as it does not uses requests the current code cannot provide one. So allow arbitrary callbacks to be attached to the blk_plug. 
Signed-off-by: NeilBrown Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ec0357d8c4a5..f3f7879391a7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -860,8 +860,13 @@ extern void blk_put_queue(struct request_queue *); struct blk_plug { unsigned long magic; struct list_head list; + struct list_head cb_list; unsigned int should_sort; }; +struct blk_plug_cb { + struct list_head list; + void (*callback)(struct blk_plug_cb *); +}; extern void blk_start_plug(struct blk_plug *); extern void blk_finish_plug(struct blk_plug *); @@ -887,7 +892,7 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk) { struct blk_plug *plug = tsk->plug; - return plug && !list_empty(&plug->list); + return plug && (!list_empty(&plug->list) || !list_empty(&plug->cb_list)); } /* -- cgit v1.2.2 From b4cb290e0a7d19235bd075c2ad4d60dbab0bac15 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 18 Apr 2011 09:54:05 +0200 Subject: Revert "block: add callback function for unplug notification" MD can't use this since it really requires us to be able to keep more than a single piece of state for the unplug. Commit 048c9374 added the required support for MD, so get rid of this now unused code. This reverts commit f75664570d8b75469cc468f23c2b27220984983b. 
Conflicts: block/blk-core.c Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f3f7879391a7..3448d89297e8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -196,7 +196,6 @@ typedef void (request_fn_proc) (struct request_queue *q); typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); typedef int (prep_rq_fn) (struct request_queue *, struct request *); typedef void (unprep_rq_fn) (struct request_queue *, struct request *); -typedef void (unplugged_fn) (struct request_queue *); struct bio_vec; struct bvec_merge_data { @@ -284,7 +283,6 @@ struct request_queue rq_timed_out_fn *rq_timed_out_fn; dma_drain_needed_fn *dma_drain_needed; lld_busy_fn *lld_busy_fn; - unplugged_fn *unplugged_fn; /* * Dispatch queue sorting @@ -843,7 +841,6 @@ extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); -extern void blk_queue_unplugged(struct request_queue *, unplugged_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -- cgit v1.2.2 From 24ecfbe27f65563909b14492afda2f1c21f7c044 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Apr 2011 11:41:33 +0200 Subject: block: add blk_run_queue_async Instead of overloading __blk_run_queue to force an offload to kblockd add a new blk_run_queue_async helper to do it explicitly. I've kept the blk_queue_stopped check for now, but I suspect it's not needed as the check we do when the workqueue items runs should be enough. 
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3448d89297e8..cbbfd98ad4a3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -697,7 +697,7 @@ extern void blk_start_queue(struct request_queue *q); extern void blk_stop_queue(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); extern void __blk_stop_queue(struct request_queue *q); -extern void __blk_run_queue(struct request_queue *q, bool force_kblockd); +extern void __blk_run_queue(struct request_queue *q); extern void blk_run_queue(struct request_queue *); extern int blk_rq_map_user(struct request_queue *, struct request *, struct rq_map_data *, void __user *, unsigned long, -- cgit v1.2.2