aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/blkdev.h
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2011-01-25 06:43:54 -0500
committerJens Axboe <jaxboe@fusionio.com>2011-01-25 06:43:54 -0500
commitae1b1539622fb46e51b4d13b3f9e5f4c713f86ae (patch)
treec5cb540141003a3ec7ebf0b8c6e01653ab6aaef5 /include/linux/blkdev.h
parent143a87f4c9c629067afea5b6703d66ea88c82f8e (diff)
block: reimplement FLUSH/FUA to support merge
The current FLUSH/FUA support has evolved from the implementation which had to perform queue draining. As such, sequencing is done queue-wide one flush request after another. However, with the draining requirement gone, there's no reason to keep the queue-wide sequential approach. This patch reimplements FLUSH/FUA support such that each FLUSH/FUA request is sequenced individually. The actual FLUSH execution is double buffered and whenever a request wants to execute one for either PRE or POSTFLUSH, it queues on the pending queue. Once certain conditions are met, a flush request is issued and on its completion all pending requests proceed to the next sequence. This allows arbitrary merging of different type of flushes. How they are merged can be primarily controlled and tuned by adjusting the above said 'conditions' used to determine when to issue the next flush. This is inspired by Darrick's patches to merge multiple zero-data flushes which helps workloads with highly concurrent fsync requests. * As flush requests are never put on the IO scheduler, request fields used for flush share space with rq->rb_node. rq->completion_data is moved out of the union. This increases the request size by one pointer. As rq->elevator_private* are used only by the iosched too, it is possible to reduce the request size further. However, to do that, we need to modify request allocation path such that iosched data is not allocated for flush requests. * FLUSH/FUA processing happens on insertion now instead of dispatch. - Comments updated as per Vivek and Mike. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: "Darrick J. Wong" <djwong@us.ibm.com> Cc: Shaohua Li <shli@kernel.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Vivek Goyal <vgoyal@redhat.com> Cc: Mike Snitzer <snitzer@redhat.com> Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
Diffstat (limited to 'include/linux/blkdev.h')
-rw-r--r--include/linux/blkdev.h18
1 file changed, 12 insertions, 6 deletions
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 36ab42c9bb99..6d7e9afd08c3 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -99,13 +99,18 @@ struct request {
99 /* 99 /*
100 * The rb_node is only used inside the io scheduler, requests 100 * The rb_node is only used inside the io scheduler, requests
101 * are pruned when moved to the dispatch queue. So let the 101 * are pruned when moved to the dispatch queue. So let the
102 * completion_data share space with the rb_node. 102 * flush fields share space with the rb_node.
103 */ 103 */
104 union { 104 union {
105 struct rb_node rb_node; /* sort/lookup */ 105 struct rb_node rb_node; /* sort/lookup */
106 void *completion_data; 106 struct {
107 unsigned int seq;
108 struct list_head list;
109 } flush;
107 }; 110 };
108 111
112 void *completion_data;
113
109 /* 114 /*
110 * Three pointers are available for the IO schedulers, if they need 115 * Three pointers are available for the IO schedulers, if they need
111 * more they have to dynamically allocate it. 116 * more they have to dynamically allocate it.
@@ -362,11 +367,12 @@ struct request_queue
362 * for flush operations 367 * for flush operations
363 */ 368 */
364 unsigned int flush_flags; 369 unsigned int flush_flags;
365 unsigned int flush_seq; 370 unsigned int flush_pending_idx:1;
366 int flush_err; 371 unsigned int flush_running_idx:1;
372 unsigned long flush_pending_since;
373 struct list_head flush_queue[2];
374 struct list_head flush_data_in_flight;
367 struct request flush_rq; 375 struct request flush_rq;
368 struct request *orig_flush_rq;
369 struct list_head pending_flushes;
370 376
371 struct mutex sysfs_lock; 377 struct mutex sysfs_lock;
372 378