aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJens Axboe <jens.axboe@oracle.com>2009-04-06 08:48:07 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2009-04-06 11:04:54 -0400
commitaeb6fafb8fa53266d70ca7474fcda2bdaf96524a (patch)
treeebe8e0c616a9dbfdfe5184b64d0150ea02d3d1b2
parent644b2d99b7a8677a56909a7b1fde31677eba4471 (diff)
block: Add flag for telling the IO schedulers NOT to anticipate more IO
By default, CFQ will anticipate more IO from a given io context if the previously completed IO was sync. This used to be fine, since the only sync IO was reads and O_DIRECT writes. But with more "normal" sync writes being used now, we don't want to anticipate for those. Add a bio/request flag that informs the IO scheduler that this is a sync request that we should not idle for. Introduce WRITE_ODIRECT specifically for O_DIRECT writes, and make sure that the other sync writes set this flag. Signed-off-by: Jens Axboe <jens.axboe@oracle.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--block/blk-core.c2
-rw-r--r--block/cfq-iosched.c4
-rw-r--r--fs/direct-io.c2
-rw-r--r--include/linux/bio.h19
-rw-r--r--include/linux/blkdev.h3
-rw-r--r--include/linux/fs.h9
6 files changed, 25 insertions, 14 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index c4198f083e5b..25572802dac2 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1128,6 +1128,8 @@ void init_request_from_bio(struct request *req, struct bio *bio)
1128 req->cmd_flags |= REQ_UNPLUG; 1128 req->cmd_flags |= REQ_UNPLUG;
1129 if (bio_rw_meta(bio)) 1129 if (bio_rw_meta(bio))
1130 req->cmd_flags |= REQ_RW_META; 1130 req->cmd_flags |= REQ_RW_META;
1131 if (bio_noidle(bio))
1132 req->cmd_flags |= REQ_NOIDLE;
1131 1133
1132 req->errors = 0; 1134 req->errors = 0;
1133 req->hard_sector = req->sector = bio->bi_sector; 1135 req->hard_sector = req->sector = bio->bi_sector;
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 664ebfd092ec..9e809345f71a 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1992,8 +1992,10 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
1992 } 1992 }
1993 if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq)) 1993 if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq))
1994 cfq_slice_expired(cfqd, 1); 1994 cfq_slice_expired(cfqd, 1);
1995 else if (sync && RB_EMPTY_ROOT(&cfqq->sort_list)) 1995 else if (sync && !rq_noidle(rq) &&
1996 RB_EMPTY_ROOT(&cfqq->sort_list)) {
1996 cfq_arm_slice_timer(cfqd); 1997 cfq_arm_slice_timer(cfqd);
1998 }
1997 } 1999 }
1998 2000
1999 if (!cfqd->rq_in_driver) 2001 if (!cfqd->rq_in_driver)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index b6d43908ff7a..da258e7249cc 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1126,7 +1126,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1126 int acquire_i_mutex = 0; 1126 int acquire_i_mutex = 0;
1127 1127
1128 if (rw & WRITE) 1128 if (rw & WRITE)
1129 rw = WRITE_SYNC; 1129 rw = WRITE_ODIRECT;
1130 1130
1131 if (bdev) 1131 if (bdev)
1132 bdev_blkbits = blksize_bits(bdev_hardsect_size(bdev)); 1132 bdev_blkbits = blksize_bits(bdev_hardsect_size(bdev));
diff --git a/include/linux/bio.h b/include/linux/bio.h
index b05b1d4d17d2..b900d2c67d29 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -145,20 +145,21 @@ struct bio {
145 * bit 2 -- barrier 145 * bit 2 -- barrier
146 * Insert a serialization point in the IO queue, forcing previously 146 * Insert a serialization point in the IO queue, forcing previously
147 * submitted IO to be completed before this one is issued. 147 * submitted IO to be completed before this one is issued.
148 * bit 3 -- synchronous I/O hint: the block layer will unplug immediately 148 * bit 3 -- synchronous I/O hint.
149 * Note that this does NOT indicate that the IO itself is sync, just 149 * bit 4 -- Unplug the device immediately after submitting this bio.
150 * that the block layer will not postpone issue of this IO by plugging. 150 * bit 5 -- metadata request
151 * bit 4 -- metadata request
152 * Used for tracing to differentiate metadata and data IO. May also 151 * Used for tracing to differentiate metadata and data IO. May also
153 * get some preferential treatment in the IO scheduler 152 * get some preferential treatment in the IO scheduler
154 * bit 5 -- discard sectors 153 * bit 6 -- discard sectors
155 * Informs the lower level device that this range of sectors is no longer 154 * Informs the lower level device that this range of sectors is no longer
156 * used by the file system and may thus be freed by the device. Used 155 * used by the file system and may thus be freed by the device. Used
157 * for flash based storage. 156 * for flash based storage.
158 * bit 6 -- fail fast device errors 157 * bit 7 -- fail fast device errors
159 * bit 7 -- fail fast transport errors 158 * bit 8 -- fail fast transport errors
160 * bit 8 -- fail fast driver errors 159 * bit 9 -- fail fast driver errors
161 * Don't want driver retries for any fast fail whatever the reason. 160 * Don't want driver retries for any fast fail whatever the reason.
161 * bit 10 -- Tell the IO scheduler not to wait for more requests after this
162 one has been submitted, even if it is a SYNC request.
162 */ 163 */
163#define BIO_RW 0 /* Must match RW in req flags (blkdev.h) */ 164#define BIO_RW 0 /* Must match RW in req flags (blkdev.h) */
164#define BIO_RW_AHEAD 1 /* Must match FAILFAST in req flags */ 165#define BIO_RW_AHEAD 1 /* Must match FAILFAST in req flags */
@@ -170,6 +171,7 @@ struct bio {
170#define BIO_RW_FAILFAST_DEV 7 171#define BIO_RW_FAILFAST_DEV 7
171#define BIO_RW_FAILFAST_TRANSPORT 8 172#define BIO_RW_FAILFAST_TRANSPORT 8
172#define BIO_RW_FAILFAST_DRIVER 9 173#define BIO_RW_FAILFAST_DRIVER 9
174#define BIO_RW_NOIDLE 10
173 175
174#define bio_rw_flagged(bio, flag) ((bio)->bi_rw & (1 << (flag))) 176#define bio_rw_flagged(bio, flag) ((bio)->bi_rw & (1 << (flag)))
175 177
@@ -188,6 +190,7 @@ struct bio {
188#define bio_rw_ahead(bio) bio_rw_flagged(bio, BIO_RW_AHEAD) 190#define bio_rw_ahead(bio) bio_rw_flagged(bio, BIO_RW_AHEAD)
189#define bio_rw_meta(bio) bio_rw_flagged(bio, BIO_RW_META) 191#define bio_rw_meta(bio) bio_rw_flagged(bio, BIO_RW_META)
190#define bio_discard(bio) bio_rw_flagged(bio, BIO_RW_DISCARD) 192#define bio_discard(bio) bio_rw_flagged(bio, BIO_RW_DISCARD)
193#define bio_noidle(bio) bio_rw_flagged(bio, BIO_RW_NOIDLE)
191 194
192/* 195/*
193 * upper 16 bits of bi_rw define the io priority of this bio 196 * upper 16 bits of bi_rw define the io priority of this bio
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 67dae3bd881c..e03660964e02 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -118,6 +118,7 @@ enum rq_flag_bits {
118 __REQ_COPY_USER, /* contains copies of user pages */ 118 __REQ_COPY_USER, /* contains copies of user pages */
119 __REQ_INTEGRITY, /* integrity metadata has been remapped */ 119 __REQ_INTEGRITY, /* integrity metadata has been remapped */
120 __REQ_UNPLUG, /* unplug queue on submission */ 120 __REQ_UNPLUG, /* unplug queue on submission */
121 __REQ_NOIDLE, /* Don't anticipate more IO after this one */
121 __REQ_NR_BITS, /* stops here */ 122 __REQ_NR_BITS, /* stops here */
122}; 123};
123 124
@@ -145,6 +146,7 @@ enum rq_flag_bits {
145#define REQ_COPY_USER (1 << __REQ_COPY_USER) 146#define REQ_COPY_USER (1 << __REQ_COPY_USER)
146#define REQ_INTEGRITY (1 << __REQ_INTEGRITY) 147#define REQ_INTEGRITY (1 << __REQ_INTEGRITY)
147#define REQ_UNPLUG (1 << __REQ_UNPLUG) 148#define REQ_UNPLUG (1 << __REQ_UNPLUG)
149#define REQ_NOIDLE (1 << __REQ_NOIDLE)
148 150
149#define BLK_MAX_CDB 16 151#define BLK_MAX_CDB 16
150 152
@@ -633,6 +635,7 @@ static inline bool rq_is_sync(struct request *rq)
633} 635}
634 636
635#define rq_is_meta(rq) ((rq)->cmd_flags & REQ_RW_META) 637#define rq_is_meta(rq) ((rq)->cmd_flags & REQ_RW_META)
638#define rq_noidle(rq) ((rq)->cmd_flags & REQ_NOIDLE)
636 639
637static inline int blk_queue_full(struct request_queue *q, int sync) 640static inline int blk_queue_full(struct request_queue *q, int sync)
638{ 641{
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ea0510978f76..cae5720f431c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -95,11 +95,12 @@ struct inodes_stat_t {
95#define SWRITE 3 /* for ll_rw_block() - wait for buffer lock */ 95#define SWRITE 3 /* for ll_rw_block() - wait for buffer lock */
96#define READ_SYNC (READ | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG)) 96#define READ_SYNC (READ | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG))
97#define READ_META (READ | (1 << BIO_RW_META)) 97#define READ_META (READ | (1 << BIO_RW_META))
98#define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG)) 98#define WRITE_SYNC_PLUG (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE))
99#define WRITE_SYNC_PLUG (WRITE | (1 << BIO_RW_SYNCIO)) 99#define WRITE_SYNC (WRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG))
100#define SWRITE_SYNC (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG)) 100#define WRITE_ODIRECT (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG))
101#define SWRITE_SYNC_PLUG \ 101#define SWRITE_SYNC_PLUG \
102 (SWRITE | (1 << BIO_RW_SYNCIO)) 102 (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE))
103#define SWRITE_SYNC (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG))
103#define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER)) 104#define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER))
104#define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD) 105#define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD)
105#define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER)) 106#define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER))