aboutsummaryrefslogtreecommitdiffstats
path: root/fs/direct-io.c
diff options
context:
space:
mode:
authorAndi Kleen <ak@linux.intel.com>2012-01-12 20:20:35 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2012-01-12 23:13:12 -0500
commit65dd2aa90aa17a26703c28652408192856aa0396 (patch)
tree4ea4635f786fc295d16a459925da67701253df97 /fs/direct-io.c
parent87192a2a49c475cf322cb143e0fa63b0102d8567 (diff)
dio: optimize cache misses in the submission path
Some investigation of a transaction processing workload showed that a major consumer of cycles in __blockdev_direct_IO is the cache miss while accessing the block size. This is because it has to walk the chain from block_dev to gendisk to queue.

The block size is needed early on to check alignment and sizes. It's only done if the check for the inode block size fails. But the costly block device state is unconditionally fetched.

- Reorganize the code to only fetch block dev state when actually needed. Then do a prefetch on the block dev early on in the direct IO path. This is worth it, because there is substantial code run before we actually touch the block dev now.

- I also added some unlikelies to make it clear to the compiler that the block device fetch code is not normally executed.

This gave a small, but measurable improvement on a large database benchmark (about 0.3%).

[akpm@linux-foundation.org: coding-style fixes]
[sfr@canb.auug.org.au: using prefetch requires including prefetch.h]
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/direct-io.c')
-rw-r--r--fs/direct-io.c46
1 file changed, 37 insertions, 9 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 389863f59cda..4a588dbd11bf 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -36,6 +36,7 @@
36#include <linux/rwsem.h> 36#include <linux/rwsem.h>
37#include <linux/uio.h> 37#include <linux/uio.h>
38#include <linux/atomic.h> 38#include <linux/atomic.h>
39#include <linux/prefetch.h>
39 40
40/* 41/*
41 * How many user pages to map in one call to get_user_pages(). This determines 42 * How many user pages to map in one call to get_user_pages(). This determines
@@ -1087,8 +1088,8 @@ static inline int drop_refcount(struct dio *dio)
1087 * individual fields and will generate much worse code. This is important 1088 * individual fields and will generate much worse code. This is important
1088 * for the whole file. 1089 * for the whole file.
1089 */ 1090 */
1090ssize_t 1091static inline ssize_t
1091__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, 1092do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1092 struct block_device *bdev, const struct iovec *iov, loff_t offset, 1093 struct block_device *bdev, const struct iovec *iov, loff_t offset,
1093 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, 1094 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
1094 dio_submit_t submit_io, int flags) 1095 dio_submit_t submit_io, int flags)
@@ -1097,7 +1098,6 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1097 size_t size; 1098 size_t size;
1098 unsigned long addr; 1099 unsigned long addr;
1099 unsigned blkbits = inode->i_blkbits; 1100 unsigned blkbits = inode->i_blkbits;
1100 unsigned bdev_blkbits = 0;
1101 unsigned blocksize_mask = (1 << blkbits) - 1; 1101 unsigned blocksize_mask = (1 << blkbits) - 1;
1102 ssize_t retval = -EINVAL; 1102 ssize_t retval = -EINVAL;
1103 loff_t end = offset; 1103 loff_t end = offset;
@@ -1110,12 +1110,14 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1110 if (rw & WRITE) 1110 if (rw & WRITE)
1111 rw = WRITE_ODIRECT; 1111 rw = WRITE_ODIRECT;
1112 1112
1113 if (bdev) 1113 /*
1114 bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev)); 1114 * Avoid references to bdev if not absolutely needed to give
1115 * the early prefetch in the caller enough time.
1116 */
1115 1117
1116 if (offset & blocksize_mask) { 1118 if (offset & blocksize_mask) {
1117 if (bdev) 1119 if (bdev)
1118 blkbits = bdev_blkbits; 1120 blkbits = blksize_bits(bdev_logical_block_size(bdev));
1119 blocksize_mask = (1 << blkbits) - 1; 1121 blocksize_mask = (1 << blkbits) - 1;
1120 if (offset & blocksize_mask) 1122 if (offset & blocksize_mask)
1121 goto out; 1123 goto out;
@@ -1126,11 +1128,13 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1126 addr = (unsigned long)iov[seg].iov_base; 1128 addr = (unsigned long)iov[seg].iov_base;
1127 size = iov[seg].iov_len; 1129 size = iov[seg].iov_len;
1128 end += size; 1130 end += size;
1129 if ((addr & blocksize_mask) || (size & blocksize_mask)) { 1131 if (unlikely((addr & blocksize_mask) ||
1132 (size & blocksize_mask))) {
1130 if (bdev) 1133 if (bdev)
1131 blkbits = bdev_blkbits; 1134 blkbits = blksize_bits(
1135 bdev_logical_block_size(bdev));
1132 blocksize_mask = (1 << blkbits) - 1; 1136 blocksize_mask = (1 << blkbits) - 1;
1133 if ((addr & blocksize_mask) || (size & blocksize_mask)) 1137 if ((addr & blocksize_mask) || (size & blocksize_mask))
1134 goto out; 1138 goto out;
1135 } 1139 }
1136 } 1140 }
@@ -1313,6 +1317,30 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1313out: 1317out:
1314 return retval; 1318 return retval;
1315} 1319}
1320
1321ssize_t
1322__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1323 struct block_device *bdev, const struct iovec *iov, loff_t offset,
1324 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
1325 dio_submit_t submit_io, int flags)
1326{
1327 /*
1328 * The block device state is needed in the end to finally
1329 * submit everything. Since it's likely to be cache cold
1330 * prefetch it here as first thing to hide some of the
1331 * latency.
1332 *
1333 * Attempt to prefetch the pieces we likely need later.
1334 */
1335 prefetch(&bdev->bd_disk->part_tbl);
1336 prefetch(bdev->bd_queue);
1337 prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES);
1338
1339 return do_blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
1340 nr_segs, get_block, end_io,
1341 submit_io, flags);
1342}
1343
1316EXPORT_SYMBOL(__blockdev_direct_IO); 1344EXPORT_SYMBOL(__blockdev_direct_IO);
1317 1345
1318static __init int dio_init(void) 1346static __init int dio_init(void)