aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorJens Axboe <axboe@fb.com>2015-04-15 19:05:48 -0400
committerAl Viro <viro@zeniv.linux.org.uk>2015-04-24 15:45:28 -0400
commitfe0f07d08ee35fb13d2cb048970072fe4f71ad14 (patch)
treebeb614e8860cfa1791143d01ba17f686304c5caf /include
parent8e3c500594dca9a12c27eb6d77b82e0766879bfd (diff)
direct-io: only inc/dec inode->i_dio_count for file systems
do_blockdev_direct_IO() increments and decrements the inode ->i_dio_count for each IO operation. It does this to protect against truncate of a file. Block devices don't need this sort of protection. For a capable multiqueue setup, this atomic int is the only shared state between applications accessing the device for O_DIRECT, and it presents a scaling wall for that. In my testing, as much as 30% of system time is spent incrementing and decrementing this value. A mixed read/write workload improved from ~2.5M IOPS to ~9.6M IOPS, with better latencies too. Before: clat percentiles (usec): | 1.00th=[ 33], 5.00th=[ 34], 10.00th=[ 34], 20.00th=[ 34], | 30.00th=[ 34], 40.00th=[ 34], 50.00th=[ 35], 60.00th=[ 35], | 70.00th=[ 35], 80.00th=[ 35], 90.00th=[ 37], 95.00th=[ 80], | 99.00th=[ 98], 99.50th=[ 151], 99.90th=[ 155], 99.95th=[ 155], | 99.99th=[ 165] After: clat percentiles (usec): | 1.00th=[ 95], 5.00th=[ 108], 10.00th=[ 129], 20.00th=[ 149], | 30.00th=[ 155], 40.00th=[ 161], 50.00th=[ 167], 60.00th=[ 171], | 70.00th=[ 177], 80.00th=[ 185], 90.00th=[ 201], 95.00th=[ 270], | 99.00th=[ 390], 99.50th=[ 398], 99.90th=[ 418], 99.95th=[ 422], | 99.99th=[ 438] In other setups, Robert Elliott reported seeing good performance improvements: https://lkml.org/lkml/2015/4/3/557 The more applications accessing the device, the worse it gets. Add a new direct-io flag, DIO_SKIP_DIO_COUNT, which tells do_blockdev_direct_IO() that it need not worry about incrementing or decrementing the inode i_dio_count for this caller. Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Theodore Ts'o <tytso@mit.edu> Cc: Elliott, Robert (Server Storage) <elliott@hp.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Jens Axboe <axboe@fb.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'include')
-rw-r--r--include/linux/fs.h29
1 files changed, 28 insertions, 1 deletions
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b1d7db28c13c..9055eefa92c7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2635,6 +2635,9 @@ enum {
2635 2635
2636 /* filesystem can handle aio writes beyond i_size */ 2636 /* filesystem can handle aio writes beyond i_size */
2637 DIO_ASYNC_EXTEND = 0x04, 2637 DIO_ASYNC_EXTEND = 0x04,
2638
2639 /* inode/fs/bdev does not need truncate protection */
2640 DIO_SKIP_DIO_COUNT = 0x08,
2638}; 2641};
2639 2642
2640void dio_end_io(struct bio *bio, int error); 2643void dio_end_io(struct bio *bio, int error);
@@ -2657,7 +2660,31 @@ static inline ssize_t blockdev_direct_IO(struct kiocb *iocb,
2657#endif 2660#endif
2658 2661
2659void inode_dio_wait(struct inode *inode); 2662void inode_dio_wait(struct inode *inode);
2660void inode_dio_done(struct inode *inode); 2663
2664/*
2665 * inode_dio_begin - signal start of a direct I/O request
2666 * @inode: inode the direct I/O happens on
2667 *
2668 * This is called before we start processing a direct I/O request,
2669 * and holds off callers waiting for direct I/O to be quiesced.
2670 */
2671static inline void inode_dio_begin(struct inode *inode)
2672{
2673 atomic_inc(&inode->i_dio_count);
2674}
2675
2676/*
2677 * inode_dio_end - signal finish of a direct I/O request
2678 * @inode: inode the direct I/O happens on
2679 *
2680 * This is called once we've finished processing a direct I/O request,
2681 * and is used to wake up callers waiting for direct I/O to be quiesced.
2682 */
2683static inline void inode_dio_end(struct inode *inode)
2684{
2685 if (atomic_dec_and_test(&inode->i_dio_count))
2686 wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
2687}
2661 2688
2662extern void inode_set_flags(struct inode *inode, unsigned int flags, 2689extern void inode_set_flags(struct inode *inode, unsigned int flags,
2663 unsigned int mask); 2690 unsigned int mask);