aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
authorJens Axboe <axboe@fb.com>2015-04-15 19:05:48 -0400
committerAl Viro <viro@zeniv.linux.org.uk>2015-04-24 15:45:28 -0400
commitfe0f07d08ee35fb13d2cb048970072fe4f71ad14 (patch)
treebeb614e8860cfa1791143d01ba17f686304c5caf /fs/ext4
parent8e3c500594dca9a12c27eb6d77b82e0766879bfd (diff)
direct-io: only inc/dec inode->i_dio_count for file systems
do_blockdev_direct_IO() increments and decrements the inode ->i_dio_count for each IO operation. It does this to protect against truncate of a file. Block devices don't need this sort of protection. For a capable multiqueue setup, this atomic int is the only shared state between applications accessing the device for O_DIRECT, and it presents a scaling wall for that. In my testing, as much as 30% of system time is spent incrementing and decrementing this value. A mixed read/write workload improved from ~2.5M IOPS to ~9.6M IOPS, with better latencies too. Before: clat percentiles (usec): | 1.00th=[ 33], 5.00th=[ 34], 10.00th=[ 34], 20.00th=[ 34], | 30.00th=[ 34], 40.00th=[ 34], 50.00th=[ 35], 60.00th=[ 35], | 70.00th=[ 35], 80.00th=[ 35], 90.00th=[ 37], 95.00th=[ 80], | 99.00th=[ 98], 99.50th=[ 151], 99.90th=[ 155], 99.95th=[ 155], | 99.99th=[ 165] After: clat percentiles (usec): | 1.00th=[ 95], 5.00th=[ 108], 10.00th=[ 129], 20.00th=[ 149], | 30.00th=[ 155], 40.00th=[ 161], 50.00th=[ 167], 60.00th=[ 171], | 70.00th=[ 177], 80.00th=[ 185], 90.00th=[ 201], 95.00th=[ 270], | 99.00th=[ 390], 99.50th=[ 398], 99.90th=[ 418], 99.95th=[ 422], | 99.99th=[ 438] In other setups, Robert Elliott reported seeing good performance improvements: https://lkml.org/lkml/2015/4/3/557 The more applications accessing the device, the worse it gets. Add a new direct-io flags, DIO_SKIP_DIO_COUNT, which tells do_blockdev_direct_IO() that it need not worry about incrementing or decrementing the inode i_dio_count for this caller. Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Theodore Ts'o <tytso@mit.edu> Cc: Elliott, Robert (Server Storage) <elliott@hp.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Jens Axboe <axboe@fb.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/indirect.c6
-rw-r--r--fs/ext4/inode.c4
2 files changed, 5 insertions, 5 deletions
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 3580629e42d3..958824019509 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -682,11 +682,11 @@ retry:
682 * via ext4_inode_block_unlocked_dio(). Check inode's state 682 * via ext4_inode_block_unlocked_dio(). Check inode's state
683 * while holding extra i_dio_count ref. 683 * while holding extra i_dio_count ref.
684 */ 684 */
685 atomic_inc(&inode->i_dio_count); 685 inode_dio_begin(inode);
686 smp_mb(); 686 smp_mb();
687 if (unlikely(ext4_test_inode_state(inode, 687 if (unlikely(ext4_test_inode_state(inode,
688 EXT4_STATE_DIOREAD_LOCK))) { 688 EXT4_STATE_DIOREAD_LOCK))) {
689 inode_dio_done(inode); 689 inode_dio_end(inode);
690 goto locked; 690 goto locked;
691 } 691 }
692 if (IS_DAX(inode)) 692 if (IS_DAX(inode))
@@ -697,7 +697,7 @@ retry:
697 inode->i_sb->s_bdev, iter, 697 inode->i_sb->s_bdev, iter,
698 offset, ext4_get_block, NULL, 698 offset, ext4_get_block, NULL,
699 NULL, 0); 699 NULL, 0);
700 inode_dio_done(inode); 700 inode_dio_end(inode);
701 } else { 701 } else {
702locked: 702locked:
703 if (IS_DAX(inode)) 703 if (IS_DAX(inode))
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 063052e4aa8b..bccec41fb94b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2977,7 +2977,7 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
2977 * overwrite DIO as i_dio_count needs to be incremented under i_mutex. 2977 * overwrite DIO as i_dio_count needs to be incremented under i_mutex.
2978 */ 2978 */
2979 if (iov_iter_rw(iter) == WRITE) 2979 if (iov_iter_rw(iter) == WRITE)
2980 atomic_inc(&inode->i_dio_count); 2980 inode_dio_begin(inode);
2981 2981
2982 /* If we do a overwrite dio, i_mutex locking can be released */ 2982 /* If we do a overwrite dio, i_mutex locking can be released */
2983 overwrite = *((int *)iocb->private); 2983 overwrite = *((int *)iocb->private);
@@ -3079,7 +3079,7 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
3079 3079
3080retake_lock: 3080retake_lock:
3081 if (iov_iter_rw(iter) == WRITE) 3081 if (iov_iter_rw(iter) == WRITE)
3082 inode_dio_done(inode); 3082 inode_dio_end(inode);
3083 /* take i_mutex locking again if we do a ovewrite dio */ 3083 /* take i_mutex locking again if we do a ovewrite dio */
3084 if (overwrite) { 3084 if (overwrite) {
3085 up_read(&EXT4_I(inode)->i_data_sem); 3085 up_read(&EXT4_I(inode)->i_data_sem);