diff options
author | Jens Axboe <axboe@fb.com> | 2015-04-15 19:05:48 -0400 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2015-04-24 15:45:28 -0400 |
commit | fe0f07d08ee35fb13d2cb048970072fe4f71ad14 (patch) | |
tree | beb614e8860cfa1791143d01ba17f686304c5caf /fs/ext4 | |
parent | 8e3c500594dca9a12c27eb6d77b82e0766879bfd (diff) |
direct-io: only inc/dec inode->i_dio_count for file systems
do_blockdev_direct_IO() increments and decrements the inode
->i_dio_count for each IO operation. It does this to protect against
truncate of a file. Block devices don't need this sort of protection.
For a capable multiqueue setup, this atomic int is the only shared
state between applications accessing the device for O_DIRECT, and it
presents a scaling wall for that. In my testing, as much as 30% of
system time is spent incrementing and decrementing this value. A mixed
read/write workload improved from ~2.5M IOPS to ~9.6M IOPS, with
better latencies too. Before:
clat percentiles (usec):
| 1.00th=[ 33], 5.00th=[ 34], 10.00th=[ 34], 20.00th=[ 34],
| 30.00th=[ 34], 40.00th=[ 34], 50.00th=[ 35], 60.00th=[ 35],
| 70.00th=[ 35], 80.00th=[ 35], 90.00th=[ 37], 95.00th=[ 80],
| 99.00th=[ 98], 99.50th=[ 151], 99.90th=[ 155], 99.95th=[ 155],
| 99.99th=[ 165]
After:
clat percentiles (usec):
| 1.00th=[ 95], 5.00th=[ 108], 10.00th=[ 129], 20.00th=[ 149],
| 30.00th=[ 155], 40.00th=[ 161], 50.00th=[ 167], 60.00th=[ 171],
| 70.00th=[ 177], 80.00th=[ 185], 90.00th=[ 201], 95.00th=[ 270],
| 99.00th=[ 390], 99.50th=[ 398], 99.90th=[ 418], 99.95th=[ 422],
| 99.99th=[ 438]
In other setups, Robert Elliott reported seeing good performance
improvements:
https://lkml.org/lkml/2015/4/3/557
The more applications accessing the device, the worse it gets.
Add a new direct-io flag, DIO_SKIP_DIO_COUNT, which tells
do_blockdev_direct_IO() that it need not worry about incrementing
or decrementing the inode i_dio_count for this caller.
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Elliott, Robert (Server Storage) <elliott@hp.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/indirect.c | 6 | ||||
-rw-r--r-- | fs/ext4/inode.c | 4 |
2 files changed, 5 insertions, 5 deletions
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 3580629e42d3..958824019509 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c | |||
@@ -682,11 +682,11 @@ retry: | |||
682 | * via ext4_inode_block_unlocked_dio(). Check inode's state | 682 | * via ext4_inode_block_unlocked_dio(). Check inode's state |
683 | * while holding extra i_dio_count ref. | 683 | * while holding extra i_dio_count ref. |
684 | */ | 684 | */ |
685 | atomic_inc(&inode->i_dio_count); | 685 | inode_dio_begin(inode); |
686 | smp_mb(); | 686 | smp_mb(); |
687 | if (unlikely(ext4_test_inode_state(inode, | 687 | if (unlikely(ext4_test_inode_state(inode, |
688 | EXT4_STATE_DIOREAD_LOCK))) { | 688 | EXT4_STATE_DIOREAD_LOCK))) { |
689 | inode_dio_done(inode); | 689 | inode_dio_end(inode); |
690 | goto locked; | 690 | goto locked; |
691 | } | 691 | } |
692 | if (IS_DAX(inode)) | 692 | if (IS_DAX(inode)) |
@@ -697,7 +697,7 @@ retry: | |||
697 | inode->i_sb->s_bdev, iter, | 697 | inode->i_sb->s_bdev, iter, |
698 | offset, ext4_get_block, NULL, | 698 | offset, ext4_get_block, NULL, |
699 | NULL, 0); | 699 | NULL, 0); |
700 | inode_dio_done(inode); | 700 | inode_dio_end(inode); |
701 | } else { | 701 | } else { |
702 | locked: | 702 | locked: |
703 | if (IS_DAX(inode)) | 703 | if (IS_DAX(inode)) |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 063052e4aa8b..bccec41fb94b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -2977,7 +2977,7 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, | |||
2977 | * overwrite DIO as i_dio_count needs to be incremented under i_mutex. | 2977 | * overwrite DIO as i_dio_count needs to be incremented under i_mutex. |
2978 | */ | 2978 | */ |
2979 | if (iov_iter_rw(iter) == WRITE) | 2979 | if (iov_iter_rw(iter) == WRITE) |
2980 | atomic_inc(&inode->i_dio_count); | 2980 | inode_dio_begin(inode); |
2981 | 2981 | ||
2982 | /* If we do a overwrite dio, i_mutex locking can be released */ | 2982 | /* If we do a overwrite dio, i_mutex locking can be released */ |
2983 | overwrite = *((int *)iocb->private); | 2983 | overwrite = *((int *)iocb->private); |
@@ -3079,7 +3079,7 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, | |||
3079 | 3079 | ||
3080 | retake_lock: | 3080 | retake_lock: |
3081 | if (iov_iter_rw(iter) == WRITE) | 3081 | if (iov_iter_rw(iter) == WRITE) |
3082 | inode_dio_done(inode); | 3082 | inode_dio_end(inode); |
3083 | /* take i_mutex locking again if we do a ovewrite dio */ | 3083 | /* take i_mutex locking again if we do a ovewrite dio */ |
3084 | if (overwrite) { | 3084 | if (overwrite) { |
3085 | up_read(&EXT4_I(inode)->i_data_sem); | 3085 | up_read(&EXT4_I(inode)->i_data_sem); |