aboutsummaryrefslogtreecommitdiffstats
path: root/fs/nfs
diff options
context:
space:
mode:
authorJens Axboe <axboe@fb.com>2015-04-15 19:05:48 -0400
committerAl Viro <viro@zeniv.linux.org.uk>2015-04-24 15:45:28 -0400
commitfe0f07d08ee35fb13d2cb048970072fe4f71ad14 (patch)
treebeb614e8860cfa1791143d01ba17f686304c5caf /fs/nfs
parent8e3c500594dca9a12c27eb6d77b82e0766879bfd (diff)
direct-io: only inc/dec inode->i_dio_count for file systems
do_blockdev_direct_IO() increments and decrements the inode ->i_dio_count for each IO operation. It does this to protect against truncate of a file. Block devices don't need this sort of protection. For a capable multiqueue setup, this atomic int is the only shared state between applications accessing the device for O_DIRECT, and it presents a scaling wall for that. In my testing, as much as 30% of system time is spent incrementing and decrementing this value. A mixed read/write workload improved from ~2.5M IOPS to ~9.6M IOPS, with better latencies too. Before: clat percentiles (usec): | 1.00th=[ 33], 5.00th=[ 34], 10.00th=[ 34], 20.00th=[ 34], | 30.00th=[ 34], 40.00th=[ 34], 50.00th=[ 35], 60.00th=[ 35], | 70.00th=[ 35], 80.00th=[ 35], 90.00th=[ 37], 95.00th=[ 80], | 99.00th=[ 98], 99.50th=[ 151], 99.90th=[ 155], 99.95th=[ 155], | 99.99th=[ 165] After: clat percentiles (usec): | 1.00th=[ 95], 5.00th=[ 108], 10.00th=[ 129], 20.00th=[ 149], | 30.00th=[ 155], 40.00th=[ 161], 50.00th=[ 167], 60.00th=[ 171], | 70.00th=[ 177], 80.00th=[ 185], 90.00th=[ 201], 95.00th=[ 270], | 99.00th=[ 390], 99.50th=[ 398], 99.90th=[ 418], 99.95th=[ 422], | 99.99th=[ 438] In other setups, Robert Elliott reported seeing good performance improvements: https://lkml.org/lkml/2015/4/3/557 The more applications accessing the device, the worse it gets. Add a new direct-io flag, DIO_SKIP_DIO_COUNT, which tells do_blockdev_direct_IO() that it need not worry about incrementing or decrementing the inode i_dio_count for this caller. Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Christoph Hellwig <hch@lst.de> Cc: Theodore Ts'o <tytso@mit.edu> Cc: Elliott, Robert (Server Storage) <elliott@hp.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Jens Axboe <axboe@fb.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/nfs')
-rw-r--r--fs/nfs/direct.c10
1 file changed, 5 insertions, 5 deletions
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index ed0e6031be88..b2cbc3a6cdd9 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -386,7 +386,7 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write)
386 if (write) 386 if (write)
387 nfs_zap_mapping(inode, inode->i_mapping); 387 nfs_zap_mapping(inode, inode->i_mapping);
388 388
389 inode_dio_done(inode); 389 inode_dio_end(inode);
390 390
391 if (dreq->iocb) { 391 if (dreq->iocb) {
392 long res = (long) dreq->error; 392 long res = (long) dreq->error;
@@ -486,7 +486,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
486 &nfs_direct_read_completion_ops); 486 &nfs_direct_read_completion_ops);
487 get_dreq(dreq); 487 get_dreq(dreq);
488 desc.pg_dreq = dreq; 488 desc.pg_dreq = dreq;
489 atomic_inc(&inode->i_dio_count); 489 inode_dio_begin(inode);
490 490
491 while (iov_iter_count(iter)) { 491 while (iov_iter_count(iter)) {
492 struct page **pagevec; 492 struct page **pagevec;
@@ -538,7 +538,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
538 * generic layer handle the completion. 538 * generic layer handle the completion.
539 */ 539 */
540 if (requested_bytes == 0) { 540 if (requested_bytes == 0) {
541 inode_dio_done(inode); 541 inode_dio_end(inode);
542 nfs_direct_req_release(dreq); 542 nfs_direct_req_release(dreq);
543 return result < 0 ? result : -EIO; 543 return result < 0 ? result : -EIO;
544 } 544 }
@@ -872,7 +872,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
872 &nfs_direct_write_completion_ops); 872 &nfs_direct_write_completion_ops);
873 desc.pg_dreq = dreq; 873 desc.pg_dreq = dreq;
874 get_dreq(dreq); 874 get_dreq(dreq);
875 atomic_inc(&inode->i_dio_count); 875 inode_dio_begin(inode);
876 876
877 NFS_I(inode)->write_io += iov_iter_count(iter); 877 NFS_I(inode)->write_io += iov_iter_count(iter);
878 while (iov_iter_count(iter)) { 878 while (iov_iter_count(iter)) {
@@ -928,7 +928,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
928 * generic layer handle the completion. 928 * generic layer handle the completion.
929 */ 929 */
930 if (requested_bytes == 0) { 930 if (requested_bytes == 0) {
931 inode_dio_done(inode); 931 inode_dio_end(inode);
932 nfs_direct_req_release(dreq); 932 nfs_direct_req_release(dreq);
933 return result < 0 ? result : -EIO; 933 return result < 0 ? result : -EIO;
934 } 934 }