author		Jens Axboe <axboe@fb.com>	2015-04-15 19:05:48 -0400
committer	Al Viro <viro@zeniv.linux.org.uk>	2015-04-24 15:45:28 -0400
commit		fe0f07d08ee35fb13d2cb048970072fe4f71ad14 (patch)
tree		beb614e8860cfa1791143d01ba17f686304c5caf /fs/nfs
parent		8e3c500594dca9a12c27eb6d77b82e0766879bfd (diff)
direct-io: only inc/dec inode->i_dio_count for file systems
do_blockdev_direct_IO() increments and decrements the inode
->i_dio_count for each IO operation. It does this to protect against
truncate of a file. Block devices don't need this sort of protection.
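For context, that protection works because the truncate path waits for
i_dio_count to drain back to zero before changing the file size. A
minimal sketch of the pattern, using a hypothetical example_setattr()
purely for illustration (not taken from any particular filesystem):

	static int example_setattr(struct dentry *dentry, struct iattr *attr)
	{
		struct inode *inode = d_inode(dentry);

		if (attr->ia_valid & ATTR_SIZE) {
			/* Blocks until inode->i_dio_count reaches zero. */
			inode_dio_wait(inode);
			truncate_setsize(inode, attr->ia_size);
		}
		return 0;
	}

A raw block device has no truncate to race against, so it can safely
skip this accounting entirely.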
For a capable multiqueue setup, this atomic int is the only shared
state between applications accessing the device for O_DIRECT, and it
presents a scaling wall for that. In my testing, as much as 30% of
system time is spent incrementing and decrementing this value. A mixed
read/write workload improved from ~2.5M IOPS to ~9.6M IOPS, with
better latencies too.
Before:
clat percentiles (usec):
| 1.00th=[ 33], 5.00th=[ 34], 10.00th=[ 34], 20.00th=[ 34],
| 30.00th=[ 34], 40.00th=[ 34], 50.00th=[ 35], 60.00th=[ 35],
| 70.00th=[ 35], 80.00th=[ 35], 90.00th=[ 37], 95.00th=[ 80],
| 99.00th=[ 98], 99.50th=[ 151], 99.90th=[ 155], 99.95th=[ 155],
| 99.99th=[ 165]
After:
clat percentiles (usec):
| 1.00th=[ 95], 5.00th=[ 108], 10.00th=[ 129], 20.00th=[ 149],
| 30.00th=[ 155], 40.00th=[ 161], 50.00th=[ 167], 60.00th=[ 171],
| 70.00th=[ 177], 80.00th=[ 185], 90.00th=[ 201], 95.00th=[ 270],
| 99.00th=[ 390], 99.50th=[ 398], 99.90th=[ 418], 99.95th=[ 422],
| 99.99th=[ 438]
In other setups, Robert Elliott reported seeing good performance
improvements:
https://lkml.org/lkml/2015/4/3/557
The more applications that access the device, the worse it gets.
Add a new direct-io flag, DIO_SKIP_DIO_COUNT, which tells
do_blockdev_direct_IO() that it need not increment or decrement the
inode's i_dio_count for this caller.
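The flag's effect lives in fs/direct-io.c and fs/block_dev.c, outside
this fs/nfs diffstat. Roughly sketched from this series (not quoted
verbatim), the accounting in do_blockdev_direct_IO() becomes
conditional:

	if (!(dio->flags & DIO_SKIP_DIO_COUNT))
		inode_dio_begin(inode);

and the raw block device path opts out when it kicks off direct I/O:

	static ssize_t
	blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
			 loff_t offset)
	{
		struct file *file = iocb->ki_filp;
		struct inode *inode = file->f_mapping->host;

		return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter,
					    offset, blkdev_get_block,
					    NULL, NULL, DIO_SKIP_DIO_COUNT);
	}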
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Elliott, Robert (Server Storage) <elliott@hp.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/nfs')
-rw-r--r--	fs/nfs/direct.c	10
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index ed0e6031be88..b2cbc3a6cdd9 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -386,7 +386,7 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write)
 	if (write)
 		nfs_zap_mapping(inode, inode->i_mapping);
 
-	inode_dio_done(inode);
+	inode_dio_end(inode);
 
 	if (dreq->iocb) {
 		long res = (long) dreq->error;
@@ -486,7 +486,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 			     &nfs_direct_read_completion_ops);
 	get_dreq(dreq);
 	desc.pg_dreq = dreq;
-	atomic_inc(&inode->i_dio_count);
+	inode_dio_begin(inode);
 
 	while (iov_iter_count(iter)) {
 		struct page **pagevec;
@@ -538,7 +538,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 	 * generic layer handle the completion.
 	 */
 	if (requested_bytes == 0) {
-		inode_dio_done(inode);
+		inode_dio_end(inode);
 		nfs_direct_req_release(dreq);
 		return result < 0 ? result : -EIO;
 	}
@@ -872,7 +872,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 			      &nfs_direct_write_completion_ops);
 	desc.pg_dreq = dreq;
 	get_dreq(dreq);
-	atomic_inc(&inode->i_dio_count);
+	inode_dio_begin(inode);
 
 	NFS_I(inode)->write_io += iov_iter_count(iter);
 	while (iov_iter_count(iter)) {
@@ -928,7 +928,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 	 * generic layer handle the completion.
 	 */
 	if (requested_bytes == 0) {
-		inode_dio_done(inode);
+		inode_dio_end(inode);
 		nfs_direct_req_release(dreq);
 		return result < 0 ? result : -EIO;
 	}
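For reference, inode_dio_begin() and inode_dio_end() are the helpers
introduced alongside this change in include/linux/fs.h (also outside
this diffstat). Approximately, they look like:

	static inline void inode_dio_begin(struct inode *inode)
	{
		atomic_inc(&inode->i_dio_count);
	}

	static inline void inode_dio_end(struct inode *inode)
	{
		/* Wake anyone sleeping in inode_dio_wait() once the
		 * last in-flight direct I/O completes. */
		if (atomic_dec_and_test(&inode->i_dio_count))
			wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
	}

So the NFS changes above are behavior-preserving renames: the open-coded
atomic_inc(&inode->i_dio_count) becomes inode_dio_begin(), and
inode_dio_done() becomes inode_dio_end().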