diff options
| author | Christoph Hellwig <hch@lst.de> | 2016-07-19 21:38:55 -0400 |
|---|---|---|
| committer | Dave Chinner <david@fromorbit.com> | 2016-07-19 21:38:55 -0400 |
| commit | 16d4d43595b4780daac8fcea6d042689124cb094 (patch) | |
| tree | 991b3b67bba3c837256b1009ef8b8038d587cc7c | |
| parent | fa8d972d055c723cc427e14d4d7919640f418730 (diff) | |
xfs: split direct I/O and DAX path
So far the DAX code overloaded the direct I/O code path. There is very little
in common between the two, and untangling them allows us to clean up both variants.
As a side effect we also get separate trace points for both I/O types.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
| -rw-r--r-- | fs/xfs/xfs_file.c | 139 | ||||
| -rw-r--r-- | fs/xfs/xfs_trace.h | 2 |
2 files changed, 112 insertions, 29 deletions
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index dd5185dafc9f..d97e8cb99a59 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
| @@ -305,13 +305,11 @@ xfs_file_dio_aio_read( | |||
| 305 | else | 305 | else |
| 306 | target = ip->i_mount->m_ddev_targp; | 306 | target = ip->i_mount->m_ddev_targp; |
| 307 | 307 | ||
| 308 | if (!IS_DAX(inode)) { | 308 | /* DIO must be aligned to device logical sector size */ |
| 309 | /* DIO must be aligned to device logical sector size */ | 309 | if ((iocb->ki_pos | count) & target->bt_logical_sectormask) { |
| 310 | if ((iocb->ki_pos | count) & target->bt_logical_sectormask) { | 310 | if (iocb->ki_pos == isize) |
| 311 | if (iocb->ki_pos == isize) | 311 | return 0; |
| 312 | return 0; | 312 | return -EINVAL; |
| 313 | return -EINVAL; | ||
| 314 | } | ||
| 315 | } | 313 | } |
| 316 | 314 | ||
| 317 | /* | 315 | /* |
| @@ -360,13 +358,37 @@ xfs_file_dio_aio_read( | |||
| 360 | } | 358 | } |
| 361 | 359 | ||
| 362 | data = *to; | 360 | data = *to; |
| 363 | if (IS_DAX(inode)) { | 361 | ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data, |
| 364 | ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, | 362 | xfs_get_blocks_direct, NULL, NULL, 0); |
| 365 | NULL, 0); | 363 | if (ret > 0) { |
| 366 | } else { | 364 | iocb->ki_pos += ret; |
| 367 | ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data, | 365 | iov_iter_advance(to, ret); |
| 368 | xfs_get_blocks_direct, NULL, NULL, 0); | ||
| 369 | } | 366 | } |
| 367 | xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); | ||
| 368 | |||
| 369 | file_accessed(iocb->ki_filp); | ||
| 370 | return ret; | ||
| 371 | } | ||
| 372 | |||
| 373 | STATIC ssize_t | ||
| 374 | xfs_file_dax_read( | ||
| 375 | struct kiocb *iocb, | ||
| 376 | struct iov_iter *to) | ||
| 377 | { | ||
| 378 | struct address_space *mapping = iocb->ki_filp->f_mapping; | ||
| 379 | struct inode *inode = mapping->host; | ||
| 380 | struct xfs_inode *ip = XFS_I(inode); | ||
| 381 | struct iov_iter data = *to; | ||
| 382 | size_t count = iov_iter_count(to); | ||
| 383 | ssize_t ret = 0; | ||
| 384 | |||
| 385 | trace_xfs_file_dax_read(ip, count, iocb->ki_pos); | ||
| 386 | |||
| 387 | if (!count) | ||
| 388 | return 0; /* skip atime */ | ||
| 389 | |||
| 390 | xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); | ||
| 391 | ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, NULL, 0); | ||
| 370 | if (ret > 0) { | 392 | if (ret > 0) { |
| 371 | iocb->ki_pos += ret; | 393 | iocb->ki_pos += ret; |
| 372 | iov_iter_advance(to, ret); | 394 | iov_iter_advance(to, ret); |
| @@ -399,7 +421,8 @@ xfs_file_read_iter( | |||
| 399 | struct kiocb *iocb, | 421 | struct kiocb *iocb, |
| 400 | struct iov_iter *to) | 422 | struct iov_iter *to) |
| 401 | { | 423 | { |
| 402 | struct xfs_mount *mp = XFS_I(file_inode(iocb->ki_filp))->i_mount; | 424 | struct inode *inode = file_inode(iocb->ki_filp); |
| 425 | struct xfs_mount *mp = XFS_I(inode)->i_mount; | ||
| 403 | ssize_t ret = 0; | 426 | ssize_t ret = 0; |
| 404 | 427 | ||
| 405 | XFS_STATS_INC(mp, xs_read_calls); | 428 | XFS_STATS_INC(mp, xs_read_calls); |
| @@ -407,7 +430,9 @@ xfs_file_read_iter( | |||
| 407 | if (XFS_FORCED_SHUTDOWN(mp)) | 430 | if (XFS_FORCED_SHUTDOWN(mp)) |
| 408 | return -EIO; | 431 | return -EIO; |
| 409 | 432 | ||
| 410 | if (iocb->ki_flags & IOCB_DIRECT) | 433 | if (IS_DAX(inode)) |
| 434 | ret = xfs_file_dax_read(iocb, to); | ||
| 435 | else if (iocb->ki_flags & IOCB_DIRECT) | ||
| 411 | ret = xfs_file_dio_aio_read(iocb, to); | 436 | ret = xfs_file_dio_aio_read(iocb, to); |
| 412 | else | 437 | else |
| 413 | ret = xfs_file_buffered_aio_read(iocb, to); | 438 | ret = xfs_file_buffered_aio_read(iocb, to); |
| @@ -755,8 +780,7 @@ xfs_file_dio_aio_write( | |||
| 755 | mp->m_rtdev_targp : mp->m_ddev_targp; | 780 | mp->m_rtdev_targp : mp->m_ddev_targp; |
| 756 | 781 | ||
| 757 | /* DIO must be aligned to device logical sector size */ | 782 | /* DIO must be aligned to device logical sector size */ |
| 758 | if (!IS_DAX(inode) && | 783 | if ((iocb->ki_pos | count) & target->bt_logical_sectormask) |
| 759 | ((iocb->ki_pos | count) & target->bt_logical_sectormask)) | ||
| 760 | return -EINVAL; | 784 | return -EINVAL; |
| 761 | 785 | ||
| 762 | /* "unaligned" here means not aligned to a filesystem block */ | 786 | /* "unaligned" here means not aligned to a filesystem block */ |
| @@ -825,14 +849,9 @@ xfs_file_dio_aio_write( | |||
| 825 | trace_xfs_file_direct_write(ip, count, iocb->ki_pos); | 849 | trace_xfs_file_direct_write(ip, count, iocb->ki_pos); |
| 826 | 850 | ||
| 827 | data = *from; | 851 | data = *from; |
| 828 | if (IS_DAX(inode)) { | 852 | ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data, |
| 829 | ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, | 853 | xfs_get_blocks_direct, xfs_end_io_direct_write, |
| 830 | xfs_end_io_direct_write, 0); | 854 | NULL, DIO_ASYNC_EXTEND); |
| 831 | } else { | ||
| 832 | ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data, | ||
| 833 | xfs_get_blocks_direct, xfs_end_io_direct_write, | ||
| 834 | NULL, DIO_ASYNC_EXTEND); | ||
| 835 | } | ||
| 836 | 855 | ||
| 837 | /* see generic_file_direct_write() for why this is necessary */ | 856 | /* see generic_file_direct_write() for why this is necessary */ |
| 838 | if (mapping->nrpages) { | 857 | if (mapping->nrpages) { |
| @@ -849,10 +868,70 @@ out: | |||
| 849 | xfs_rw_iunlock(ip, iolock); | 868 | xfs_rw_iunlock(ip, iolock); |
| 850 | 869 | ||
| 851 | /* | 870 | /* |
| 852 | * No fallback to buffered IO on errors for XFS. DAX can result in | 871 | * No fallback to buffered IO on errors for XFS, direct IO will either |
| 853 | * partial writes, but direct IO will either complete fully or fail. | 872 | * complete fully or fail. |
| 873 | */ | ||
| 874 | ASSERT(ret < 0 || ret == count); | ||
| 875 | return ret; | ||
| 876 | } | ||
| 877 | |||
| 878 | STATIC ssize_t | ||
| 879 | xfs_file_dax_write( | ||
| 880 | struct kiocb *iocb, | ||
| 881 | struct iov_iter *from) | ||
| 882 | { | ||
| 883 | struct address_space *mapping = iocb->ki_filp->f_mapping; | ||
| 884 | struct inode *inode = mapping->host; | ||
| 885 | struct xfs_inode *ip = XFS_I(inode); | ||
| 886 | struct xfs_mount *mp = ip->i_mount; | ||
| 887 | ssize_t ret = 0; | ||
| 888 | int unaligned_io = 0; | ||
| 889 | int iolock; | ||
| 890 | struct iov_iter data; | ||
| 891 | |||
| 892 | /* "unaligned" here means not aligned to a filesystem block */ | ||
| 893 | if ((iocb->ki_pos & mp->m_blockmask) || | ||
| 894 | ((iocb->ki_pos + iov_iter_count(from)) & mp->m_blockmask)) { | ||
| 895 | unaligned_io = 1; | ||
| 896 | iolock = XFS_IOLOCK_EXCL; | ||
| 897 | } else if (mapping->nrpages) { | ||
| 898 | iolock = XFS_IOLOCK_EXCL; | ||
| 899 | } else { | ||
| 900 | iolock = XFS_IOLOCK_SHARED; | ||
| 901 | } | ||
| 902 | xfs_rw_ilock(ip, iolock); | ||
| 903 | |||
| 904 | ret = xfs_file_aio_write_checks(iocb, from, &iolock); | ||
| 905 | if (ret) | ||
| 906 | goto out; | ||
| 907 | |||
| 908 | /* | ||
| 909 | * Yes, even DAX files can have page cache attached to them: A zeroed | ||
| 910 | * page is inserted into the pagecache when we have to serve a write | ||
| 911 | * fault on a hole. It should never be dirtied and can simply be | ||
| 912 | * dropped from the pagecache once we get real data for the page. | ||
| 854 | */ | 913 | */ |
| 855 | ASSERT(ret < 0 || ret == count || IS_DAX(VFS_I(ip))); | 914 | if (mapping->nrpages) { |
| 915 | ret = invalidate_inode_pages2(mapping); | ||
| 916 | WARN_ON_ONCE(ret); | ||
| 917 | } | ||
| 918 | |||
| 919 | if (iolock == XFS_IOLOCK_EXCL && !unaligned_io) { | ||
| 920 | xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); | ||
| 921 | iolock = XFS_IOLOCK_SHARED; | ||
| 922 | } | ||
| 923 | |||
| 924 | trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos); | ||
| 925 | |||
| 926 | data = *from; | ||
| 927 | ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, | ||
| 928 | xfs_end_io_direct_write, 0); | ||
| 929 | if (ret > 0) { | ||
| 930 | iocb->ki_pos += ret; | ||
| 931 | iov_iter_advance(from, ret); | ||
| 932 | } | ||
| 933 | out: | ||
| 934 | xfs_rw_iunlock(ip, iolock); | ||
| 856 | return ret; | 935 | return ret; |
| 857 | } | 936 | } |
| 858 | 937 | ||
| @@ -934,7 +1013,9 @@ xfs_file_write_iter( | |||
| 934 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 1013 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
| 935 | return -EIO; | 1014 | return -EIO; |
| 936 | 1015 | ||
| 937 | if ((iocb->ki_flags & IOCB_DIRECT) || IS_DAX(inode)) | 1016 | if (IS_DAX(inode)) |
| 1017 | ret = xfs_file_dax_write(iocb, from); | ||
| 1018 | else if (iocb->ki_flags & IOCB_DIRECT) | ||
| 938 | ret = xfs_file_dio_aio_write(iocb, from); | 1019 | ret = xfs_file_dio_aio_write(iocb, from); |
| 939 | else | 1020 | else |
| 940 | ret = xfs_file_buffered_aio_write(iocb, from); | 1021 | ret = xfs_file_buffered_aio_write(iocb, from); |
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index a1bc5c64a573..c2876917dd89 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h | |||
| @@ -1164,8 +1164,10 @@ DEFINE_EVENT(xfs_file_class, name, \ | |||
| 1164 | TP_ARGS(ip, count, offset)) | 1164 | TP_ARGS(ip, count, offset)) |
| 1165 | DEFINE_RW_EVENT(xfs_file_buffered_read); | 1165 | DEFINE_RW_EVENT(xfs_file_buffered_read); |
| 1166 | DEFINE_RW_EVENT(xfs_file_direct_read); | 1166 | DEFINE_RW_EVENT(xfs_file_direct_read); |
| 1167 | DEFINE_RW_EVENT(xfs_file_dax_read); | ||
| 1167 | DEFINE_RW_EVENT(xfs_file_buffered_write); | 1168 | DEFINE_RW_EVENT(xfs_file_buffered_write); |
| 1168 | DEFINE_RW_EVENT(xfs_file_direct_write); | 1169 | DEFINE_RW_EVENT(xfs_file_direct_write); |
| 1170 | DEFINE_RW_EVENT(xfs_file_dax_write); | ||
| 1169 | DEFINE_RW_EVENT(xfs_file_splice_read); | 1171 | DEFINE_RW_EVENT(xfs_file_splice_read); |
| 1170 | 1172 | ||
| 1171 | DECLARE_EVENT_CLASS(xfs_page_class, | 1173 | DECLARE_EVENT_CLASS(xfs_page_class, |
