author     Christoph Hellwig <hch@lst.de>       2016-07-19 21:38:55 -0400
committer  Dave Chinner <david@fromorbit.com>   2016-07-19 21:38:55 -0400
commit     16d4d43595b4780daac8fcea6d042689124cb094 (patch)
tree       991b3b67bba3c837256b1009ef8b8038d587cc7c
parent     fa8d972d055c723cc427e14d4d7919640f418730 (diff)
xfs: split direct I/O and DAX path
So far the DAX code has overloaded the direct I/O code path. There is very
little in common between the two, and untangling them allows us to clean up
both variants. As a side effect we also get separate trace points for both
I/O types.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
-rw-r--r--   fs/xfs/xfs_file.c  | 139
-rw-r--r--   fs/xfs/xfs_trace.h |   2
2 files changed, 112 insertions(+), 29 deletions(-)
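After this patch the top-level entry points pick the path explicitly: DAX inodes
go to the new xfs_file_dax_read()/xfs_file_dax_write() helpers (built around
dax_do_io()), while xfs_file_dio_aio_read()/xfs_file_dio_aio_write() become pure
__blockdev_direct_IO() paths. A minimal sketch of the resulting dispatch,
distilled from the xfs_file_read_iter() and xfs_file_write_iter() hunks below
(shutdown checks, stats and return handling elided):

	/* read side, from the xfs_file_read_iter() hunk */
	if (IS_DAX(inode))
		ret = xfs_file_dax_read(iocb, to);		/* dax_do_io() based */
	else if (iocb->ki_flags & IOCB_DIRECT)
		ret = xfs_file_dio_aio_read(iocb, to);		/* __blockdev_direct_IO() */
	else
		ret = xfs_file_buffered_aio_read(iocb, to);

	/* write side, from the xfs_file_write_iter() hunk */
	if (IS_DAX(inode))
		ret = xfs_file_dax_write(iocb, from);
	else if (iocb->ki_flags & IOCB_DIRECT)
		ret = xfs_file_dio_aio_write(iocb, from);
	else
		ret = xfs_file_buffered_aio_write(iocb, from);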
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index dd5185dafc9f..d97e8cb99a59 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -305,13 +305,11 @@ xfs_file_dio_aio_read(
 	else
 		target = ip->i_mount->m_ddev_targp;
 
-	if (!IS_DAX(inode)) {
-		/* DIO must be aligned to device logical sector size */
-		if ((iocb->ki_pos | count) & target->bt_logical_sectormask) {
-			if (iocb->ki_pos == isize)
-				return 0;
-			return -EINVAL;
-		}
+	/* DIO must be aligned to device logical sector size */
+	if ((iocb->ki_pos | count) & target->bt_logical_sectormask) {
+		if (iocb->ki_pos == isize)
+			return 0;
+		return -EINVAL;
 	}
 
 	/*
@@ -360,13 +358,37 @@ xfs_file_dio_aio_read(
 	}
 
 	data = *to;
-	if (IS_DAX(inode)) {
-		ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
-				NULL, 0);
-	} else {
-		ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
-				xfs_get_blocks_direct, NULL, NULL, 0);
-	}
+	ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
+			xfs_get_blocks_direct, NULL, NULL, 0);
+	if (ret > 0) {
+		iocb->ki_pos += ret;
+		iov_iter_advance(to, ret);
+	}
+	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
+
+	file_accessed(iocb->ki_filp);
+	return ret;
+}
+
+STATIC ssize_t
+xfs_file_dax_read(
+	struct kiocb		*iocb,
+	struct iov_iter		*to)
+{
+	struct address_space	*mapping = iocb->ki_filp->f_mapping;
+	struct inode		*inode = mapping->host;
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct iov_iter		data = *to;
+	size_t			count = iov_iter_count(to);
+	ssize_t			ret = 0;
+
+	trace_xfs_file_dax_read(ip, count, iocb->ki_pos);
+
+	if (!count)
+		return 0; /* skip atime */
+
+	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
+	ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, NULL, 0);
 	if (ret > 0) {
 		iocb->ki_pos += ret;
 		iov_iter_advance(to, ret);
@@ -399,7 +421,8 @@ xfs_file_read_iter(
 	struct kiocb		*iocb,
 	struct iov_iter		*to)
 {
-	struct xfs_mount	*mp = XFS_I(file_inode(iocb->ki_filp))->i_mount;
+	struct inode		*inode = file_inode(iocb->ki_filp);
+	struct xfs_mount	*mp = XFS_I(inode)->i_mount;
 	ssize_t			ret = 0;
 
 	XFS_STATS_INC(mp, xs_read_calls);
@@ -407,7 +430,9 @@ xfs_file_read_iter(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
-	if (iocb->ki_flags & IOCB_DIRECT)
+	if (IS_DAX(inode))
+		ret = xfs_file_dax_read(iocb, to);
+	else if (iocb->ki_flags & IOCB_DIRECT)
 		ret = xfs_file_dio_aio_read(iocb, to);
 	else
 		ret = xfs_file_buffered_aio_read(iocb, to);
@@ -755,8 +780,7 @@ xfs_file_dio_aio_write(
 			mp->m_rtdev_targp : mp->m_ddev_targp;
 
 	/* DIO must be aligned to device logical sector size */
-	if (!IS_DAX(inode) &&
-	    ((iocb->ki_pos | count) & target->bt_logical_sectormask))
+	if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
 		return -EINVAL;
 
 	/* "unaligned" here means not aligned to a filesystem block */
@@ -825,14 +849,9 @@ xfs_file_dio_aio_write(
 	trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
 
 	data = *from;
-	if (IS_DAX(inode)) {
-		ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
-				xfs_end_io_direct_write, 0);
-	} else {
-		ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
-				xfs_get_blocks_direct, xfs_end_io_direct_write,
-				NULL, DIO_ASYNC_EXTEND);
-	}
+	ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
+			xfs_get_blocks_direct, xfs_end_io_direct_write,
+			NULL, DIO_ASYNC_EXTEND);
 
 	/* see generic_file_direct_write() for why this is necessary */
 	if (mapping->nrpages) {
@@ -849,10 +868,70 @@ out:
 	xfs_rw_iunlock(ip, iolock);
 
 	/*
-	 * No fallback to buffered IO on errors for XFS. DAX can result in
-	 * partial writes, but direct IO will either complete fully or fail.
+	 * No fallback to buffered IO on errors for XFS, direct IO will either
+	 * complete fully or fail.
+	 */
+	ASSERT(ret < 0 || ret == count);
+	return ret;
+}
+
+STATIC ssize_t
+xfs_file_dax_write(
+	struct kiocb		*iocb,
+	struct iov_iter		*from)
+{
+	struct address_space	*mapping = iocb->ki_filp->f_mapping;
+	struct inode		*inode = mapping->host;
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	ssize_t			ret = 0;
+	int			unaligned_io = 0;
+	int			iolock;
+	struct iov_iter		data;
+
+	/* "unaligned" here means not aligned to a filesystem block */
+	if ((iocb->ki_pos & mp->m_blockmask) ||
+	    ((iocb->ki_pos + iov_iter_count(from)) & mp->m_blockmask)) {
+		unaligned_io = 1;
+		iolock = XFS_IOLOCK_EXCL;
+	} else if (mapping->nrpages) {
+		iolock = XFS_IOLOCK_EXCL;
+	} else {
+		iolock = XFS_IOLOCK_SHARED;
+	}
+	xfs_rw_ilock(ip, iolock);
+
+	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
+	if (ret)
+		goto out;
+
+	/*
+	 * Yes, even DAX files can have page cache attached to them: A zeroed
+	 * page is inserted into the pagecache when we have to serve a write
+	 * fault on a hole. It should never be dirtied and can simply be
+	 * dropped from the pagecache once we get real data for the page.
 	 */
-	ASSERT(ret < 0 || ret == count || IS_DAX(VFS_I(ip)));
+	if (mapping->nrpages) {
+		ret = invalidate_inode_pages2(mapping);
+		WARN_ON_ONCE(ret);
+	}
+
+	if (iolock == XFS_IOLOCK_EXCL && !unaligned_io) {
+		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
+		iolock = XFS_IOLOCK_SHARED;
+	}
+
+	trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);
+
+	data = *from;
+	ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
+			xfs_end_io_direct_write, 0);
+	if (ret > 0) {
+		iocb->ki_pos += ret;
+		iov_iter_advance(from, ret);
+	}
+out:
+	xfs_rw_iunlock(ip, iolock);
 	return ret;
 }
 
@@ -934,7 +1013,9 @@ xfs_file_write_iter(
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
 		return -EIO;
 
-	if ((iocb->ki_flags & IOCB_DIRECT) || IS_DAX(inode))
+	if (IS_DAX(inode))
+		ret = xfs_file_dax_write(iocb, from);
+	else if (iocb->ki_flags & IOCB_DIRECT)
 		ret = xfs_file_dio_aio_write(iocb, from);
 	else
 		ret = xfs_file_buffered_aio_write(iocb, from);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index a1bc5c64a573..c2876917dd89 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1164,8 +1164,10 @@ DEFINE_EVENT(xfs_file_class, name, \
 	TP_ARGS(ip, count, offset))
 DEFINE_RW_EVENT(xfs_file_buffered_read);
 DEFINE_RW_EVENT(xfs_file_direct_read);
+DEFINE_RW_EVENT(xfs_file_dax_read);
 DEFINE_RW_EVENT(xfs_file_buffered_write);
 DEFINE_RW_EVENT(xfs_file_direct_write);
+DEFINE_RW_EVENT(xfs_file_dax_write);
 DEFINE_RW_EVENT(xfs_file_splice_read);
 
 DECLARE_EVENT_CLASS(xfs_page_class,