about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Christoph Hellwig <hch@lst.de> 2016-07-19 21:38:55 -0400
committer Dave Chinner <david@fromorbit.com> 2016-07-19 21:38:55 -0400
commit 16d4d43595b4780daac8fcea6d042689124cb094 (patch)
tree 991b3b67bba3c837256b1009ef8b8038d587cc7c
parent fa8d972d055c723cc427e14d4d7919640f418730 (diff)
xfs: split direct I/O and DAX path
So far the DAX code overloaded the direct I/O code path. There is very little in common between the two, and untangling them allows us to clean up both variants. As a side effect we also get separate trace points for both I/O types.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
-rw-r--r-- fs/xfs/xfs_file.c 139
-rw-r--r-- fs/xfs/xfs_trace.h 2
2 files changed, 112 insertions, 29 deletions
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index dd5185dafc9f..d97e8cb99a59 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -305,13 +305,11 @@ xfs_file_dio_aio_read(
305 else 305 else
306 target = ip->i_mount->m_ddev_targp; 306 target = ip->i_mount->m_ddev_targp;
307 307
308 if (!IS_DAX(inode)) { 308 /* DIO must be aligned to device logical sector size */
309 /* DIO must be aligned to device logical sector size */ 309 if ((iocb->ki_pos | count) & target->bt_logical_sectormask) {
310 if ((iocb->ki_pos | count) & target->bt_logical_sectormask) { 310 if (iocb->ki_pos == isize)
311 if (iocb->ki_pos == isize) 311 return 0;
312 return 0; 312 return -EINVAL;
313 return -EINVAL;
314 }
315 } 313 }
316 314
317 /* 315 /*
@@ -360,13 +358,37 @@ xfs_file_dio_aio_read(
360 } 358 }
361 359
362 data = *to; 360 data = *to;
363 if (IS_DAX(inode)) { 361 ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
364 ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, 362 xfs_get_blocks_direct, NULL, NULL, 0);
365 NULL, 0); 363 if (ret > 0) {
366 } else { 364 iocb->ki_pos += ret;
367 ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data, 365 iov_iter_advance(to, ret);
368 xfs_get_blocks_direct, NULL, NULL, 0);
369 } 366 }
367 xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
368
369 file_accessed(iocb->ki_filp);
370 return ret;
371}
372
373STATIC ssize_t
374xfs_file_dax_read(
375 struct kiocb *iocb,
376 struct iov_iter *to)
377{
378 struct address_space *mapping = iocb->ki_filp->f_mapping;
379 struct inode *inode = mapping->host;
380 struct xfs_inode *ip = XFS_I(inode);
381 struct iov_iter data = *to;
382 size_t count = iov_iter_count(to);
383 ssize_t ret = 0;
384
385 trace_xfs_file_dax_read(ip, count, iocb->ki_pos);
386
387 if (!count)
388 return 0; /* skip atime */
389
390 xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
391 ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, NULL, 0);
370 if (ret > 0) { 392 if (ret > 0) {
371 iocb->ki_pos += ret; 393 iocb->ki_pos += ret;
372 iov_iter_advance(to, ret); 394 iov_iter_advance(to, ret);
@@ -399,7 +421,8 @@ xfs_file_read_iter(
399 struct kiocb *iocb, 421 struct kiocb *iocb,
400 struct iov_iter *to) 422 struct iov_iter *to)
401{ 423{
402 struct xfs_mount *mp = XFS_I(file_inode(iocb->ki_filp))->i_mount; 424 struct inode *inode = file_inode(iocb->ki_filp);
425 struct xfs_mount *mp = XFS_I(inode)->i_mount;
403 ssize_t ret = 0; 426 ssize_t ret = 0;
404 427
405 XFS_STATS_INC(mp, xs_read_calls); 428 XFS_STATS_INC(mp, xs_read_calls);
@@ -407,7 +430,9 @@ xfs_file_read_iter(
407 if (XFS_FORCED_SHUTDOWN(mp)) 430 if (XFS_FORCED_SHUTDOWN(mp))
408 return -EIO; 431 return -EIO;
409 432
410 if (iocb->ki_flags & IOCB_DIRECT) 433 if (IS_DAX(inode))
434 ret = xfs_file_dax_read(iocb, to);
435 else if (iocb->ki_flags & IOCB_DIRECT)
411 ret = xfs_file_dio_aio_read(iocb, to); 436 ret = xfs_file_dio_aio_read(iocb, to);
412 else 437 else
413 ret = xfs_file_buffered_aio_read(iocb, to); 438 ret = xfs_file_buffered_aio_read(iocb, to);
@@ -755,8 +780,7 @@ xfs_file_dio_aio_write(
755 mp->m_rtdev_targp : mp->m_ddev_targp; 780 mp->m_rtdev_targp : mp->m_ddev_targp;
756 781
757 /* DIO must be aligned to device logical sector size */ 782 /* DIO must be aligned to device logical sector size */
758 if (!IS_DAX(inode) && 783 if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
759 ((iocb->ki_pos | count) & target->bt_logical_sectormask))
760 return -EINVAL; 784 return -EINVAL;
761 785
762 /* "unaligned" here means not aligned to a filesystem block */ 786 /* "unaligned" here means not aligned to a filesystem block */
@@ -825,14 +849,9 @@ xfs_file_dio_aio_write(
825 trace_xfs_file_direct_write(ip, count, iocb->ki_pos); 849 trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
826 850
827 data = *from; 851 data = *from;
828 if (IS_DAX(inode)) { 852 ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
829 ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, 853 xfs_get_blocks_direct, xfs_end_io_direct_write,
830 xfs_end_io_direct_write, 0); 854 NULL, DIO_ASYNC_EXTEND);
831 } else {
832 ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
833 xfs_get_blocks_direct, xfs_end_io_direct_write,
834 NULL, DIO_ASYNC_EXTEND);
835 }
836 855
837 /* see generic_file_direct_write() for why this is necessary */ 856 /* see generic_file_direct_write() for why this is necessary */
838 if (mapping->nrpages) { 857 if (mapping->nrpages) {
@@ -849,10 +868,70 @@ out:
849 xfs_rw_iunlock(ip, iolock); 868 xfs_rw_iunlock(ip, iolock);
850 869
851 /* 870 /*
852 * No fallback to buffered IO on errors for XFS. DAX can result in 871 * No fallback to buffered IO on errors for XFS, direct IO will either
853 * partial writes, but direct IO will either complete fully or fail. 872 * complete fully or fail.
873 */
874 ASSERT(ret < 0 || ret == count);
875 return ret;
876}
877
878STATIC ssize_t
879xfs_file_dax_write(
880 struct kiocb *iocb,
881 struct iov_iter *from)
882{
883 struct address_space *mapping = iocb->ki_filp->f_mapping;
884 struct inode *inode = mapping->host;
885 struct xfs_inode *ip = XFS_I(inode);
886 struct xfs_mount *mp = ip->i_mount;
887 ssize_t ret = 0;
888 int unaligned_io = 0;
889 int iolock;
890 struct iov_iter data;
891
892 /* "unaligned" here means not aligned to a filesystem block */
893 if ((iocb->ki_pos & mp->m_blockmask) ||
894 ((iocb->ki_pos + iov_iter_count(from)) & mp->m_blockmask)) {
895 unaligned_io = 1;
896 iolock = XFS_IOLOCK_EXCL;
897 } else if (mapping->nrpages) {
898 iolock = XFS_IOLOCK_EXCL;
899 } else {
900 iolock = XFS_IOLOCK_SHARED;
901 }
902 xfs_rw_ilock(ip, iolock);
903
904 ret = xfs_file_aio_write_checks(iocb, from, &iolock);
905 if (ret)
906 goto out;
907
908 /*
909 * Yes, even DAX files can have page cache attached to them: A zeroed
910 * page is inserted into the pagecache when we have to serve a write
911 * fault on a hole. It should never be dirtied and can simply be
912 * dropped from the pagecache once we get real data for the page.
854 */ 913 */
855 ASSERT(ret < 0 || ret == count || IS_DAX(VFS_I(ip))); 914 if (mapping->nrpages) {
915 ret = invalidate_inode_pages2(mapping);
916 WARN_ON_ONCE(ret);
917 }
918
919 if (iolock == XFS_IOLOCK_EXCL && !unaligned_io) {
920 xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
921 iolock = XFS_IOLOCK_SHARED;
922 }
923
924 trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);
925
926 data = *from;
927 ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
928 xfs_end_io_direct_write, 0);
929 if (ret > 0) {
930 iocb->ki_pos += ret;
931 iov_iter_advance(from, ret);
932 }
933out:
934 xfs_rw_iunlock(ip, iolock);
856 return ret; 935 return ret;
857} 936}
858 937
@@ -934,7 +1013,9 @@ xfs_file_write_iter(
934 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 1013 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
935 return -EIO; 1014 return -EIO;
936 1015
937 if ((iocb->ki_flags & IOCB_DIRECT) || IS_DAX(inode)) 1016 if (IS_DAX(inode))
1017 ret = xfs_file_dax_write(iocb, from);
1018 else if (iocb->ki_flags & IOCB_DIRECT)
938 ret = xfs_file_dio_aio_write(iocb, from); 1019 ret = xfs_file_dio_aio_write(iocb, from);
939 else 1020 else
940 ret = xfs_file_buffered_aio_write(iocb, from); 1021 ret = xfs_file_buffered_aio_write(iocb, from);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index a1bc5c64a573..c2876917dd89 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1164,8 +1164,10 @@ DEFINE_EVENT(xfs_file_class, name, \
1164 TP_ARGS(ip, count, offset)) 1164 TP_ARGS(ip, count, offset))
1165DEFINE_RW_EVENT(xfs_file_buffered_read); 1165DEFINE_RW_EVENT(xfs_file_buffered_read);
1166DEFINE_RW_EVENT(xfs_file_direct_read); 1166DEFINE_RW_EVENT(xfs_file_direct_read);
1167DEFINE_RW_EVENT(xfs_file_dax_read);
1167DEFINE_RW_EVENT(xfs_file_buffered_write); 1168DEFINE_RW_EVENT(xfs_file_buffered_write);
1168DEFINE_RW_EVENT(xfs_file_direct_write); 1169DEFINE_RW_EVENT(xfs_file_direct_write);
1170DEFINE_RW_EVENT(xfs_file_dax_write);
1169DEFINE_RW_EVENT(xfs_file_splice_read); 1171DEFINE_RW_EVENT(xfs_file_splice_read);
1170 1172
1171DECLARE_EVENT_CLASS(xfs_page_class, 1173DECLARE_EVENT_CLASS(xfs_page_class,