Diffstat (limited to 'fs/xfs/xfs_file.c')
 fs/xfs/xfs_file.c | 161
 1 file changed, 137 insertions(+), 24 deletions(-)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 1f12ad0a8585..8121e75352ee 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -559,7 +559,7 @@ restart:
         if (error <= 0)
                 return error;
 
-        error = xfs_break_layouts(inode, iolock);
+        error = xfs_break_layouts(inode, iolock, true);
         if (error)
                 return error;
 
@@ -569,21 +569,42 @@ restart:
          * write. If zeroing is needed and we are currently holding the
          * iolock shared, we need to update it to exclusive which implies
          * having to redo all checks before.
+         *
+         * We need to serialise against EOF updates that occur in IO
+         * completions here. We want to make sure that nobody is changing the
+         * size while we do this check until we have placed an IO barrier (i.e.
+         * hold the XFS_IOLOCK_EXCL) that prevents new IO from being dispatched.
+         * The spinlock effectively forms a memory barrier once we have the
+         * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value
+         * and hence be able to correctly determine if we need to run zeroing.
          */
+        spin_lock(&ip->i_flags_lock);
         if (iocb->ki_pos > i_size_read(inode)) {
                 bool    zero = false;
 
+                spin_unlock(&ip->i_flags_lock);
                 if (*iolock == XFS_IOLOCK_SHARED) {
                         xfs_rw_iunlock(ip, *iolock);
                         *iolock = XFS_IOLOCK_EXCL;
                         xfs_rw_ilock(ip, *iolock);
                         iov_iter_reexpand(from, count);
+
+                        /*
+                         * We now have an IO submission barrier in place, but
+                         * AIO can do EOF updates during IO completion and hence
+                         * we now need to wait for all of them to drain. Non-AIO
+                         * DIO will have drained before we are given the
+                         * XFS_IOLOCK_EXCL, and so for most cases this wait is a
+                         * no-op.
+                         */
+                        inode_dio_wait(inode);
                         goto restart;
                 }
                 error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero);
                 if (error)
                         return error;
-        }
+        } else
+                spin_unlock(&ip->i_flags_lock);
 
         /*
          * Updating the timestamps will grab the ilock again from
@@ -645,6 +666,8 @@ xfs_file_dio_aio_write(
         int                     iolock;
         size_t                  count = iov_iter_count(from);
         loff_t                  pos = iocb->ki_pos;
+        loff_t                  end;
+        struct iov_iter         data;
         struct xfs_buftarg      *target = XFS_IS_REALTIME_INODE(ip) ?
                                         mp->m_rtdev_targp : mp->m_ddev_targp;
 
@@ -685,10 +708,11 @@ xfs_file_dio_aio_write(
                 goto out;
         count = iov_iter_count(from);
         pos = iocb->ki_pos;
+        end = pos + count - 1;
 
         if (mapping->nrpages) {
                 ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
-                                                   pos, pos + count - 1);
+                                                   pos, end);
                 if (ret)
                         goto out;
                 /*
@@ -698,7 +722,7 @@ xfs_file_dio_aio_write(
                  */
                 ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
                                         pos >> PAGE_CACHE_SHIFT,
-                                        (pos + count - 1) >> PAGE_CACHE_SHIFT);
+                                        end >> PAGE_CACHE_SHIFT);
                 WARN_ON_ONCE(ret);
                 ret = 0;
         }
@@ -715,8 +739,22 @@ xfs_file_dio_aio_write(
         }
 
         trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
-        ret = generic_file_direct_write(iocb, from, pos);
 
+        data = *from;
+        ret = mapping->a_ops->direct_IO(iocb, &data, pos);
+
+        /* see generic_file_direct_write() for why this is necessary */
+        if (mapping->nrpages) {
+                invalidate_inode_pages2_range(mapping,
+                                              pos >> PAGE_CACHE_SHIFT,
+                                              end >> PAGE_CACHE_SHIFT);
+        }
+
+        if (ret > 0) {
+                pos += ret;
+                iov_iter_advance(from, ret);
+                iocb->ki_pos = pos;
+        }
 out:
         xfs_rw_iunlock(ip, iolock);
 
@@ -822,6 +860,11 @@ xfs_file_write_iter(
         return ret;
 }
 
+#define XFS_FALLOC_FL_SUPPORTED                                         \
+                (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |           \
+                 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |      \
+                 FALLOC_FL_INSERT_RANGE)
+
 STATIC long
 xfs_file_fallocate(
         struct file             *file,
@@ -835,18 +878,21 @@ xfs_file_fallocate(
         enum xfs_prealloc_flags flags = 0;
         uint                    iolock = XFS_IOLOCK_EXCL;
         loff_t                  new_size = 0;
+        bool                    do_file_insert = 0;
 
         if (!S_ISREG(inode->i_mode))
                 return -EINVAL;
-        if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
-                     FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
+        if (mode & ~XFS_FALLOC_FL_SUPPORTED)
                 return -EOPNOTSUPP;
 
         xfs_ilock(ip, iolock);
-        error = xfs_break_layouts(inode, &iolock);
+        error = xfs_break_layouts(inode, &iolock, false);
         if (error)
                 goto out_unlock;
 
+        xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+        iolock |= XFS_MMAPLOCK_EXCL;
+
         if (mode & FALLOC_FL_PUNCH_HOLE) {
                 error = xfs_free_file_space(ip, offset, len);
                 if (error)
@@ -873,6 +919,27 @@ xfs_file_fallocate(
                 error = xfs_collapse_file_space(ip, offset, len);
                 if (error)
                         goto out_unlock;
+        } else if (mode & FALLOC_FL_INSERT_RANGE) {
+                unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
+
+                new_size = i_size_read(inode) + len;
+                if (offset & blksize_mask || len & blksize_mask) {
+                        error = -EINVAL;
+                        goto out_unlock;
+                }
+
+                /* check the new inode size does not wrap through zero */
+                if (new_size > inode->i_sb->s_maxbytes) {
+                        error = -EFBIG;
+                        goto out_unlock;
+                }
+
+                /* Offset should be less than i_size */
+                if (offset >= i_size_read(inode)) {
+                        error = -EINVAL;
+                        goto out_unlock;
+                }
+                do_file_insert = 1;
         } else {
                 flags |= XFS_PREALLOC_SET;
 
@@ -907,8 +974,19 @@ xfs_file_fallocate(
                 iattr.ia_valid = ATTR_SIZE;
                 iattr.ia_size = new_size;
                 error = xfs_setattr_size(ip, &iattr);
+                if (error)
+                        goto out_unlock;
         }
 
+        /*
+         * Perform hole insertion now that the file size has been
+         * updated so that if we crash during the operation we don't
+         * leave shifted extents past EOF and hence losing access to
+         * the data that is contained within them.
+         */
+        if (do_file_insert)
+                error = xfs_insert_file_space(ip, offset, len);
+
 out_unlock:
         xfs_iunlock(ip, iolock);
         return error;
@@ -997,20 +1075,6 @@ xfs_file_mmap(
 }
 
 /*
- * mmap()d file has taken write protection fault and is being made
- * writable. We can set the page state up correctly for a writable
- * page, which means we can do correct delalloc accounting (ENOSPC
- * checking!) and unwritten extent mapping.
- */
-STATIC int
-xfs_vm_page_mkwrite(
-        struct vm_area_struct   *vma,
-        struct vm_fault         *vmf)
-{
-        return block_page_mkwrite(vma, vmf, xfs_get_blocks);
-}
-
-/*
  * This type is designed to indicate the type of offset we would like
  * to search from page cache for xfs_seek_hole_data().
  */
@@ -1385,6 +1449,55 @@ xfs_file_llseek(
         }
 }
 
+/*
+ * Locking for serialisation of IO during page faults. This results in a lock
+ * ordering of:
+ *
+ * mmap_sem (MM)
+ *   i_mmap_lock (XFS - truncate serialisation)
+ *     page_lock (MM)
+ *       i_lock (XFS - extent map serialisation)
+ */
+STATIC int
+xfs_filemap_fault(
+        struct vm_area_struct   *vma,
+        struct vm_fault         *vmf)
+{
+        struct xfs_inode        *ip = XFS_I(vma->vm_file->f_mapping->host);
+        int                     error;
+
+        trace_xfs_filemap_fault(ip);
+
+        xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
+        error = filemap_fault(vma, vmf);
+        xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
+
+        return error;
+}
+
+/*
+ * mmap()d file has taken write protection fault and is being made writable. We
+ * can set the page state up correctly for a writable page, which means we can
+ * do correct delalloc accounting (ENOSPC checking!) and unwritten extent
+ * mapping.
+ */
+STATIC int
+xfs_filemap_page_mkwrite(
+        struct vm_area_struct   *vma,
+        struct vm_fault         *vmf)
+{
+        struct xfs_inode        *ip = XFS_I(vma->vm_file->f_mapping->host);
+        int                     error;
+
+        trace_xfs_filemap_page_mkwrite(ip);
+
+        xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
+        error = block_page_mkwrite(vma, vmf, xfs_get_blocks);
+        xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
+
+        return error;
+}
+
 const struct file_operations xfs_file_operations = {
         .llseek         = xfs_file_llseek,
         .read_iter      = xfs_file_read_iter,
@@ -1415,7 +1528,7 @@ const struct file_operations xfs_dir_file_operations = {
 };
 
 static const struct vm_operations_struct xfs_file_vm_ops = {
-        .fault          = filemap_fault,
+        .fault          = xfs_filemap_fault,
         .map_pages      = filemap_map_pages,
-        .page_mkwrite   = xfs_vm_page_mkwrite,
+        .page_mkwrite   = xfs_filemap_page_mkwrite,
 };
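
For reference, the FALLOC_FL_INSERT_RANGE support added to xfs_file_fallocate()
above is driven from userspace through fallocate(2). The sketch below is only
illustrative (the file name "testfile" and the 4096/8192 byte values are
assumptions about a pre-existing file on a 4k-block filesystem, not part of
this change); it simply exercises the constraints enforced by the new code:
offset and length must be block aligned, and offset must lie below the current
file size.

/* Hypothetical example: insert an 8k hole at offset 4k of an existing file. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int fd = open("testfile", O_RDWR);      /* assumed pre-existing file */

        if (fd < 0) {
                perror("open");
                return 1;
        }

        /*
         * Both offset and length are multiples of the (assumed 4096 byte)
         * filesystem block size and the offset is below the current file
         * size; otherwise the kernel rejects the call with EINVAL.
         */
        if (fallocate(fd, FALLOC_FL_INSERT_RANGE, 4096, 8192) < 0)
                perror("fallocate(FALLOC_FL_INSERT_RANGE)");

        close(fd);
        return 0;
}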