diff options
Diffstat (limited to 'fs/xfs/xfs_file.c')
-rw-r--r--  fs/xfs/xfs_file.c | 161
1 file changed, 137 insertions(+), 24 deletions(-)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 1f12ad0a8585..8121e75352ee 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -559,7 +559,7 @@ restart: | |||
559 | if (error <= 0) | 559 | if (error <= 0) |
560 | return error; | 560 | return error; |
561 | 561 | ||
562 | error = xfs_break_layouts(inode, iolock); | 562 | error = xfs_break_layouts(inode, iolock, true); |
563 | if (error) | 563 | if (error) |
564 | return error; | 564 | return error; |
565 | 565 | ||
@@ -569,21 +569,42 @@ restart: | |||
569 | * write. If zeroing is needed and we are currently holding the | 569 | * write. If zeroing is needed and we are currently holding the |
570 | * iolock shared, we need to update it to exclusive which implies | 570 | * iolock shared, we need to update it to exclusive which implies |
571 | * having to redo all checks before. | 571 | * having to redo all checks before. |
572 | * | ||
573 | * We need to serialise against EOF updates that occur in IO | ||
574 | * completions here. We want to make sure that nobody is changing the | ||
575 | * size while we do this check until we have placed an IO barrier (i.e. | ||
576 | * hold the XFS_IOLOCK_EXCL) that prevents new IO from being dispatched. | ||
577 | * The spinlock effectively forms a memory barrier once we have the | ||
578 | * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value | ||
579 | * and hence be able to correctly determine if we need to run zeroing. | ||
572 | */ | 580 | */ |
581 | spin_lock(&ip->i_flags_lock); | ||
573 | if (iocb->ki_pos > i_size_read(inode)) { | 582 | if (iocb->ki_pos > i_size_read(inode)) { |
574 | bool zero = false; | 583 | bool zero = false; |
575 | 584 | ||
585 | spin_unlock(&ip->i_flags_lock); | ||
576 | if (*iolock == XFS_IOLOCK_SHARED) { | 586 | if (*iolock == XFS_IOLOCK_SHARED) { |
577 | xfs_rw_iunlock(ip, *iolock); | 587 | xfs_rw_iunlock(ip, *iolock); |
578 | *iolock = XFS_IOLOCK_EXCL; | 588 | *iolock = XFS_IOLOCK_EXCL; |
579 | xfs_rw_ilock(ip, *iolock); | 589 | xfs_rw_ilock(ip, *iolock); |
580 | iov_iter_reexpand(from, count); | 590 | iov_iter_reexpand(from, count); |
591 | |||
592 | /* | ||
593 | * We now have an IO submission barrier in place, but | ||
594 | * AIO can do EOF updates during IO completion and hence | ||
595 | * we now need to wait for all of them to drain. Non-AIO | ||
596 | * DIO will have drained before we are given the | ||
597 | * XFS_IOLOCK_EXCL, and so for most cases this wait is a | ||
598 | * no-op. | ||
599 | */ | ||
600 | inode_dio_wait(inode); | ||
581 | goto restart; | 601 | goto restart; |
582 | } | 602 | } |
583 | error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero); | 603 | error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero); |
584 | if (error) | 604 | if (error) |
585 | return error; | 605 | return error; |
586 | } | 606 | } else |
607 | spin_unlock(&ip->i_flags_lock); | ||
587 | 608 | ||
588 | /* | 609 | /* |
589 | * Updating the timestamps will grab the ilock again from | 610 | * Updating the timestamps will grab the ilock again from |
@@ -645,6 +666,8 @@ xfs_file_dio_aio_write( | |||
645 | int iolock; | 666 | int iolock; |
646 | size_t count = iov_iter_count(from); | 667 | size_t count = iov_iter_count(from); |
647 | loff_t pos = iocb->ki_pos; | 668 | loff_t pos = iocb->ki_pos; |
669 | loff_t end; | ||
670 | struct iov_iter data; | ||
648 | struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? | 671 | struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? |
649 | mp->m_rtdev_targp : mp->m_ddev_targp; | 672 | mp->m_rtdev_targp : mp->m_ddev_targp; |
650 | 673 | ||
@@ -685,10 +708,11 @@ xfs_file_dio_aio_write( | |||
685 | goto out; | 708 | goto out; |
686 | count = iov_iter_count(from); | 709 | count = iov_iter_count(from); |
687 | pos = iocb->ki_pos; | 710 | pos = iocb->ki_pos; |
711 | end = pos + count - 1; | ||
688 | 712 | ||
689 | if (mapping->nrpages) { | 713 | if (mapping->nrpages) { |
690 | ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, | 714 | ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, |
691 | pos, pos + count - 1); | 715 | pos, end); |
692 | if (ret) | 716 | if (ret) |
693 | goto out; | 717 | goto out; |
694 | /* | 718 | /* |
@@ -698,7 +722,7 @@ xfs_file_dio_aio_write( | |||
698 | */ | 722 | */ |
699 | ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, | 723 | ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, |
700 | pos >> PAGE_CACHE_SHIFT, | 724 | pos >> PAGE_CACHE_SHIFT, |
701 | (pos + count - 1) >> PAGE_CACHE_SHIFT); | 725 | end >> PAGE_CACHE_SHIFT); |
702 | WARN_ON_ONCE(ret); | 726 | WARN_ON_ONCE(ret); |
703 | ret = 0; | 727 | ret = 0; |
704 | } | 728 | } |
@@ -715,8 +739,22 @@ xfs_file_dio_aio_write( | |||
715 | } | 739 | } |
716 | 740 | ||
717 | trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); | 741 | trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); |
718 | ret = generic_file_direct_write(iocb, from, pos); | ||
719 | 742 | ||
743 | data = *from; | ||
744 | ret = mapping->a_ops->direct_IO(iocb, &data, pos); | ||
745 | |||
746 | /* see generic_file_direct_write() for why this is necessary */ | ||
747 | if (mapping->nrpages) { | ||
748 | invalidate_inode_pages2_range(mapping, | ||
749 | pos >> PAGE_CACHE_SHIFT, | ||
750 | end >> PAGE_CACHE_SHIFT); | ||
751 | } | ||
752 | |||
753 | if (ret > 0) { | ||
754 | pos += ret; | ||
755 | iov_iter_advance(from, ret); | ||
756 | iocb->ki_pos = pos; | ||
757 | } | ||
720 | out: | 758 | out: |
721 | xfs_rw_iunlock(ip, iolock); | 759 | xfs_rw_iunlock(ip, iolock); |
722 | 760 | ||
@@ -822,6 +860,11 @@ xfs_file_write_iter( | |||
822 | return ret; | 860 | return ret; |
823 | } | 861 | } |
824 | 862 | ||
863 | #define XFS_FALLOC_FL_SUPPORTED \ | ||
864 | (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ | ||
865 | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \ | ||
866 | FALLOC_FL_INSERT_RANGE) | ||
867 | |||
825 | STATIC long | 868 | STATIC long |
826 | xfs_file_fallocate( | 869 | xfs_file_fallocate( |
827 | struct file *file, | 870 | struct file *file, |
@@ -835,18 +878,21 @@ xfs_file_fallocate( | |||
835 | enum xfs_prealloc_flags flags = 0; | 878 | enum xfs_prealloc_flags flags = 0; |
836 | uint iolock = XFS_IOLOCK_EXCL; | 879 | uint iolock = XFS_IOLOCK_EXCL; |
837 | loff_t new_size = 0; | 880 | loff_t new_size = 0; |
881 | bool do_file_insert = 0; | ||
838 | 882 | ||
839 | if (!S_ISREG(inode->i_mode)) | 883 | if (!S_ISREG(inode->i_mode)) |
840 | return -EINVAL; | 884 | return -EINVAL; |
841 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | | 885 | if (mode & ~XFS_FALLOC_FL_SUPPORTED) |
842 | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) | ||
843 | return -EOPNOTSUPP; | 886 | return -EOPNOTSUPP; |
844 | 887 | ||
845 | xfs_ilock(ip, iolock); | 888 | xfs_ilock(ip, iolock); |
846 | error = xfs_break_layouts(inode, &iolock); | 889 | error = xfs_break_layouts(inode, &iolock, false); |
847 | if (error) | 890 | if (error) |
848 | goto out_unlock; | 891 | goto out_unlock; |
849 | 892 | ||
893 | xfs_ilock(ip, XFS_MMAPLOCK_EXCL); | ||
894 | iolock |= XFS_MMAPLOCK_EXCL; | ||
895 | |||
850 | if (mode & FALLOC_FL_PUNCH_HOLE) { | 896 | if (mode & FALLOC_FL_PUNCH_HOLE) { |
851 | error = xfs_free_file_space(ip, offset, len); | 897 | error = xfs_free_file_space(ip, offset, len); |
852 | if (error) | 898 | if (error) |
@@ -873,6 +919,27 @@ xfs_file_fallocate( | |||
873 | error = xfs_collapse_file_space(ip, offset, len); | 919 | error = xfs_collapse_file_space(ip, offset, len); |
874 | if (error) | 920 | if (error) |
875 | goto out_unlock; | 921 | goto out_unlock; |
922 | } else if (mode & FALLOC_FL_INSERT_RANGE) { | ||
923 | unsigned blksize_mask = (1 << inode->i_blkbits) - 1; | ||
924 | |||
925 | new_size = i_size_read(inode) + len; | ||
926 | if (offset & blksize_mask || len & blksize_mask) { | ||
927 | error = -EINVAL; | ||
928 | goto out_unlock; | ||
929 | } | ||
930 | |||
931 | /* check the new inode size does not wrap through zero */ | ||
932 | if (new_size > inode->i_sb->s_maxbytes) { | ||
933 | error = -EFBIG; | ||
934 | goto out_unlock; | ||
935 | } | ||
936 | |||
937 | /* Offset should be less than i_size */ | ||
938 | if (offset >= i_size_read(inode)) { | ||
939 | error = -EINVAL; | ||
940 | goto out_unlock; | ||
941 | } | ||
942 | do_file_insert = 1; | ||
876 | } else { | 943 | } else { |
877 | flags |= XFS_PREALLOC_SET; | 944 | flags |= XFS_PREALLOC_SET; |
878 | 945 | ||
@@ -907,8 +974,19 @@ xfs_file_fallocate( | |||
907 | iattr.ia_valid = ATTR_SIZE; | 974 | iattr.ia_valid = ATTR_SIZE; |
908 | iattr.ia_size = new_size; | 975 | iattr.ia_size = new_size; |
909 | error = xfs_setattr_size(ip, &iattr); | 976 | error = xfs_setattr_size(ip, &iattr); |
977 | if (error) | ||
978 | goto out_unlock; | ||
910 | } | 979 | } |
911 | 980 | ||
981 | /* | ||
982 | * Perform hole insertion now that the file size has been | ||
983 | * updated so that if we crash during the operation we don't | ||
984 | * leave shifted extents past EOF and hence losing access to | ||
985 | * the data that is contained within them. | ||
986 | */ | ||
987 | if (do_file_insert) | ||
988 | error = xfs_insert_file_space(ip, offset, len); | ||
989 | |||
912 | out_unlock: | 990 | out_unlock: |
913 | xfs_iunlock(ip, iolock); | 991 | xfs_iunlock(ip, iolock); |
914 | return error; | 992 | return error; |
@@ -997,20 +1075,6 @@ xfs_file_mmap( | |||
997 | } | 1075 | } |
998 | 1076 | ||
999 | /* | 1077 | /* |
1000 | * mmap()d file has taken write protection fault and is being made | ||
1001 | * writable. We can set the page state up correctly for a writable | ||
1002 | * page, which means we can do correct delalloc accounting (ENOSPC | ||
1003 | * checking!) and unwritten extent mapping. | ||
1004 | */ | ||
1005 | STATIC int | ||
1006 | xfs_vm_page_mkwrite( | ||
1007 | struct vm_area_struct *vma, | ||
1008 | struct vm_fault *vmf) | ||
1009 | { | ||
1010 | return block_page_mkwrite(vma, vmf, xfs_get_blocks); | ||
1011 | } | ||
1012 | |||
1013 | /* | ||
1014 | * This type is designed to indicate the type of offset we would like | 1078 | * This type is designed to indicate the type of offset we would like |
1015 | * to search from page cache for xfs_seek_hole_data(). | 1079 | * to search from page cache for xfs_seek_hole_data(). |
1016 | */ | 1080 | */ |
@@ -1385,6 +1449,55 @@ xfs_file_llseek( | |||
1385 | } | 1449 | } |
1386 | } | 1450 | } |
1387 | 1451 | ||
1452 | /* | ||
1453 | * Locking for serialisation of IO during page faults. This results in a lock | ||
1454 | * ordering of: | ||
1455 | * | ||
1456 | * mmap_sem (MM) | ||
1457 | * i_mmap_lock (XFS - truncate serialisation) | ||
1458 | * page_lock (MM) | ||
1459 | * i_lock (XFS - extent map serialisation) | ||
1460 | */ | ||
1461 | STATIC int | ||
1462 | xfs_filemap_fault( | ||
1463 | struct vm_area_struct *vma, | ||
1464 | struct vm_fault *vmf) | ||
1465 | { | ||
1466 | struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host); | ||
1467 | int error; | ||
1468 | |||
1469 | trace_xfs_filemap_fault(ip); | ||
1470 | |||
1471 | xfs_ilock(ip, XFS_MMAPLOCK_SHARED); | ||
1472 | error = filemap_fault(vma, vmf); | ||
1473 | xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); | ||
1474 | |||
1475 | return error; | ||
1476 | } | ||
1477 | |||
1478 | /* | ||
1479 | * mmap()d file has taken write protection fault and is being made writable. We | ||
1480 | * can set the page state up correctly for a writable page, which means we can | ||
1481 | * do correct delalloc accounting (ENOSPC checking!) and unwritten extent | ||
1482 | * mapping. | ||
1483 | */ | ||
1484 | STATIC int | ||
1485 | xfs_filemap_page_mkwrite( | ||
1486 | struct vm_area_struct *vma, | ||
1487 | struct vm_fault *vmf) | ||
1488 | { | ||
1489 | struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host); | ||
1490 | int error; | ||
1491 | |||
1492 | trace_xfs_filemap_page_mkwrite(ip); | ||
1493 | |||
1494 | xfs_ilock(ip, XFS_MMAPLOCK_SHARED); | ||
1495 | error = block_page_mkwrite(vma, vmf, xfs_get_blocks); | ||
1496 | xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); | ||
1497 | |||
1498 | return error; | ||
1499 | } | ||
1500 | |||
1388 | const struct file_operations xfs_file_operations = { | 1501 | const struct file_operations xfs_file_operations = { |
1389 | .llseek = xfs_file_llseek, | 1502 | .llseek = xfs_file_llseek, |
1390 | .read_iter = xfs_file_read_iter, | 1503 | .read_iter = xfs_file_read_iter, |
@@ -1415,7 +1528,7 @@ const struct file_operations xfs_dir_file_operations = { | |||
1415 | }; | 1528 | }; |
1416 | 1529 | ||
1417 | static const struct vm_operations_struct xfs_file_vm_ops = { | 1530 | static const struct vm_operations_struct xfs_file_vm_ops = { |
1418 | .fault = filemap_fault, | 1531 | .fault = xfs_filemap_fault, |
1419 | .map_pages = filemap_map_pages, | 1532 | .map_pages = filemap_map_pages, |
1420 | .page_mkwrite = xfs_vm_page_mkwrite, | 1533 | .page_mkwrite = xfs_filemap_page_mkwrite, |
1421 | }; | 1534 | }; |