diff options
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r-- | fs/btrfs/file.c | 166 |
1 files changed, 102 insertions, 64 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 29ff749ff4ca..79437c5eeb1e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -46,32 +46,42 @@ | |||
46 | static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, | 46 | static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, |
47 | int write_bytes, | 47 | int write_bytes, |
48 | struct page **prepared_pages, | 48 | struct page **prepared_pages, |
49 | const char __user *buf) | 49 | struct iov_iter *i) |
50 | { | 50 | { |
51 | long page_fault = 0; | 51 | size_t copied; |
52 | int i; | 52 | int pg = 0; |
53 | int offset = pos & (PAGE_CACHE_SIZE - 1); | 53 | int offset = pos & (PAGE_CACHE_SIZE - 1); |
54 | 54 | ||
55 | for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) { | 55 | while (write_bytes > 0) { |
56 | size_t count = min_t(size_t, | 56 | size_t count = min_t(size_t, |
57 | PAGE_CACHE_SIZE - offset, write_bytes); | 57 | PAGE_CACHE_SIZE - offset, write_bytes); |
58 | struct page *page = prepared_pages[i]; | 58 | struct page *page = prepared_pages[pg]; |
59 | fault_in_pages_readable(buf, count); | 59 | again: |
60 | if (unlikely(iov_iter_fault_in_readable(i, count))) | ||
61 | return -EFAULT; | ||
60 | 62 | ||
61 | /* Copy data from userspace to the current page */ | 63 | /* Copy data from userspace to the current page */ |
62 | kmap(page); | 64 | copied = iov_iter_copy_from_user(page, i, offset, count); |
63 | page_fault = __copy_from_user(page_address(page) + offset, | 65 | |
64 | buf, count); | ||
65 | /* Flush processor's dcache for this page */ | 66 | /* Flush processor's dcache for this page */ |
66 | flush_dcache_page(page); | 67 | flush_dcache_page(page); |
67 | kunmap(page); | 68 | iov_iter_advance(i, copied); |
68 | buf += count; | 69 | write_bytes -= copied; |
69 | write_bytes -= count; | ||
70 | 70 | ||
71 | if (page_fault) | 71 | if (unlikely(copied == 0)) { |
72 | break; | 72 | count = min_t(size_t, PAGE_CACHE_SIZE - offset, |
73 | iov_iter_single_seg_count(i)); | ||
74 | goto again; | ||
75 | } | ||
76 | |||
77 | if (unlikely(copied < PAGE_CACHE_SIZE - offset)) { | ||
78 | offset += copied; | ||
79 | } else { | ||
80 | pg++; | ||
81 | offset = 0; | ||
82 | } | ||
73 | } | 83 | } |
74 | return page_fault ? -EFAULT : 0; | 84 | return 0; |
75 | } | 85 | } |
76 | 86 | ||
77 | /* | 87 | /* |
@@ -126,8 +136,7 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
126 | end_of_last_block = start_pos + num_bytes - 1; | 136 | end_of_last_block = start_pos + num_bytes - 1; |
127 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, | 137 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, |
128 | NULL); | 138 | NULL); |
129 | if (err) | 139 | BUG_ON(err); |
130 | return err; | ||
131 | 140 | ||
132 | for (i = 0; i < num_pages; i++) { | 141 | for (i = 0; i < num_pages; i++) { |
133 | struct page *p = pages[i]; | 142 | struct page *p = pages[i]; |
@@ -142,7 +151,7 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
142 | * at this time. | 151 | * at this time. |
143 | */ | 152 | */ |
144 | } | 153 | } |
145 | return err; | 154 | return 0; |
146 | } | 155 | } |
147 | 156 | ||
148 | /* | 157 | /* |
@@ -823,45 +832,46 @@ again: | |||
823 | return 0; | 832 | return 0; |
824 | } | 833 | } |
825 | 834 | ||
826 | static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | 835 | static ssize_t btrfs_file_aio_write(struct kiocb *iocb, |
827 | size_t count, loff_t *ppos) | 836 | const struct iovec *iov, |
837 | unsigned long nr_segs, loff_t pos) | ||
828 | { | 838 | { |
829 | loff_t pos; | 839 | struct file *file = iocb->ki_filp; |
840 | struct inode *inode = fdentry(file)->d_inode; | ||
841 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
842 | struct page *pinned[2]; | ||
843 | struct page **pages = NULL; | ||
844 | struct iov_iter i; | ||
845 | loff_t *ppos = &iocb->ki_pos; | ||
830 | loff_t start_pos; | 846 | loff_t start_pos; |
831 | ssize_t num_written = 0; | 847 | ssize_t num_written = 0; |
832 | ssize_t err = 0; | 848 | ssize_t err = 0; |
849 | size_t count; | ||
850 | size_t ocount; | ||
833 | int ret = 0; | 851 | int ret = 0; |
834 | struct inode *inode = fdentry(file)->d_inode; | ||
835 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
836 | struct page **pages = NULL; | ||
837 | int nrptrs; | 852 | int nrptrs; |
838 | struct page *pinned[2]; | ||
839 | unsigned long first_index; | 853 | unsigned long first_index; |
840 | unsigned long last_index; | 854 | unsigned long last_index; |
841 | int will_write; | 855 | int will_write; |
856 | int buffered = 0; | ||
842 | 857 | ||
843 | will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || | 858 | will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || |
844 | (file->f_flags & O_DIRECT)); | 859 | (file->f_flags & O_DIRECT)); |
845 | 860 | ||
846 | nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, | ||
847 | PAGE_CACHE_SIZE / (sizeof(struct page *))); | ||
848 | pinned[0] = NULL; | 861 | pinned[0] = NULL; |
849 | pinned[1] = NULL; | 862 | pinned[1] = NULL; |
850 | 863 | ||
851 | pos = *ppos; | ||
852 | start_pos = pos; | 864 | start_pos = pos; |
853 | 865 | ||
854 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); | 866 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); |
855 | 867 | ||
856 | /* do the reserve before the mutex lock in case we have to do some | ||
857 | * flushing. We wouldn't deadlock, but this is more polite. | ||
858 | */ | ||
859 | err = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
860 | if (err) | ||
861 | goto out_nolock; | ||
862 | |||
863 | mutex_lock(&inode->i_mutex); | 868 | mutex_lock(&inode->i_mutex); |
864 | 869 | ||
870 | err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); | ||
871 | if (err) | ||
872 | goto out; | ||
873 | count = ocount; | ||
874 | |||
865 | current->backing_dev_info = inode->i_mapping->backing_dev_info; | 875 | current->backing_dev_info = inode->i_mapping->backing_dev_info; |
866 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); | 876 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); |
867 | if (err) | 877 | if (err) |
@@ -875,15 +885,53 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
875 | goto out; | 885 | goto out; |
876 | 886 | ||
877 | file_update_time(file); | 887 | file_update_time(file); |
888 | BTRFS_I(inode)->sequence++; | ||
889 | |||
890 | if (unlikely(file->f_flags & O_DIRECT)) { | ||
891 | num_written = generic_file_direct_write(iocb, iov, &nr_segs, | ||
892 | pos, ppos, count, | ||
893 | ocount); | ||
894 | /* | ||
895 | * the generic O_DIRECT will update in-memory i_size after the | ||
896 | * DIOs are done. But our endio handlers that update the on | ||
897 | * disk i_size never update past the in memory i_size. So we | ||
898 | * need one more update here to catch any additions to the | ||
899 | * file | ||
900 | */ | ||
901 | if (inode->i_size != BTRFS_I(inode)->disk_i_size) { | ||
902 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | ||
903 | mark_inode_dirty(inode); | ||
904 | } | ||
878 | 905 | ||
906 | if (num_written < 0) { | ||
907 | ret = num_written; | ||
908 | num_written = 0; | ||
909 | goto out; | ||
910 | } else if (num_written == count) { | ||
911 | /* pick up pos changes done by the generic code */ | ||
912 | pos = *ppos; | ||
913 | goto out; | ||
914 | } | ||
915 | /* | ||
916 | * We are going to do buffered for the rest of the range, so we | ||
917 | * need to make sure to invalidate the buffered pages when we're | ||
918 | * done. | ||
919 | */ | ||
920 | buffered = 1; | ||
921 | pos += num_written; | ||
922 | } | ||
923 | |||
924 | iov_iter_init(&i, iov, nr_segs, count, num_written); | ||
925 | nrptrs = min((iov_iter_count(&i) + PAGE_CACHE_SIZE - 1) / | ||
926 | PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / | ||
927 | (sizeof(struct page *))); | ||
879 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); | 928 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); |
880 | 929 | ||
881 | /* generic_write_checks can change our pos */ | 930 | /* generic_write_checks can change our pos */ |
882 | start_pos = pos; | 931 | start_pos = pos; |
883 | 932 | ||
884 | BTRFS_I(inode)->sequence++; | ||
885 | first_index = pos >> PAGE_CACHE_SHIFT; | 933 | first_index = pos >> PAGE_CACHE_SHIFT; |
886 | last_index = (pos + count) >> PAGE_CACHE_SHIFT; | 934 | last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT; |
887 | 935 | ||
888 | /* | 936 | /* |
889 | * there are lots of better ways to do this, but this code | 937 | * there are lots of better ways to do this, but this code |
@@ -900,7 +948,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
900 | unlock_page(pinned[0]); | 948 | unlock_page(pinned[0]); |
901 | } | 949 | } |
902 | } | 950 | } |
903 | if ((pos + count) & (PAGE_CACHE_SIZE - 1)) { | 951 | if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) { |
904 | pinned[1] = grab_cache_page(inode->i_mapping, last_index); | 952 | pinned[1] = grab_cache_page(inode->i_mapping, last_index); |
905 | if (!PageUptodate(pinned[1])) { | 953 | if (!PageUptodate(pinned[1])) { |
906 | ret = btrfs_readpage(NULL, pinned[1]); | 954 | ret = btrfs_readpage(NULL, pinned[1]); |
@@ -911,10 +959,10 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
911 | } | 959 | } |
912 | } | 960 | } |
913 | 961 | ||
914 | while (count > 0) { | 962 | while (iov_iter_count(&i) > 0) { |
915 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); | 963 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); |
916 | size_t write_bytes = min(count, nrptrs * | 964 | size_t write_bytes = min(iov_iter_count(&i), |
917 | (size_t)PAGE_CACHE_SIZE - | 965 | nrptrs * (size_t)PAGE_CACHE_SIZE - |
918 | offset); | 966 | offset); |
919 | size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> | 967 | size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> |
920 | PAGE_CACHE_SHIFT; | 968 | PAGE_CACHE_SHIFT; |
@@ -922,7 +970,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
922 | WARN_ON(num_pages > nrptrs); | 970 | WARN_ON(num_pages > nrptrs); |
923 | memset(pages, 0, sizeof(struct page *) * nrptrs); | 971 | memset(pages, 0, sizeof(struct page *) * nrptrs); |
924 | 972 | ||
925 | ret = btrfs_check_data_free_space(root, inode, write_bytes); | 973 | ret = btrfs_delalloc_reserve_space(inode, write_bytes); |
926 | if (ret) | 974 | if (ret) |
927 | goto out; | 975 | goto out; |
928 | 976 | ||
@@ -930,26 +978,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
930 | pos, first_index, last_index, | 978 | pos, first_index, last_index, |
931 | write_bytes); | 979 | write_bytes); |
932 | if (ret) { | 980 | if (ret) { |
933 | btrfs_free_reserved_data_space(root, inode, | 981 | btrfs_delalloc_release_space(inode, write_bytes); |
934 | write_bytes); | ||
935 | goto out; | 982 | goto out; |
936 | } | 983 | } |
937 | 984 | ||
938 | ret = btrfs_copy_from_user(pos, num_pages, | 985 | ret = btrfs_copy_from_user(pos, num_pages, |
939 | write_bytes, pages, buf); | 986 | write_bytes, pages, &i); |
940 | if (ret) { | 987 | if (ret == 0) { |
941 | btrfs_free_reserved_data_space(root, inode, | 988 | dirty_and_release_pages(NULL, root, file, pages, |
942 | write_bytes); | 989 | num_pages, pos, write_bytes); |
943 | btrfs_drop_pages(pages, num_pages); | ||
944 | goto out; | ||
945 | } | 990 | } |
946 | 991 | ||
947 | ret = dirty_and_release_pages(NULL, root, file, pages, | ||
948 | num_pages, pos, write_bytes); | ||
949 | btrfs_drop_pages(pages, num_pages); | 992 | btrfs_drop_pages(pages, num_pages); |
950 | if (ret) { | 993 | if (ret) { |
951 | btrfs_free_reserved_data_space(root, inode, | 994 | btrfs_delalloc_release_space(inode, write_bytes); |
952 | write_bytes); | ||
953 | goto out; | 995 | goto out; |
954 | } | 996 | } |
955 | 997 | ||
@@ -965,8 +1007,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
965 | btrfs_throttle(root); | 1007 | btrfs_throttle(root); |
966 | } | 1008 | } |
967 | 1009 | ||
968 | buf += write_bytes; | ||
969 | count -= write_bytes; | ||
970 | pos += write_bytes; | 1010 | pos += write_bytes; |
971 | num_written += write_bytes; | 1011 | num_written += write_bytes; |
972 | 1012 | ||
@@ -976,9 +1016,7 @@ out: | |||
976 | mutex_unlock(&inode->i_mutex); | 1016 | mutex_unlock(&inode->i_mutex); |
977 | if (ret) | 1017 | if (ret) |
978 | err = ret; | 1018 | err = ret; |
979 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
980 | 1019 | ||
981 | out_nolock: | ||
982 | kfree(pages); | 1020 | kfree(pages); |
983 | if (pinned[0]) | 1021 | if (pinned[0]) |
984 | page_cache_release(pinned[0]); | 1022 | page_cache_release(pinned[0]); |
@@ -1008,7 +1046,7 @@ out_nolock: | |||
1008 | num_written = err; | 1046 | num_written = err; |
1009 | 1047 | ||
1010 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { | 1048 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { |
1011 | trans = btrfs_start_transaction(root, 1); | 1049 | trans = btrfs_start_transaction(root, 0); |
1012 | ret = btrfs_log_dentry_safe(trans, root, | 1050 | ret = btrfs_log_dentry_safe(trans, root, |
1013 | file->f_dentry); | 1051 | file->f_dentry); |
1014 | if (ret == 0) { | 1052 | if (ret == 0) { |
@@ -1023,7 +1061,7 @@ out_nolock: | |||
1023 | btrfs_end_transaction(trans, root); | 1061 | btrfs_end_transaction(trans, root); |
1024 | } | 1062 | } |
1025 | } | 1063 | } |
1026 | if (file->f_flags & O_DIRECT) { | 1064 | if (file->f_flags & O_DIRECT && buffered) { |
1027 | invalidate_mapping_pages(inode->i_mapping, | 1065 | invalidate_mapping_pages(inode->i_mapping, |
1028 | start_pos >> PAGE_CACHE_SHIFT, | 1066 | start_pos >> PAGE_CACHE_SHIFT, |
1029 | (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); | 1067 | (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); |
@@ -1104,9 +1142,9 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
1104 | if (file && file->private_data) | 1142 | if (file && file->private_data) |
1105 | btrfs_ioctl_trans_end(file); | 1143 | btrfs_ioctl_trans_end(file); |
1106 | 1144 | ||
1107 | trans = btrfs_start_transaction(root, 1); | 1145 | trans = btrfs_start_transaction(root, 0); |
1108 | if (!trans) { | 1146 | if (IS_ERR(trans)) { |
1109 | ret = -ENOMEM; | 1147 | ret = PTR_ERR(trans); |
1110 | goto out; | 1148 | goto out; |
1111 | } | 1149 | } |
1112 | 1150 | ||
@@ -1161,7 +1199,7 @@ const struct file_operations btrfs_file_operations = { | |||
1161 | .read = do_sync_read, | 1199 | .read = do_sync_read, |
1162 | .aio_read = generic_file_aio_read, | 1200 | .aio_read = generic_file_aio_read, |
1163 | .splice_read = generic_file_splice_read, | 1201 | .splice_read = generic_file_splice_read, |
1164 | .write = btrfs_file_write, | 1202 | .aio_write = btrfs_file_aio_write, |
1165 | .mmap = btrfs_file_mmap, | 1203 | .mmap = btrfs_file_mmap, |
1166 | .open = generic_file_open, | 1204 | .open = generic_file_open, |
1167 | .release = btrfs_release_file, | 1205 | .release = btrfs_release_file, |