Diffstat (limited to 'fs/btrfs/file.c')

 -rw-r--r--  fs/btrfs/file.c | 391
 1 files changed, 199 insertions, 192 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index f447b783bb84..75899a01dded 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -45,14 +45,14 @@
  * and be replaced with calls into generic code.
  */
 static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
-                                         int write_bytes,
+                                         size_t write_bytes,
                                          struct page **prepared_pages,
                                          struct iov_iter *i)
 {
         size_t copied = 0;
+        size_t total_copied = 0;
         int pg = 0;
         int offset = pos & (PAGE_CACHE_SIZE - 1);
-        int total_copied = 0;
 
         while (write_bytes > 0) {
                 size_t count = min_t(size_t,
@@ -88,9 +88,8 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
                 total_copied += copied;
 
                 /* Return to btrfs_file_aio_write to fault page */
-                if (unlikely(copied == 0)) {
+                if (unlikely(copied == 0))
                         break;
-                }
 
                 if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
                         offset += copied;
@@ -105,12 +104,10 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
 /*
  * unlocks pages after btrfs_file_write is done with them
  */
-static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages)
+void btrfs_drop_pages(struct page **pages, size_t num_pages)
 {
         size_t i;
         for (i = 0; i < num_pages; i++) {
-                if (!pages[i])
-                        break;
                 /* page checked is some magic around finding pages that
                  * have been modified without going through btrfs_set_page_dirty
                  * clear it here
@@ -130,17 +127,13 @@ static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages)
  * this also makes the decision about creating an inline extent vs
  * doing real data extents, marking pages dirty and delalloc as required.
  */
-static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
-                                struct btrfs_root *root,
-                                struct file *file,
-                                struct page **pages,
-                                size_t num_pages,
-                                loff_t pos,
-                                size_t write_bytes)
+int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
+                      struct page **pages, size_t num_pages,
+                      loff_t pos, size_t write_bytes,
+                      struct extent_state **cached)
 {
         int err = 0;
         int i;
-        struct inode *inode = fdentry(file)->d_inode;
         u64 num_bytes;
         u64 start_pos;
         u64 end_of_last_block;
@@ -153,8 +146,9 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
 
         end_of_last_block = start_pos + num_bytes - 1;
         err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
-                                        NULL);
-        BUG_ON(err);
+                                        cached);
+        if (err)
+                return err;
 
         for (i = 0; i < num_pages; i++) {
                 struct page *p = pages[i];
@@ -162,13 +156,14 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
                 ClearPageChecked(p);
                 set_page_dirty(p);
         }
-        if (end_pos > isize) {
+
+        /*
+         * we've only changed i_size in ram, and we haven't updated
+         * the disk i_size. There is no need to log the inode
+         * at this time.
+         */
+        if (end_pos > isize)
                 i_size_write(inode, end_pos);
-        /* we've only changed i_size in ram, and we haven't updated
-         * the disk i_size. There is no need to log the inode
-         * at this time.
-         */
-        }
         return 0;
 }
 
@@ -610,6 +605,8 @@ again:
         key.offset = split;
 
         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+        if (ret < 0)
+                goto out;
         if (ret > 0 && path->slots[0] > 0)
                 path->slots[0]--;
 
@@ -819,12 +816,11 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
         last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;
 
         if (start_pos > inode->i_size) {
-                err = btrfs_cont_expand(inode, start_pos);
+                err = btrfs_cont_expand(inode, i_size_read(inode), start_pos);
                 if (err)
                         return err;
         }
 
-        memset(pages, 0, num_pages * sizeof(struct page *));
 again:
         for (i = 0; i < num_pages; i++) {
                 pages[i] = grab_cache_page(inode->i_mapping, index + i);
@@ -896,156 +892,71 @@ fail:
 
 }
 
-static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
-                                    const struct iovec *iov,
-                                    unsigned long nr_segs, loff_t pos)
+static noinline ssize_t __btrfs_buffered_write(struct file *file,
+                                               struct iov_iter *i,
+                                               loff_t pos)
 {
-        struct file *file = iocb->ki_filp;
         struct inode *inode = fdentry(file)->d_inode;
         struct btrfs_root *root = BTRFS_I(inode)->root;
         struct page **pages = NULL;
-        struct iov_iter i;
-        loff_t *ppos = &iocb->ki_pos;
-        loff_t start_pos;
-        ssize_t num_written = 0;
-        ssize_t err = 0;
-        size_t count;
-        size_t ocount;
-        int ret = 0;
-        int nrptrs;
         unsigned long first_index;
         unsigned long last_index;
-        int will_write;
-        int buffered = 0;
-        int copied = 0;
-        int dirty_pages = 0;
-
-        will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
-                      (file->f_flags & O_DIRECT));
-
-        start_pos = pos;
-
-        vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
-
-        mutex_lock(&inode->i_mutex);
-
-        err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
-        if (err)
-                goto out;
-        count = ocount;
-
-        current->backing_dev_info = inode->i_mapping->backing_dev_info;
-        err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
-        if (err)
-                goto out;
-
-        if (count == 0)
-                goto out;
-
-        err = file_remove_suid(file);
-        if (err)
-                goto out;
-
-        /*
-         * If BTRFS flips readonly due to some impossible error
-         * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR),
-         * although we have opened a file as writable, we have
-         * to stop this write operation to ensure FS consistency.
-         */
-        if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
-                err = -EROFS;
-                goto out;
-        }
-
-        file_update_time(file);
-        BTRFS_I(inode)->sequence++;
-
-        if (unlikely(file->f_flags & O_DIRECT)) {
-                num_written = generic_file_direct_write(iocb, iov, &nr_segs,
-                                                        pos, ppos, count,
-                                                        ocount);
-                /*
-                 * the generic O_DIRECT will update in-memory i_size after the
-                 * DIOs are done. But our endio handlers that update the on
-                 * disk i_size never update past the in memory i_size. So we
-                 * need one more update here to catch any additions to the
-                 * file
-                 */
-                if (inode->i_size != BTRFS_I(inode)->disk_i_size) {
-                        btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
-                        mark_inode_dirty(inode);
-                }
-
-                if (num_written < 0) {
-                        ret = num_written;
-                        num_written = 0;
-                        goto out;
-                } else if (num_written == count) {
-                        /* pick up pos changes done by the generic code */
-                        pos = *ppos;
-                        goto out;
-                }
-                /*
-                 * We are going to do buffered for the rest of the range, so we
-                 * need to make sure to invalidate the buffered pages when we're
-                 * done.
-                 */
-                buffered = 1;
-                pos += num_written;
-        }
+        size_t num_written = 0;
+        int nrptrs;
+        int ret = 0;
 
-        iov_iter_init(&i, iov, nr_segs, count, num_written);
-        nrptrs = min((iov_iter_count(&i) + PAGE_CACHE_SIZE - 1) /
+        nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
                      PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
                      (sizeof(struct page *)));
         pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
-        if (!pages) {
-                ret = -ENOMEM;
-                goto out;
-        }
-
-        /* generic_write_checks can change our pos */
-        start_pos = pos;
+        if (!pages)
+                return -ENOMEM;
 
         first_index = pos >> PAGE_CACHE_SHIFT;
-        last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT;
+        last_index = (pos + iov_iter_count(i)) >> PAGE_CACHE_SHIFT;
 
-        while (iov_iter_count(&i) > 0) {
+        while (iov_iter_count(i) > 0) {
                 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
-                size_t write_bytes = min(iov_iter_count(&i),
+                size_t write_bytes = min(iov_iter_count(i),
                                          nrptrs * (size_t)PAGE_CACHE_SIZE -
                                          offset);
                 size_t num_pages = (write_bytes + offset +
                                     PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+                size_t dirty_pages;
+                size_t copied;
 
                 WARN_ON(num_pages > nrptrs);
-                memset(pages, 0, sizeof(struct page *) * nrptrs);
 
                 /*
                  * Fault pages before locking them in prepare_pages
                  * to avoid recursive lock
                  */
-                if (unlikely(iov_iter_fault_in_readable(&i, write_bytes))) {
+                if (unlikely(iov_iter_fault_in_readable(i, write_bytes))) {
                         ret = -EFAULT;
-                        goto out;
+                        break;
                 }
 
                 ret = btrfs_delalloc_reserve_space(inode,
                                         num_pages << PAGE_CACHE_SHIFT);
                 if (ret)
-                        goto out;
+                        break;
 
+                /*
+                 * This is going to setup the pages array with the number of
+                 * pages we want, so we don't really need to worry about the
+                 * contents of pages from loop to loop
+                 */
                 ret = prepare_pages(root, file, pages, num_pages,
                                     pos, first_index, last_index,
                                     write_bytes);
                 if (ret) {
                         btrfs_delalloc_release_space(inode,
                                         num_pages << PAGE_CACHE_SHIFT);
-                        goto out;
+                        break;
                 }
 
                 copied = btrfs_copy_from_user(pos, num_pages,
-                                              write_bytes, pages, &i);
+                                              write_bytes, pages, i);
 
                 /*
                  * if we have trouble faulting in the pages, fall
@@ -1061,6 +972,13 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
                                              PAGE_CACHE_SIZE - 1) >>
                                              PAGE_CACHE_SHIFT;
 
+                /*
+                 * If we had a short copy we need to release the excess delaloc
+                 * bytes we reserved. We need to increment outstanding_extents
+                 * because btrfs_delalloc_release_space will decrement it, but
+                 * we still have an outstanding extent for the chunk we actually
+                 * managed to copy.
+                 */
                 if (num_pages > dirty_pages) {
                         if (copied > 0)
                                 atomic_inc(
@@ -1071,39 +989,157 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
                 }
 
                 if (copied > 0) {
-                        dirty_and_release_pages(NULL, root, file, pages,
-                                                dirty_pages, pos, copied);
+                        ret = btrfs_dirty_pages(root, inode, pages,
+                                                dirty_pages, pos, copied,
+                                                NULL);
+                        if (ret) {
+                                btrfs_delalloc_release_space(inode,
+                                        dirty_pages << PAGE_CACHE_SHIFT);
+                                btrfs_drop_pages(pages, num_pages);
+                                break;
+                        }
                 }
 
                 btrfs_drop_pages(pages, num_pages);
 
-                if (copied > 0) {
-                        if (will_write) {
-                                filemap_fdatawrite_range(inode->i_mapping, pos,
-                                                         pos + copied - 1);
-                        } else {
-                                balance_dirty_pages_ratelimited_nr(
-                                                        inode->i_mapping,
-                                                        dirty_pages);
-                                if (dirty_pages <
-                                    (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
-                                        btrfs_btree_balance_dirty(root, 1);
-                                btrfs_throttle(root);
-                        }
-                }
+                cond_resched();
+
+                balance_dirty_pages_ratelimited_nr(inode->i_mapping,
+                                                   dirty_pages);
+                if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
+                        btrfs_btree_balance_dirty(root, 1);
+                btrfs_throttle(root);
 
                 pos += copied;
                 num_written += copied;
+        }
 
-                cond_resched();
+        kfree(pages);
+
+        return num_written ? num_written : ret;
+}
+
+static ssize_t __btrfs_direct_write(struct kiocb *iocb,
+                                    const struct iovec *iov,
+                                    unsigned long nr_segs, loff_t pos,
+                                    loff_t *ppos, size_t count, size_t ocount)
+{
+        struct file *file = iocb->ki_filp;
+        struct inode *inode = fdentry(file)->d_inode;
+        struct iov_iter i;
+        ssize_t written;
+        ssize_t written_buffered;
+        loff_t endbyte;
+        int err;
+
+        written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos,
+                                            count, ocount);
+
+        /*
+         * the generic O_DIRECT will update in-memory i_size after the
+         * DIOs are done. But our endio handlers that update the on
+         * disk i_size never update past the in memory i_size. So we
+         * need one more update here to catch any additions to the
+         * file
+         */
+        if (inode->i_size != BTRFS_I(inode)->disk_i_size) {
+                btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
+                mark_inode_dirty(inode);
         }
+
+        if (written < 0 || written == count)
+                return written;
+
+        pos += written;
+        count -= written;
+        iov_iter_init(&i, iov, nr_segs, count, written);
+        written_buffered = __btrfs_buffered_write(file, &i, pos);
+        if (written_buffered < 0) {
+                err = written_buffered;
+                goto out;
+        }
+        endbyte = pos + written_buffered - 1;
+        err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte);
+        if (err)
+                goto out;
+        written += written_buffered;
+        *ppos = pos + written_buffered;
+        invalidate_mapping_pages(file->f_mapping, pos >> PAGE_CACHE_SHIFT,
+                                 endbyte >> PAGE_CACHE_SHIFT);
 out:
-        mutex_unlock(&inode->i_mutex);
-        if (ret)
-                err = ret;
+        return written ? written : err;
+}
 
-        kfree(pages);
-        *ppos = pos;
+static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
+                                    const struct iovec *iov,
+                                    unsigned long nr_segs, loff_t pos)
+{
+        struct file *file = iocb->ki_filp;
+        struct inode *inode = fdentry(file)->d_inode;
+        struct btrfs_root *root = BTRFS_I(inode)->root;
+        loff_t *ppos = &iocb->ki_pos;
+        ssize_t num_written = 0;
+        ssize_t err = 0;
+        size_t count, ocount;
+
+        vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+
+        mutex_lock(&inode->i_mutex);
+
+        err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
+        if (err) {
+                mutex_unlock(&inode->i_mutex);
+                goto out;
+        }
+        count = ocount;
+
+        current->backing_dev_info = inode->i_mapping->backing_dev_info;
+        err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
+        if (err) {
+                mutex_unlock(&inode->i_mutex);
+                goto out;
+        }
+
+        if (count == 0) {
+                mutex_unlock(&inode->i_mutex);
+                goto out;
+        }
+
+        err = file_remove_suid(file);
+        if (err) {
+                mutex_unlock(&inode->i_mutex);
+                goto out;
+        }
+
+        /*
+         * If BTRFS flips readonly due to some impossible error
+         * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR),
+         * although we have opened a file as writable, we have
+         * to stop this write operation to ensure FS consistency.
+         */
+        if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
+                mutex_unlock(&inode->i_mutex);
+                err = -EROFS;
+                goto out;
+        }
+
+        file_update_time(file);
+        BTRFS_I(inode)->sequence++;
+
+        if (unlikely(file->f_flags & O_DIRECT)) {
+                num_written = __btrfs_direct_write(iocb, iov, nr_segs,
+                                                   pos, ppos, count, ocount);
+        } else {
+                struct iov_iter i;
+
+                iov_iter_init(&i, iov, nr_segs, count, num_written);
+
+                num_written = __btrfs_buffered_write(file, &i, pos);
+                if (num_written > 0)
+                        *ppos = pos + num_written;
+        }
+
+        mutex_unlock(&inode->i_mutex);
 
         /*
          * we want to make sure fsync finds this change
@@ -1118,43 +1154,12 @@ out:
          * one running right now.
          */
         BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
-
-        if (num_written > 0 && will_write) {
-                struct btrfs_trans_handle *trans;
-
-                err = btrfs_wait_ordered_range(inode, start_pos, num_written);
-                if (err)
+        if (num_written > 0 || num_written == -EIOCBQUEUED) {
+                err = generic_write_sync(file, pos, num_written);
+                if (err < 0 && num_written > 0)
                         num_written = err;
-
-                if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
-                        trans = btrfs_start_transaction(root, 0);
-                        if (IS_ERR(trans)) {
-                                num_written = PTR_ERR(trans);
-                                goto done;
-                        }
-                        mutex_lock(&inode->i_mutex);
-                        ret = btrfs_log_dentry_safe(trans, root,
-                                                    file->f_dentry);
-                        mutex_unlock(&inode->i_mutex);
-                        if (ret == 0) {
-                                ret = btrfs_sync_log(trans, root);
-                                if (ret == 0)
-                                        btrfs_end_transaction(trans, root);
-                                else
-                                        btrfs_commit_transaction(trans, root);
-                        } else if (ret != BTRFS_NO_LOG_SYNC) {
-                                btrfs_commit_transaction(trans, root);
-                        } else {
-                                btrfs_end_transaction(trans, root);
-                        }
-                }
-                if (file->f_flags & O_DIRECT && buffered) {
-                        invalidate_mapping_pages(inode->i_mapping,
-                                start_pos >> PAGE_CACHE_SHIFT,
-                                (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
-                }
         }
-done:
+out:
         current->backing_dev_info = NULL;
         return num_written ? num_written : err;
 }
@@ -1197,6 +1202,7 @@ int btrfs_sync_file(struct file *file, int datasync)
         int ret = 0;
         struct btrfs_trans_handle *trans;
 
+        trace_btrfs_sync_file(file, datasync);
 
         /* we wait first, since the writeback may change the inode */
         root->log_batch++;
@@ -1324,7 +1330,8 @@ static long btrfs_fallocate(struct file *file, int mode,
                 goto out;
 
         if (alloc_start > inode->i_size) {
-                ret = btrfs_cont_expand(inode, alloc_start);
+                ret = btrfs_cont_expand(inode, i_size_read(inode),
+                                        alloc_start);
                 if (ret)
                         goto out;
         }