aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/btrfs/file.c355
1 files changed, 180 insertions, 175 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 4d4975592668..f2a80e570a6c 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -45,14 +45,14 @@
45 * and be replaced with calls into generic code. 45 * and be replaced with calls into generic code.
46 */ 46 */
47static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, 47static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
48 int write_bytes, 48 size_t write_bytes,
49 struct page **prepared_pages, 49 struct page **prepared_pages,
50 struct iov_iter *i) 50 struct iov_iter *i)
51{ 51{
52 size_t copied = 0; 52 size_t copied = 0;
53 size_t total_copied = 0;
53 int pg = 0; 54 int pg = 0;
54 int offset = pos & (PAGE_CACHE_SIZE - 1); 55 int offset = pos & (PAGE_CACHE_SIZE - 1);
55 int total_copied = 0;
56 56
57 while (write_bytes > 0) { 57 while (write_bytes > 0) {
58 size_t count = min_t(size_t, 58 size_t count = min_t(size_t,
@@ -129,13 +129,12 @@ static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages)
129 * this also makes the decision about creating an inline extent vs 129 * this also makes the decision about creating an inline extent vs
130 * doing real data extents, marking pages dirty and delalloc as required. 130 * doing real data extents, marking pages dirty and delalloc as required.
131 */ 131 */
132static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, 132static noinline int dirty_and_release_pages(struct btrfs_root *root,
133 struct btrfs_root *root, 133 struct file *file,
134 struct file *file, 134 struct page **pages,
135 struct page **pages, 135 size_t num_pages,
136 size_t num_pages, 136 loff_t pos,
137 loff_t pos, 137 size_t write_bytes)
138 size_t write_bytes)
139{ 138{
140 int err = 0; 139 int err = 0;
141 int i; 140 int i;
@@ -153,7 +152,8 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
153 end_of_last_block = start_pos + num_bytes - 1; 152 end_of_last_block = start_pos + num_bytes - 1;
154 err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, 153 err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
155 NULL); 154 NULL);
156 BUG_ON(err); 155 if (err)
156 return err;
157 157
158 for (i = 0; i < num_pages; i++) { 158 for (i = 0; i < num_pages; i++) {
159 struct page *p = pages[i]; 159 struct page *p = pages[i];
@@ -896,127 +896,38 @@ fail:
896 896
897} 897}
898 898
899static ssize_t btrfs_file_aio_write(struct kiocb *iocb, 899static noinline ssize_t __btrfs_buffered_write(struct file *file,
900 const struct iovec *iov, 900 struct iov_iter *i,
901 unsigned long nr_segs, loff_t pos) 901 loff_t pos)
902{ 902{
903 struct file *file = iocb->ki_filp;
904 struct inode *inode = fdentry(file)->d_inode; 903 struct inode *inode = fdentry(file)->d_inode;
905 struct btrfs_root *root = BTRFS_I(inode)->root; 904 struct btrfs_root *root = BTRFS_I(inode)->root;
906 struct page **pages = NULL; 905 struct page **pages = NULL;
907 struct iov_iter i;
908 loff_t *ppos = &iocb->ki_pos;
909 loff_t start_pos;
910 ssize_t num_written = 0;
911 ssize_t err = 0;
912 size_t count;
913 size_t ocount;
914 int ret = 0;
915 int nrptrs;
916 unsigned long first_index; 906 unsigned long first_index;
917 unsigned long last_index; 907 unsigned long last_index;
918 int will_write; 908 size_t num_written = 0;
919 int buffered = 0; 909 int nrptrs;
920 int copied = 0; 910 int ret;
921 int dirty_pages = 0;
922
923 will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
924 (file->f_flags & O_DIRECT));
925
926 start_pos = pos;
927
928 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
929
930 mutex_lock(&inode->i_mutex);
931
932 err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
933 if (err)
934 goto out;
935 count = ocount;
936
937 current->backing_dev_info = inode->i_mapping->backing_dev_info;
938 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
939 if (err)
940 goto out;
941
942 if (count == 0)
943 goto out;
944
945 err = file_remove_suid(file);
946 if (err)
947 goto out;
948
949 /*
950 * If BTRFS flips readonly due to some impossible error
951 * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR),
952 * although we have opened a file as writable, we have
953 * to stop this write operation to ensure FS consistency.
954 */
955 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
956 err = -EROFS;
957 goto out;
958 }
959
960 file_update_time(file);
961 BTRFS_I(inode)->sequence++;
962
963 if (unlikely(file->f_flags & O_DIRECT)) {
964 num_written = generic_file_direct_write(iocb, iov, &nr_segs,
965 pos, ppos, count,
966 ocount);
967 /*
968 * the generic O_DIRECT will update in-memory i_size after the
969 * DIOs are done. But our endio handlers that update the on
970 * disk i_size never update past the in memory i_size. So we
971 * need one more update here to catch any additions to the
972 * file
973 */
974 if (inode->i_size != BTRFS_I(inode)->disk_i_size) {
975 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
976 mark_inode_dirty(inode);
977 }
978
979 if (num_written < 0) {
980 ret = num_written;
981 num_written = 0;
982 goto out;
983 } else if (num_written == count) {
984 /* pick up pos changes done by the generic code */
985 pos = *ppos;
986 goto out;
987 }
988 /*
989 * We are going to do buffered for the rest of the range, so we
990 * need to make sure to invalidate the buffered pages when we're
991 * done.
992 */
993 buffered = 1;
994 pos += num_written;
995 }
996 911
997 iov_iter_init(&i, iov, nr_segs, count, num_written); 912 nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
998 nrptrs = min((iov_iter_count(&i) + PAGE_CACHE_SIZE - 1) /
999 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / 913 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
1000 (sizeof(struct page *))); 914 (sizeof(struct page *)));
1001 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); 915 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
1002 if (!pages) { 916 if (!pages)
1003 ret = -ENOMEM; 917 return -ENOMEM;
1004 goto out;
1005 }
1006
1007 /* generic_write_checks can change our pos */
1008 start_pos = pos;
1009 918
1010 first_index = pos >> PAGE_CACHE_SHIFT; 919 first_index = pos >> PAGE_CACHE_SHIFT;
1011 last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT; 920 last_index = (pos + iov_iter_count(i)) >> PAGE_CACHE_SHIFT;
1012 921
1013 while (iov_iter_count(&i) > 0) { 922 while (iov_iter_count(i) > 0) {
1014 size_t offset = pos & (PAGE_CACHE_SIZE - 1); 923 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
1015 size_t write_bytes = min(iov_iter_count(&i), 924 size_t write_bytes = min(iov_iter_count(i),
1016 nrptrs * (size_t)PAGE_CACHE_SIZE - 925 nrptrs * (size_t)PAGE_CACHE_SIZE -
1017 offset); 926 offset);
1018 size_t num_pages = (write_bytes + offset + 927 size_t num_pages = (write_bytes + offset +
1019 PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 928 PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
929 size_t dirty_pages;
930 size_t copied;
1020 931
1021 WARN_ON(num_pages > nrptrs); 932 WARN_ON(num_pages > nrptrs);
1022 memset(pages, 0, sizeof(struct page *) * nrptrs); 933 memset(pages, 0, sizeof(struct page *) * nrptrs);
@@ -1025,15 +936,15 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1025 * Fault pages before locking them in prepare_pages 936 * Fault pages before locking them in prepare_pages
1026 * to avoid recursive lock 937 * to avoid recursive lock
1027 */ 938 */
1028 if (unlikely(iov_iter_fault_in_readable(&i, write_bytes))) { 939 if (unlikely(iov_iter_fault_in_readable(i, write_bytes))) {
1029 ret = -EFAULT; 940 ret = -EFAULT;
1030 goto out; 941 break;
1031 } 942 }
1032 943
1033 ret = btrfs_delalloc_reserve_space(inode, 944 ret = btrfs_delalloc_reserve_space(inode,
1034 num_pages << PAGE_CACHE_SHIFT); 945 num_pages << PAGE_CACHE_SHIFT);
1035 if (ret) 946 if (ret)
1036 goto out; 947 break;
1037 948
1038 ret = prepare_pages(root, file, pages, num_pages, 949 ret = prepare_pages(root, file, pages, num_pages,
1039 pos, first_index, last_index, 950 pos, first_index, last_index,
@@ -1041,11 +952,11 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1041 if (ret) { 952 if (ret) {
1042 btrfs_delalloc_release_space(inode, 953 btrfs_delalloc_release_space(inode,
1043 num_pages << PAGE_CACHE_SHIFT); 954 num_pages << PAGE_CACHE_SHIFT);
1044 goto out; 955 break;
1045 } 956 }
1046 957
1047 copied = btrfs_copy_from_user(pos, num_pages, 958 copied = btrfs_copy_from_user(pos, num_pages,
1048 write_bytes, pages, &i); 959 write_bytes, pages, i);
1049 960
1050 /* 961 /*
1051 * if we have trouble faulting in the pages, fall 962 * if we have trouble faulting in the pages, fall
@@ -1061,6 +972,13 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1061 PAGE_CACHE_SIZE - 1) >> 972 PAGE_CACHE_SIZE - 1) >>
1062 PAGE_CACHE_SHIFT; 973 PAGE_CACHE_SHIFT;
1063 974
975 /*
976 * If we had a short copy we need to release the excess delalloc
977 * bytes we reserved. We need to increment outstanding_extents
978 * because btrfs_delalloc_release_space will decrement it, but
979 * we still have an outstanding extent for the chunk we actually
980 * managed to copy.
981 */
1064 if (num_pages > dirty_pages) { 982 if (num_pages > dirty_pages) {
1065 if (copied > 0) 983 if (copied > 0)
1066 atomic_inc( 984 atomic_inc(
@@ -1071,39 +989,157 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1071 } 989 }
1072 990
1073 if (copied > 0) { 991 if (copied > 0) {
1074 dirty_and_release_pages(NULL, root, file, pages, 992 ret = dirty_and_release_pages(root, file, pages,
1075 dirty_pages, pos, copied); 993 dirty_pages, pos,
994 copied);
995 if (ret) {
996 btrfs_delalloc_release_space(inode,
997 dirty_pages << PAGE_CACHE_SHIFT);
998 btrfs_drop_pages(pages, num_pages);
999 break;
1000 }
1076 } 1001 }
1077 1002
1078 btrfs_drop_pages(pages, num_pages); 1003 btrfs_drop_pages(pages, num_pages);
1079 1004
1080 if (copied > 0) { 1005 cond_resched();
1081 if (will_write) { 1006
1082 filemap_fdatawrite_range(inode->i_mapping, pos, 1007 balance_dirty_pages_ratelimited_nr(inode->i_mapping,
1083 pos + copied - 1); 1008 dirty_pages);
1084 } else { 1009 if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
1085 balance_dirty_pages_ratelimited_nr( 1010 btrfs_btree_balance_dirty(root, 1);
1086 inode->i_mapping, 1011 btrfs_throttle(root);
1087 dirty_pages);
1088 if (dirty_pages <
1089 (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
1090 btrfs_btree_balance_dirty(root, 1);
1091 btrfs_throttle(root);
1092 }
1093 }
1094 1012
1095 pos += copied; 1013 pos += copied;
1096 num_written += copied; 1014 num_written += copied;
1015 }
1097 1016
1098 cond_resched(); 1017 kfree(pages);
1018
1019 return num_written ? num_written : ret;
1020}
1021
1022static ssize_t __btrfs_direct_write(struct kiocb *iocb,
1023 const struct iovec *iov,
1024 unsigned long nr_segs, loff_t pos,
1025 loff_t *ppos, size_t count, size_t ocount)
1026{
1027 struct file *file = iocb->ki_filp;
1028 struct inode *inode = fdentry(file)->d_inode;
1029 struct iov_iter i;
1030 ssize_t written;
1031 ssize_t written_buffered;
1032 loff_t endbyte;
1033 int err;
1034
1035 written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos,
1036 count, ocount);
1037
1038 /*
1039 * the generic O_DIRECT will update in-memory i_size after the
1040 * DIOs are done. But our endio handlers that update the on
1041 * disk i_size never update past the in memory i_size. So we
1042 * need one more update here to catch any additions to the
1043 * file
1044 */
1045 if (inode->i_size != BTRFS_I(inode)->disk_i_size) {
1046 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
1047 mark_inode_dirty(inode);
1099 } 1048 }
1049
1050 if (written < 0 || written == count)
1051 return written;
1052
1053 pos += written;
1054 count -= written;
1055 iov_iter_init(&i, iov, nr_segs, count, written);
1056 written_buffered = __btrfs_buffered_write(file, &i, pos);
1057 if (written_buffered < 0) {
1058 err = written_buffered;
1059 goto out;
1060 }
1061 endbyte = pos + written_buffered - 1;
1062 err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte);
1063 if (err)
1064 goto out;
1065 written += written_buffered;
1066 *ppos = pos + written_buffered;
1067 invalidate_mapping_pages(file->f_mapping, pos >> PAGE_CACHE_SHIFT,
1068 endbyte >> PAGE_CACHE_SHIFT);
1100out: 1069out:
1101 mutex_unlock(&inode->i_mutex); 1070 return written ? written : err;
1102 if (ret) 1071}
1103 err = ret;
1104 1072
1105 kfree(pages); 1073static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1106 *ppos = pos; 1074 const struct iovec *iov,
1075 unsigned long nr_segs, loff_t pos)
1076{
1077 struct file *file = iocb->ki_filp;
1078 struct inode *inode = fdentry(file)->d_inode;
1079 struct btrfs_root *root = BTRFS_I(inode)->root;
1080 loff_t *ppos = &iocb->ki_pos;
1081 ssize_t num_written = 0;
1082 ssize_t err = 0;
1083 size_t count, ocount;
1084
1085 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
1086
1087 mutex_lock(&inode->i_mutex);
1088
1089 err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
1090 if (err) {
1091 mutex_unlock(&inode->i_mutex);
1092 goto out;
1093 }
1094 count = ocount;
1095
1096 current->backing_dev_info = inode->i_mapping->backing_dev_info;
1097 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
1098 if (err) {
1099 mutex_unlock(&inode->i_mutex);
1100 goto out;
1101 }
1102
1103 if (count == 0) {
1104 mutex_unlock(&inode->i_mutex);
1105 goto out;
1106 }
1107
1108 err = file_remove_suid(file);
1109 if (err) {
1110 mutex_unlock(&inode->i_mutex);
1111 goto out;
1112 }
1113
1114 /*
1115 * If BTRFS flips readonly due to some impossible error
1116 * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR),
1117 * although we have opened a file as writable, we have
1118 * to stop this write operation to ensure FS consistency.
1119 */
1120 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
1121 mutex_unlock(&inode->i_mutex);
1122 err = -EROFS;
1123 goto out;
1124 }
1125
1126 file_update_time(file);
1127 BTRFS_I(inode)->sequence++;
1128
1129 if (unlikely(file->f_flags & O_DIRECT)) {
1130 num_written = __btrfs_direct_write(iocb, iov, nr_segs,
1131 pos, ppos, count, ocount);
1132 } else {
1133 struct iov_iter i;
1134
1135 iov_iter_init(&i, iov, nr_segs, count, num_written);
1136
1137 num_written = __btrfs_buffered_write(file, &i, pos);
1138 if (num_written > 0)
1139 *ppos = pos + num_written;
1140 }
1141
1142 mutex_unlock(&inode->i_mutex);
1107 1143
1108 /* 1144 /*
1109 * we want to make sure fsync finds this change 1145 * we want to make sure fsync finds this change
@@ -1118,43 +1154,12 @@ out:
1118 * one running right now. 1154 * one running right now.
1119 */ 1155 */
1120 BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; 1156 BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
1121 1157 if (num_written > 0 || num_written == -EIOCBQUEUED) {
1122 if (num_written > 0 && will_write) { 1158 err = generic_write_sync(file, pos, num_written);
1123 struct btrfs_trans_handle *trans; 1159 if (err < 0 && num_written > 0)
1124
1125 err = btrfs_wait_ordered_range(inode, start_pos, num_written);
1126 if (err)
1127 num_written = err; 1160 num_written = err;
1128
1129 if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
1130 trans = btrfs_start_transaction(root, 0);
1131 if (IS_ERR(trans)) {
1132 num_written = PTR_ERR(trans);
1133 goto done;
1134 }
1135 mutex_lock(&inode->i_mutex);
1136 ret = btrfs_log_dentry_safe(trans, root,
1137 file->f_dentry);
1138 mutex_unlock(&inode->i_mutex);
1139 if (ret == 0) {
1140 ret = btrfs_sync_log(trans, root);
1141 if (ret == 0)
1142 btrfs_end_transaction(trans, root);
1143 else
1144 btrfs_commit_transaction(trans, root);
1145 } else if (ret != BTRFS_NO_LOG_SYNC) {
1146 btrfs_commit_transaction(trans, root);
1147 } else {
1148 btrfs_end_transaction(trans, root);
1149 }
1150 }
1151 if (file->f_flags & O_DIRECT && buffered) {
1152 invalidate_mapping_pages(inode->i_mapping,
1153 start_pos >> PAGE_CACHE_SHIFT,
1154 (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
1155 }
1156 } 1161 }
1157done: 1162out:
1158 current->backing_dev_info = NULL; 1163 current->backing_dev_info = NULL;
1159 return num_written ? num_written : err; 1164 return num_written ? num_written : err;
1160} 1165}