aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/ctree.h14
-rw-r--r--fs/btrfs/disk-io.c10
-rw-r--r--fs/btrfs/extent-tree.c6
-rw-r--r--fs/btrfs/file.c8
-rw-r--r--fs/btrfs/inode-map.c24
-rw-r--r--fs/btrfs/ioctl.c4
-rw-r--r--fs/btrfs/send.c5
-rw-r--r--fs/btrfs/super.c22
-rw-r--r--fs/ceph/file.c3
-rw-r--r--fs/compat.c14
-rw-r--r--fs/ext4/balloc.c2
-rw-r--r--fs/ext4/ext4.h17
-rw-r--r--fs/ext4/extents.c109
-rw-r--r--fs/ext4/extents_status.c2
-rw-r--r--fs/ext4/file.c2
-rw-r--r--fs/ext4/inode.c53
-rw-r--r--fs/ext4/mballoc.c18
-rw-r--r--fs/ext4/page-io.c5
-rw-r--r--fs/ext4/super.c51
-rw-r--r--fs/ext4/xattr.c23
-rw-r--r--fs/fcntl.c12
-rw-r--r--fs/kernfs/dir.c9
-rw-r--r--fs/kernfs/file.c2
-rw-r--r--fs/locks.c55
-rw-r--r--fs/nfsd/nfs4callback.c4
-rw-r--r--fs/nfsd/nfs4xdr.c8
-rw-r--r--fs/open.c21
-rw-r--r--fs/xfs/xfs_file.c10
28 files changed, 287 insertions, 226 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4c48df572bd6..ba6b88528dc7 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2058,6 +2058,20 @@ struct btrfs_ioctl_defrag_range_args {
2058#define btrfs_raw_test_opt(o, opt) ((o) & BTRFS_MOUNT_##opt) 2058#define btrfs_raw_test_opt(o, opt) ((o) & BTRFS_MOUNT_##opt)
2059#define btrfs_test_opt(root, opt) ((root)->fs_info->mount_opt & \ 2059#define btrfs_test_opt(root, opt) ((root)->fs_info->mount_opt & \
2060 BTRFS_MOUNT_##opt) 2060 BTRFS_MOUNT_##opt)
2061#define btrfs_set_and_info(root, opt, fmt, args...) \
2062{ \
2063 if (!btrfs_test_opt(root, opt)) \
2064 btrfs_info(root->fs_info, fmt, ##args); \
2065 btrfs_set_opt(root->fs_info->mount_opt, opt); \
2066}
2067
2068#define btrfs_clear_and_info(root, opt, fmt, args...) \
2069{ \
2070 if (btrfs_test_opt(root, opt)) \
2071 btrfs_info(root->fs_info, fmt, ##args); \
2072 btrfs_clear_opt(root->fs_info->mount_opt, opt); \
2073}
2074
2061/* 2075/*
2062 * Inode flags 2076 * Inode flags
2063 */ 2077 */
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 029d46c2e170..983314932af3 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2861,7 +2861,7 @@ retry_root_backup:
2861 printk(KERN_ERR "BTRFS: failed to read log tree\n"); 2861 printk(KERN_ERR "BTRFS: failed to read log tree\n");
2862 free_extent_buffer(log_tree_root->node); 2862 free_extent_buffer(log_tree_root->node);
2863 kfree(log_tree_root); 2863 kfree(log_tree_root);
2864 goto fail_trans_kthread; 2864 goto fail_qgroup;
2865 } 2865 }
2866 /* returns with log_tree_root freed on success */ 2866 /* returns with log_tree_root freed on success */
2867 ret = btrfs_recover_log_trees(log_tree_root); 2867 ret = btrfs_recover_log_trees(log_tree_root);
@@ -2870,24 +2870,24 @@ retry_root_backup:
2870 "Failed to recover log tree"); 2870 "Failed to recover log tree");
2871 free_extent_buffer(log_tree_root->node); 2871 free_extent_buffer(log_tree_root->node);
2872 kfree(log_tree_root); 2872 kfree(log_tree_root);
2873 goto fail_trans_kthread; 2873 goto fail_qgroup;
2874 } 2874 }
2875 2875
2876 if (sb->s_flags & MS_RDONLY) { 2876 if (sb->s_flags & MS_RDONLY) {
2877 ret = btrfs_commit_super(tree_root); 2877 ret = btrfs_commit_super(tree_root);
2878 if (ret) 2878 if (ret)
2879 goto fail_trans_kthread; 2879 goto fail_qgroup;
2880 } 2880 }
2881 } 2881 }
2882 2882
2883 ret = btrfs_find_orphan_roots(tree_root); 2883 ret = btrfs_find_orphan_roots(tree_root);
2884 if (ret) 2884 if (ret)
2885 goto fail_trans_kthread; 2885 goto fail_qgroup;
2886 2886
2887 if (!(sb->s_flags & MS_RDONLY)) { 2887 if (!(sb->s_flags & MS_RDONLY)) {
2888 ret = btrfs_cleanup_fs_roots(fs_info); 2888 ret = btrfs_cleanup_fs_roots(fs_info);
2889 if (ret) 2889 if (ret)
2890 goto fail_trans_kthread; 2890 goto fail_qgroup;
2891 2891
2892 ret = btrfs_recover_relocation(tree_root); 2892 ret = btrfs_recover_relocation(tree_root);
2893 if (ret < 0) { 2893 if (ret < 0) {
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1306487c82cf..5590af92094b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1542,6 +1542,7 @@ again:
1542 ret = 0; 1542 ret = 0;
1543 } 1543 }
1544 if (ret) { 1544 if (ret) {
1545 key.objectid = bytenr;
1545 key.type = BTRFS_EXTENT_ITEM_KEY; 1546 key.type = BTRFS_EXTENT_ITEM_KEY;
1546 key.offset = num_bytes; 1547 key.offset = num_bytes;
1547 btrfs_release_path(path); 1548 btrfs_release_path(path);
@@ -3542,11 +3543,13 @@ static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
3542 return extended_to_chunk(flags | tmp); 3543 return extended_to_chunk(flags | tmp);
3543} 3544}
3544 3545
3545static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) 3546static u64 get_alloc_profile(struct btrfs_root *root, u64 orig_flags)
3546{ 3547{
3547 unsigned seq; 3548 unsigned seq;
3549 u64 flags;
3548 3550
3549 do { 3551 do {
3552 flags = orig_flags;
3550 seq = read_seqbegin(&root->fs_info->profiles_lock); 3553 seq = read_seqbegin(&root->fs_info->profiles_lock);
3551 3554
3552 if (flags & BTRFS_BLOCK_GROUP_DATA) 3555 if (flags & BTRFS_BLOCK_GROUP_DATA)
@@ -5719,6 +5722,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5719 5722
5720 if (ret > 0 && skinny_metadata) { 5723 if (ret > 0 && skinny_metadata) {
5721 skinny_metadata = false; 5724 skinny_metadata = false;
5725 key.objectid = bytenr;
5722 key.type = BTRFS_EXTENT_ITEM_KEY; 5726 key.type = BTRFS_EXTENT_ITEM_KEY;
5723 key.offset = num_bytes; 5727 key.offset = num_bytes;
5724 btrfs_release_path(path); 5728 btrfs_release_path(path);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index eb742c07e7a4..ae6af072b635 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -800,7 +800,7 @@ next_slot:
800 if (start > key.offset && end < extent_end) { 800 if (start > key.offset && end < extent_end) {
801 BUG_ON(del_nr > 0); 801 BUG_ON(del_nr > 0);
802 if (extent_type == BTRFS_FILE_EXTENT_INLINE) { 802 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
803 ret = -EINVAL; 803 ret = -EOPNOTSUPP;
804 break; 804 break;
805 } 805 }
806 806
@@ -846,7 +846,7 @@ next_slot:
846 */ 846 */
847 if (start <= key.offset && end < extent_end) { 847 if (start <= key.offset && end < extent_end) {
848 if (extent_type == BTRFS_FILE_EXTENT_INLINE) { 848 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
849 ret = -EINVAL; 849 ret = -EOPNOTSUPP;
850 break; 850 break;
851 } 851 }
852 852
@@ -872,7 +872,7 @@ next_slot:
872 if (start > key.offset && end >= extent_end) { 872 if (start > key.offset && end >= extent_end) {
873 BUG_ON(del_nr > 0); 873 BUG_ON(del_nr > 0);
874 if (extent_type == BTRFS_FILE_EXTENT_INLINE) { 874 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
875 ret = -EINVAL; 875 ret = -EOPNOTSUPP;
876 break; 876 break;
877 } 877 }
878 878
@@ -1777,7 +1777,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1777 start_pos = round_down(pos, root->sectorsize); 1777 start_pos = round_down(pos, root->sectorsize);
1778 if (start_pos > i_size_read(inode)) { 1778 if (start_pos > i_size_read(inode)) {
1779 /* Expand hole size to cover write data, preventing empty gap */ 1779 /* Expand hole size to cover write data, preventing empty gap */
1780 end_pos = round_up(pos + iov->iov_len, root->sectorsize); 1780 end_pos = round_up(pos + count, root->sectorsize);
1781 err = btrfs_cont_expand(inode, i_size_read(inode), end_pos); 1781 err = btrfs_cont_expand(inode, i_size_read(inode), end_pos);
1782 if (err) { 1782 if (err) {
1783 mutex_unlock(&inode->i_mutex); 1783 mutex_unlock(&inode->i_mutex);
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index cc8ca193d830..86935f5ae291 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -176,7 +176,11 @@ static void start_caching(struct btrfs_root *root)
176 176
177 tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu\n", 177 tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu\n",
178 root->root_key.objectid); 178 root->root_key.objectid);
179 BUG_ON(IS_ERR(tsk)); /* -ENOMEM */ 179 if (IS_ERR(tsk)) {
180 btrfs_warn(root->fs_info, "failed to start inode caching task");
181 btrfs_clear_and_info(root, CHANGE_INODE_CACHE,
182 "disabling inode map caching");
183 }
180} 184}
181 185
182int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid) 186int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid)
@@ -205,24 +209,14 @@ again:
205 209
206void btrfs_return_ino(struct btrfs_root *root, u64 objectid) 210void btrfs_return_ino(struct btrfs_root *root, u64 objectid)
207{ 211{
208 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
209 struct btrfs_free_space_ctl *pinned = root->free_ino_pinned; 212 struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;
210 213
211 if (!btrfs_test_opt(root, INODE_MAP_CACHE)) 214 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
212 return; 215 return;
213
214again: 216again:
215 if (root->cached == BTRFS_CACHE_FINISHED) { 217 if (root->cached == BTRFS_CACHE_FINISHED) {
216 __btrfs_add_free_space(ctl, objectid, 1); 218 __btrfs_add_free_space(pinned, objectid, 1);
217 } else { 219 } else {
218 /*
219 * If we are in the process of caching free ino chunks,
220 * to avoid adding the same inode number to the free_ino
221 * tree twice due to cross transaction, we'll leave it
222 * in the pinned tree until a transaction is committed
223 * or the caching work is done.
224 */
225
226 down_write(&root->fs_info->commit_root_sem); 220 down_write(&root->fs_info->commit_root_sem);
227 spin_lock(&root->cache_lock); 221 spin_lock(&root->cache_lock);
228 if (root->cached == BTRFS_CACHE_FINISHED) { 222 if (root->cached == BTRFS_CACHE_FINISHED) {
@@ -234,11 +228,7 @@ again:
234 228
235 start_caching(root); 229 start_caching(root);
236 230
237 if (objectid <= root->cache_progress || 231 __btrfs_add_free_space(pinned, objectid, 1);
238 objectid >= root->highest_objectid)
239 __btrfs_add_free_space(ctl, objectid, 1);
240 else
241 __btrfs_add_free_space(pinned, objectid, 1);
242 232
243 up_write(&root->fs_info->commit_root_sem); 233 up_write(&root->fs_info->commit_root_sem);
244 } 234 }
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index e79ff6b90cb7..2ad7de94efef 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3066,7 +3066,7 @@ process_slot:
3066 new_key.offset + datal, 3066 new_key.offset + datal,
3067 1); 3067 1);
3068 if (ret) { 3068 if (ret) {
3069 if (ret != -EINVAL) 3069 if (ret != -EOPNOTSUPP)
3070 btrfs_abort_transaction(trans, 3070 btrfs_abort_transaction(trans,
3071 root, ret); 3071 root, ret);
3072 btrfs_end_transaction(trans, root); 3072 btrfs_end_transaction(trans, root);
@@ -3141,7 +3141,7 @@ process_slot:
3141 new_key.offset + datal, 3141 new_key.offset + datal,
3142 1); 3142 1);
3143 if (ret) { 3143 if (ret) {
3144 if (ret != -EINVAL) 3144 if (ret != -EOPNOTSUPP)
3145 btrfs_abort_transaction(trans, 3145 btrfs_abort_transaction(trans,
3146 root, ret); 3146 root, ret);
3147 btrfs_end_transaction(trans, root); 3147 btrfs_end_transaction(trans, root);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 1ac3ca98c429..eb6537a08c1b 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -349,6 +349,11 @@ static int fs_path_ensure_buf(struct fs_path *p, int len)
349 if (p->buf_len >= len) 349 if (p->buf_len >= len)
350 return 0; 350 return 0;
351 351
352 if (len > PATH_MAX) {
353 WARN_ON(1);
354 return -ENOMEM;
355 }
356
352 path_len = p->end - p->start; 357 path_len = p->end - p->start;
353 old_buf_len = p->buf_len; 358 old_buf_len = p->buf_len;
354 359
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 5011aadacab8..9601d25a4607 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -385,20 +385,6 @@ static match_table_t tokens = {
385 {Opt_err, NULL}, 385 {Opt_err, NULL},
386}; 386};
387 387
388#define btrfs_set_and_info(root, opt, fmt, args...) \
389{ \
390 if (!btrfs_test_opt(root, opt)) \
391 btrfs_info(root->fs_info, fmt, ##args); \
392 btrfs_set_opt(root->fs_info->mount_opt, opt); \
393}
394
395#define btrfs_clear_and_info(root, opt, fmt, args...) \
396{ \
397 if (btrfs_test_opt(root, opt)) \
398 btrfs_info(root->fs_info, fmt, ##args); \
399 btrfs_clear_opt(root->fs_info->mount_opt, opt); \
400}
401
402/* 388/*
403 * Regular mount options parser. Everything that is needed only when 389 * Regular mount options parser. Everything that is needed only when
404 * reading in a new superblock is parsed here. 390 * reading in a new superblock is parsed here.
@@ -1186,7 +1172,6 @@ static struct dentry *mount_subvol(const char *subvol_name, int flags,
1186 return ERR_PTR(-ENOMEM); 1172 return ERR_PTR(-ENOMEM);
1187 mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, 1173 mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name,
1188 newargs); 1174 newargs);
1189 kfree(newargs);
1190 1175
1191 if (PTR_RET(mnt) == -EBUSY) { 1176 if (PTR_RET(mnt) == -EBUSY) {
1192 if (flags & MS_RDONLY) { 1177 if (flags & MS_RDONLY) {
@@ -1196,17 +1181,22 @@ static struct dentry *mount_subvol(const char *subvol_name, int flags,
1196 int r; 1181 int r;
1197 mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY, device_name, 1182 mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY, device_name,
1198 newargs); 1183 newargs);
1199 if (IS_ERR(mnt)) 1184 if (IS_ERR(mnt)) {
1185 kfree(newargs);
1200 return ERR_CAST(mnt); 1186 return ERR_CAST(mnt);
1187 }
1201 1188
1202 r = btrfs_remount(mnt->mnt_sb, &flags, NULL); 1189 r = btrfs_remount(mnt->mnt_sb, &flags, NULL);
1203 if (r < 0) { 1190 if (r < 0) {
1204 /* FIXME: release vfsmount mnt ??*/ 1191 /* FIXME: release vfsmount mnt ??*/
1192 kfree(newargs);
1205 return ERR_PTR(r); 1193 return ERR_PTR(r);
1206 } 1194 }
1207 } 1195 }
1208 } 1196 }
1209 1197
1198 kfree(newargs);
1199
1210 if (IS_ERR(mnt)) 1200 if (IS_ERR(mnt))
1211 return ERR_CAST(mnt); 1201 return ERR_CAST(mnt);
1212 1202
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 39da1c2efa50..88a6df4cbe6d 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1221,9 +1221,6 @@ static long ceph_fallocate(struct file *file, int mode,
1221 if (!S_ISREG(inode->i_mode)) 1221 if (!S_ISREG(inode->i_mode))
1222 return -EOPNOTSUPP; 1222 return -EOPNOTSUPP;
1223 1223
1224 if (IS_SWAPFILE(inode))
1225 return -ETXTBSY;
1226
1227 mutex_lock(&inode->i_mutex); 1224 mutex_lock(&inode->i_mutex);
1228 1225
1229 if (ceph_snap(inode) != CEPH_NOSNAP) { 1226 if (ceph_snap(inode) != CEPH_NOSNAP) {
diff --git a/fs/compat.c b/fs/compat.c
index ca926ad0430c..66d3d3c6b4b2 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -457,9 +457,9 @@ COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
457 case F_GETLK64: 457 case F_GETLK64:
458 case F_SETLK64: 458 case F_SETLK64:
459 case F_SETLKW64: 459 case F_SETLKW64:
460 case F_GETLKP: 460 case F_OFD_GETLK:
461 case F_SETLKP: 461 case F_OFD_SETLK:
462 case F_SETLKPW: 462 case F_OFD_SETLKW:
463 ret = get_compat_flock64(&f, compat_ptr(arg)); 463 ret = get_compat_flock64(&f, compat_ptr(arg));
464 if (ret != 0) 464 if (ret != 0)
465 break; 465 break;
@@ -468,7 +468,7 @@ COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
468 conv_cmd = convert_fcntl_cmd(cmd); 468 conv_cmd = convert_fcntl_cmd(cmd);
469 ret = sys_fcntl(fd, conv_cmd, (unsigned long)&f); 469 ret = sys_fcntl(fd, conv_cmd, (unsigned long)&f);
470 set_fs(old_fs); 470 set_fs(old_fs);
471 if ((conv_cmd == F_GETLK || conv_cmd == F_GETLKP) && ret == 0) { 471 if ((conv_cmd == F_GETLK || conv_cmd == F_OFD_GETLK) && ret == 0) {
472 /* need to return lock information - see above for commentary */ 472 /* need to return lock information - see above for commentary */
473 if (f.l_start > COMPAT_LOFF_T_MAX) 473 if (f.l_start > COMPAT_LOFF_T_MAX)
474 ret = -EOVERFLOW; 474 ret = -EOVERFLOW;
@@ -493,9 +493,9 @@ COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
493 case F_GETLK64: 493 case F_GETLK64:
494 case F_SETLK64: 494 case F_SETLK64:
495 case F_SETLKW64: 495 case F_SETLKW64:
496 case F_GETLKP: 496 case F_OFD_GETLK:
497 case F_SETLKP: 497 case F_OFD_SETLK:
498 case F_SETLKPW: 498 case F_OFD_SETLKW:
499 return -EINVAL; 499 return -EINVAL;
500 } 500 }
501 return compat_sys_fcntl64(fd, cmd, arg); 501 return compat_sys_fcntl64(fd, cmd, arg);
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 6ea7b1436bbc..5c56785007e0 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -667,7 +667,7 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
667 continue; 667 continue;
668 668
669 x = ext4_count_free(bitmap_bh->b_data, 669 x = ext4_count_free(bitmap_bh->b_data,
670 EXT4_BLOCKS_PER_GROUP(sb) / 8); 670 EXT4_CLUSTERS_PER_GROUP(sb) / 8);
671 printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n", 671 printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n",
672 i, ext4_free_group_clusters(sb, gdp), x); 672 i, ext4_free_group_clusters(sb, gdp), x);
673 bitmap_count += x; 673 bitmap_count += x;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index f1c65dc7cc0a..66946aa62127 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2466,23 +2466,6 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
2466 up_write(&EXT4_I(inode)->i_data_sem); 2466 up_write(&EXT4_I(inode)->i_data_sem);
2467} 2467}
2468 2468
2469/*
2470 * Update i_disksize after writeback has been started. Races with truncate
2471 * are avoided by checking i_size under i_data_sem.
2472 */
2473static inline void ext4_wb_update_i_disksize(struct inode *inode, loff_t newsize)
2474{
2475 loff_t i_size;
2476
2477 down_write(&EXT4_I(inode)->i_data_sem);
2478 i_size = i_size_read(inode);
2479 if (newsize > i_size)
2480 newsize = i_size;
2481 if (newsize > EXT4_I(inode)->i_disksize)
2482 EXT4_I(inode)->i_disksize = newsize;
2483 up_write(&EXT4_I(inode)->i_data_sem);
2484}
2485
2486struct ext4_group_info { 2469struct ext4_group_info {
2487 unsigned long bb_state; 2470 unsigned long bb_state;
2488 struct rb_root bb_free_root; 2471 struct rb_root bb_free_root;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 82df3ce9874a..01b0c208f625 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3313,6 +3313,11 @@ static int ext4_split_extent(handle_t *handle,
3313 return PTR_ERR(path); 3313 return PTR_ERR(path);
3314 depth = ext_depth(inode); 3314 depth = ext_depth(inode);
3315 ex = path[depth].p_ext; 3315 ex = path[depth].p_ext;
3316 if (!ex) {
3317 EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
3318 (unsigned long) map->m_lblk);
3319 return -EIO;
3320 }
3316 uninitialized = ext4_ext_is_uninitialized(ex); 3321 uninitialized = ext4_ext_is_uninitialized(ex);
3317 split_flag1 = 0; 3322 split_flag1 = 0;
3318 3323
@@ -3694,6 +3699,12 @@ static int ext4_convert_initialized_extents(handle_t *handle,
3694 } 3699 }
3695 depth = ext_depth(inode); 3700 depth = ext_depth(inode);
3696 ex = path[depth].p_ext; 3701 ex = path[depth].p_ext;
3702 if (!ex) {
3703 EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
3704 (unsigned long) map->m_lblk);
3705 err = -EIO;
3706 goto out;
3707 }
3697 } 3708 }
3698 3709
3699 err = ext4_ext_get_access(handle, inode, path + depth); 3710 err = ext4_ext_get_access(handle, inode, path + depth);
@@ -4730,6 +4741,9 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4730 4741
4731 trace_ext4_zero_range(inode, offset, len, mode); 4742 trace_ext4_zero_range(inode, offset, len, mode);
4732 4743
4744 if (!S_ISREG(inode->i_mode))
4745 return -EINVAL;
4746
4733 /* 4747 /*
4734 * Write out all dirty pages to avoid race conditions 4748 * Write out all dirty pages to avoid race conditions
4735 * Then release them. 4749 * Then release them.
@@ -4878,9 +4892,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4878 if (mode & FALLOC_FL_PUNCH_HOLE) 4892 if (mode & FALLOC_FL_PUNCH_HOLE)
4879 return ext4_punch_hole(inode, offset, len); 4893 return ext4_punch_hole(inode, offset, len);
4880 4894
4881 if (mode & FALLOC_FL_COLLAPSE_RANGE)
4882 return ext4_collapse_range(inode, offset, len);
4883
4884 ret = ext4_convert_inline_data(inode); 4895 ret = ext4_convert_inline_data(inode);
4885 if (ret) 4896 if (ret)
4886 return ret; 4897 return ret;
@@ -4892,6 +4903,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4892 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 4903 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
4893 return -EOPNOTSUPP; 4904 return -EOPNOTSUPP;
4894 4905
4906 if (mode & FALLOC_FL_COLLAPSE_RANGE)
4907 return ext4_collapse_range(inode, offset, len);
4908
4895 if (mode & FALLOC_FL_ZERO_RANGE) 4909 if (mode & FALLOC_FL_ZERO_RANGE)
4896 return ext4_zero_range(file, offset, len, mode); 4910 return ext4_zero_range(file, offset, len, mode);
4897 4911
@@ -5229,18 +5243,19 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
5229 if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) 5243 if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr))
5230 update = 1; 5244 update = 1;
5231 5245
5232 *start = ex_last->ee_block + 5246 *start = le32_to_cpu(ex_last->ee_block) +
5233 ext4_ext_get_actual_len(ex_last); 5247 ext4_ext_get_actual_len(ex_last);
5234 5248
5235 while (ex_start <= ex_last) { 5249 while (ex_start <= ex_last) {
5236 ex_start->ee_block -= shift; 5250 le32_add_cpu(&ex_start->ee_block, -shift);
5237 if (ex_start > 5251 /* Try to merge to the left. */
5238 EXT_FIRST_EXTENT(path[depth].p_hdr)) { 5252 if ((ex_start >
5239 if (ext4_ext_try_to_merge_right(inode, 5253 EXT_FIRST_EXTENT(path[depth].p_hdr)) &&
5240 path, ex_start - 1)) 5254 ext4_ext_try_to_merge_right(inode,
5241 ex_last--; 5255 path, ex_start - 1))
5242 } 5256 ex_last--;
5243 ex_start++; 5257 else
5258 ex_start++;
5244 } 5259 }
5245 err = ext4_ext_dirty(handle, inode, path + depth); 5260 err = ext4_ext_dirty(handle, inode, path + depth);
5246 if (err) 5261 if (err)
@@ -5255,7 +5270,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
5255 if (err) 5270 if (err)
5256 goto out; 5271 goto out;
5257 5272
5258 path[depth].p_idx->ei_block -= shift; 5273 le32_add_cpu(&path[depth].p_idx->ei_block, -shift);
5259 err = ext4_ext_dirty(handle, inode, path + depth); 5274 err = ext4_ext_dirty(handle, inode, path + depth);
5260 if (err) 5275 if (err)
5261 goto out; 5276 goto out;
@@ -5300,7 +5315,8 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
5300 return ret; 5315 return ret;
5301 } 5316 }
5302 5317
5303 stop_block = extent->ee_block + ext4_ext_get_actual_len(extent); 5318 stop_block = le32_to_cpu(extent->ee_block) +
5319 ext4_ext_get_actual_len(extent);
5304 ext4_ext_drop_refs(path); 5320 ext4_ext_drop_refs(path);
5305 kfree(path); 5321 kfree(path);
5306 5322
@@ -5313,10 +5329,18 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
5313 * enough to accomodate the shift. 5329 * enough to accomodate the shift.
5314 */ 5330 */
5315 path = ext4_ext_find_extent(inode, start - 1, NULL, 0); 5331 path = ext4_ext_find_extent(inode, start - 1, NULL, 0);
5332 if (IS_ERR(path))
5333 return PTR_ERR(path);
5316 depth = path->p_depth; 5334 depth = path->p_depth;
5317 extent = path[depth].p_ext; 5335 extent = path[depth].p_ext;
5318 ex_start = extent->ee_block; 5336 if (extent) {
5319 ex_end = extent->ee_block + ext4_ext_get_actual_len(extent); 5337 ex_start = le32_to_cpu(extent->ee_block);
5338 ex_end = le32_to_cpu(extent->ee_block) +
5339 ext4_ext_get_actual_len(extent);
5340 } else {
5341 ex_start = 0;
5342 ex_end = 0;
5343 }
5320 ext4_ext_drop_refs(path); 5344 ext4_ext_drop_refs(path);
5321 kfree(path); 5345 kfree(path);
5322 5346
@@ -5331,7 +5355,13 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
5331 return PTR_ERR(path); 5355 return PTR_ERR(path);
5332 depth = path->p_depth; 5356 depth = path->p_depth;
5333 extent = path[depth].p_ext; 5357 extent = path[depth].p_ext;
5334 current_block = extent->ee_block; 5358 if (!extent) {
5359 EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
5360 (unsigned long) start);
5361 return -EIO;
5362 }
5363
5364 current_block = le32_to_cpu(extent->ee_block);
5335 if (start > current_block) { 5365 if (start > current_block) {
5336 /* Hole, move to the next extent */ 5366 /* Hole, move to the next extent */
5337 ret = mext_next_extent(inode, path, &extent); 5367 ret = mext_next_extent(inode, path, &extent);
@@ -5365,17 +5395,18 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
5365 ext4_lblk_t punch_start, punch_stop; 5395 ext4_lblk_t punch_start, punch_stop;
5366 handle_t *handle; 5396 handle_t *handle;
5367 unsigned int credits; 5397 unsigned int credits;
5368 loff_t new_size; 5398 loff_t new_size, ioffset;
5369 int ret; 5399 int ret;
5370 5400
5371 BUG_ON(offset + len > i_size_read(inode));
5372
5373 /* Collapse range works only on fs block size aligned offsets. */ 5401 /* Collapse range works only on fs block size aligned offsets. */
5374 if (offset & (EXT4_BLOCK_SIZE(sb) - 1) || 5402 if (offset & (EXT4_BLOCK_SIZE(sb) - 1) ||
5375 len & (EXT4_BLOCK_SIZE(sb) - 1)) 5403 len & (EXT4_BLOCK_SIZE(sb) - 1))
5376 return -EINVAL; 5404 return -EINVAL;
5377 5405
5378 if (!S_ISREG(inode->i_mode)) 5406 if (!S_ISREG(inode->i_mode))
5407 return -EINVAL;
5408
5409 if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1)
5379 return -EOPNOTSUPP; 5410 return -EOPNOTSUPP;
5380 5411
5381 trace_ext4_collapse_range(inode, offset, len); 5412 trace_ext4_collapse_range(inode, offset, len);
@@ -5383,22 +5414,34 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
5383 punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb); 5414 punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
5384 punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb); 5415 punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb);
5385 5416
5417 /* Call ext4_force_commit to flush all data in case of data=journal. */
5418 if (ext4_should_journal_data(inode)) {
5419 ret = ext4_force_commit(inode->i_sb);
5420 if (ret)
5421 return ret;
5422 }
5423
5424 /*
5425 * Need to round down offset to be aligned with page size boundary
5426 * for page size > block size.
5427 */
5428 ioffset = round_down(offset, PAGE_SIZE);
5429
5386 /* Write out all dirty pages */ 5430 /* Write out all dirty pages */
5387 ret = filemap_write_and_wait_range(inode->i_mapping, offset, -1); 5431 ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
5432 LLONG_MAX);
5388 if (ret) 5433 if (ret)
5389 return ret; 5434 return ret;
5390 5435
5391 /* Take mutex lock */ 5436 /* Take mutex lock */
5392 mutex_lock(&inode->i_mutex); 5437 mutex_lock(&inode->i_mutex);
5393 5438
5394 /* It's not possible punch hole on append only file */ 5439 /*
5395 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) { 5440 * There is no need to overlap collapse range with EOF, in which case
5396 ret = -EPERM; 5441 * it is effectively a truncate operation
5397 goto out_mutex; 5442 */
5398 } 5443 if (offset + len >= i_size_read(inode)) {
5399 5444 ret = -EINVAL;
5400 if (IS_SWAPFILE(inode)) {
5401 ret = -ETXTBSY;
5402 goto out_mutex; 5445 goto out_mutex;
5403 } 5446 }
5404 5447
@@ -5408,7 +5451,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
5408 goto out_mutex; 5451 goto out_mutex;
5409 } 5452 }
5410 5453
5411 truncate_pagecache_range(inode, offset, -1); 5454 truncate_pagecache(inode, ioffset);
5412 5455
5413 /* Wait for existing dio to complete */ 5456 /* Wait for existing dio to complete */
5414 ext4_inode_block_unlocked_dio(inode); 5457 ext4_inode_block_unlocked_dio(inode);
@@ -5425,7 +5468,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
5425 ext4_discard_preallocations(inode); 5468 ext4_discard_preallocations(inode);
5426 5469
5427 ret = ext4_es_remove_extent(inode, punch_start, 5470 ret = ext4_es_remove_extent(inode, punch_start,
5428 EXT_MAX_BLOCKS - punch_start - 1); 5471 EXT_MAX_BLOCKS - punch_start);
5429 if (ret) { 5472 if (ret) {
5430 up_write(&EXT4_I(inode)->i_data_sem); 5473 up_write(&EXT4_I(inode)->i_data_sem);
5431 goto out_stop; 5474 goto out_stop;
@@ -5436,6 +5479,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
5436 up_write(&EXT4_I(inode)->i_data_sem); 5479 up_write(&EXT4_I(inode)->i_data_sem);
5437 goto out_stop; 5480 goto out_stop;
5438 } 5481 }
5482 ext4_discard_preallocations(inode);
5439 5483
5440 ret = ext4_ext_shift_extents(inode, handle, punch_stop, 5484 ret = ext4_ext_shift_extents(inode, handle, punch_stop,
5441 punch_stop - punch_start); 5485 punch_stop - punch_start);
@@ -5445,10 +5489,9 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
5445 } 5489 }
5446 5490
5447 new_size = i_size_read(inode) - len; 5491 new_size = i_size_read(inode) - len;
5448 truncate_setsize(inode, new_size); 5492 i_size_write(inode, new_size);
5449 EXT4_I(inode)->i_disksize = new_size; 5493 EXT4_I(inode)->i_disksize = new_size;
5450 5494
5451 ext4_discard_preallocations(inode);
5452 up_write(&EXT4_I(inode)->i_data_sem); 5495 up_write(&EXT4_I(inode)->i_data_sem);
5453 if (IS_SYNC(inode)) 5496 if (IS_SYNC(inode))
5454 ext4_handle_sync(handle); 5497 ext4_handle_sync(handle);
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 0a014a7194b2..0ebc21204b51 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -810,7 +810,7 @@ retry:
810 810
811 newes.es_lblk = end + 1; 811 newes.es_lblk = end + 1;
812 newes.es_len = len2; 812 newes.es_len = len2;
813 block = 0x7FDEADBEEF; 813 block = 0x7FDEADBEEFULL;
814 if (ext4_es_is_written(&orig_es) || 814 if (ext4_es_is_written(&orig_es) ||
815 ext4_es_is_unwritten(&orig_es)) 815 ext4_es_is_unwritten(&orig_es))
816 block = ext4_es_pblock(&orig_es) + 816 block = ext4_es_pblock(&orig_es) +
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index ca7502d89fde..063fc1538355 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -82,7 +82,7 @@ ext4_unaligned_aio(struct inode *inode, const struct iovec *iov,
82 size_t count = iov_length(iov, nr_segs); 82 size_t count = iov_length(iov, nr_segs);
83 loff_t final_size = pos + count; 83 loff_t final_size = pos + count;
84 84
85 if (pos >= inode->i_size) 85 if (pos >= i_size_read(inode))
86 return 0; 86 return 0;
87 87
88 if ((pos & blockmask) || (final_size & blockmask)) 88 if ((pos & blockmask) || (final_size & blockmask))
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5b0d2c7d5408..d7b7462a0e13 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -522,6 +522,10 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
522 if (unlikely(map->m_len > INT_MAX)) 522 if (unlikely(map->m_len > INT_MAX))
523 map->m_len = INT_MAX; 523 map->m_len = INT_MAX;
524 524
525 /* We can handle the block number less than EXT_MAX_BLOCKS */
526 if (unlikely(map->m_lblk >= EXT_MAX_BLOCKS))
527 return -EIO;
528
525 /* Lookup extent status tree firstly */ 529 /* Lookup extent status tree firstly */
526 if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { 530 if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
527 ext4_es_lru_add(inode); 531 ext4_es_lru_add(inode);
@@ -2243,13 +2247,23 @@ static int mpage_map_and_submit_extent(handle_t *handle,
2243 return err; 2247 return err;
2244 } while (map->m_len); 2248 } while (map->m_len);
2245 2249
2246 /* Update on-disk size after IO is submitted */ 2250 /*
2251 * Update on-disk size after IO is submitted. Races with
2252 * truncate are avoided by checking i_size under i_data_sem.
2253 */
2247 disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT; 2254 disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT;
2248 if (disksize > EXT4_I(inode)->i_disksize) { 2255 if (disksize > EXT4_I(inode)->i_disksize) {
2249 int err2; 2256 int err2;
2250 2257 loff_t i_size;
2251 ext4_wb_update_i_disksize(inode, disksize); 2258
2259 down_write(&EXT4_I(inode)->i_data_sem);
2260 i_size = i_size_read(inode);
2261 if (disksize > i_size)
2262 disksize = i_size;
2263 if (disksize > EXT4_I(inode)->i_disksize)
2264 EXT4_I(inode)->i_disksize = disksize;
2252 err2 = ext4_mark_inode_dirty(handle, inode); 2265 err2 = ext4_mark_inode_dirty(handle, inode);
2266 up_write(&EXT4_I(inode)->i_data_sem);
2253 if (err2) 2267 if (err2)
2254 ext4_error(inode->i_sb, 2268 ext4_error(inode->i_sb,
2255 "Failed to mark inode %lu dirty", 2269 "Failed to mark inode %lu dirty",
@@ -3527,15 +3541,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
3527 } 3541 }
3528 3542
3529 mutex_lock(&inode->i_mutex); 3543 mutex_lock(&inode->i_mutex);
3530 /* It's not possible punch hole on append only file */
3531 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
3532 ret = -EPERM;
3533 goto out_mutex;
3534 }
3535 if (IS_SWAPFILE(inode)) {
3536 ret = -ETXTBSY;
3537 goto out_mutex;
3538 }
3539 3544
3540 /* No need to punch hole beyond i_size */ 3545 /* No need to punch hole beyond i_size */
3541 if (offset >= inode->i_size) 3546 if (offset >= inode->i_size)
@@ -3616,7 +3621,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
3616 ret = ext4_free_hole_blocks(handle, inode, first_block, 3621 ret = ext4_free_hole_blocks(handle, inode, first_block,
3617 stop_block); 3622 stop_block);
3618 3623
3619 ext4_discard_preallocations(inode);
3620 up_write(&EXT4_I(inode)->i_data_sem); 3624 up_write(&EXT4_I(inode)->i_data_sem);
3621 if (IS_SYNC(inode)) 3625 if (IS_SYNC(inode))
3622 ext4_handle_sync(handle); 3626 ext4_handle_sync(handle);
@@ -4423,21 +4427,20 @@ out_brelse:
4423 * 4427 *
4424 * We are called from a few places: 4428 * We are called from a few places:
4425 * 4429 *
4426 * - Within generic_file_write() for O_SYNC files. 4430 * - Within generic_file_aio_write() -> generic_write_sync() for O_SYNC files.
4427 * Here, there will be no transaction running. We wait for any running 4431 * Here, there will be no transaction running. We wait for any running
4428 * transaction to commit. 4432 * transaction to commit.
4429 * 4433 *
4430 * - Within sys_sync(), kupdate and such. 4434 * - Within flush work (sys_sync(), kupdate and such).
4431 * We wait on commit, if tol to. 4435 * We wait on commit, if told to.
4432 * 4436 *
4433 * - Within prune_icache() (PF_MEMALLOC == true) 4437 * - Within iput_final() -> write_inode_now()
4434 * Here we simply return. We can't afford to block kswapd on the 4438 * We wait on commit, if told to.
4435 * journal commit.
4436 * 4439 *
4437 * In all cases it is actually safe for us to return without doing anything, 4440 * In all cases it is actually safe for us to return without doing anything,
4438 * because the inode has been copied into a raw inode buffer in 4441 * because the inode has been copied into a raw inode buffer in
4439 * ext4_mark_inode_dirty(). This is a correctness thing for O_SYNC and for 4442 * ext4_mark_inode_dirty(). This is a correctness thing for WB_SYNC_ALL
4440 * knfsd. 4443 * writeback.
4441 * 4444 *
4442 * Note that we are absolutely dependent upon all inode dirtiers doing the 4445 * Note that we are absolutely dependent upon all inode dirtiers doing the
4443 * right thing: they *must* call mark_inode_dirty() after dirtying info in 4446 * right thing: they *must* call mark_inode_dirty() after dirtying info in
@@ -4449,15 +4452,15 @@ out_brelse:
4449 * stuff(); 4452 * stuff();
4450 * inode->i_size = expr; 4453 * inode->i_size = expr;
4451 * 4454 *
4452 * is in error because a kswapd-driven write_inode() could occur while 4455 * is in error because write_inode() could occur while `stuff()' is running,
4453 * `stuff()' is running, and the new i_size will be lost. Plus the inode 4456 * and the new i_size will be lost. Plus the inode will no longer be on the
4454 * will no longer be on the superblock's dirty inode list. 4457 * superblock's dirty inode list.
4455 */ 4458 */
4456int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) 4459int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
4457{ 4460{
4458 int err; 4461 int err;
4459 4462
4460 if (current->flags & PF_MEMALLOC) 4463 if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
4461 return 0; 4464 return 0;
4462 4465
4463 if (EXT4_SB(inode->i_sb)->s_journal) { 4466 if (EXT4_SB(inode->i_sb)->s_journal) {
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index a888cac76e9c..c8238a26818c 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -989,7 +989,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
989 poff = block % blocks_per_page; 989 poff = block % blocks_per_page;
990 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); 990 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
991 if (!page) 991 if (!page)
992 return -EIO; 992 return -ENOMEM;
993 BUG_ON(page->mapping != inode->i_mapping); 993 BUG_ON(page->mapping != inode->i_mapping);
994 e4b->bd_bitmap_page = page; 994 e4b->bd_bitmap_page = page;
995 e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize); 995 e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
@@ -1003,7 +1003,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
1003 pnum = block / blocks_per_page; 1003 pnum = block / blocks_per_page;
1004 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); 1004 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
1005 if (!page) 1005 if (!page)
1006 return -EIO; 1006 return -ENOMEM;
1007 BUG_ON(page->mapping != inode->i_mapping); 1007 BUG_ON(page->mapping != inode->i_mapping);
1008 e4b->bd_buddy_page = page; 1008 e4b->bd_buddy_page = page;
1009 return 0; 1009 return 0;
@@ -1168,7 +1168,11 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
1168 unlock_page(page); 1168 unlock_page(page);
1169 } 1169 }
1170 } 1170 }
1171 if (page == NULL || !PageUptodate(page)) { 1171 if (page == NULL) {
1172 ret = -ENOMEM;
1173 goto err;
1174 }
1175 if (!PageUptodate(page)) {
1172 ret = -EIO; 1176 ret = -EIO;
1173 goto err; 1177 goto err;
1174 } 1178 }
@@ -1197,7 +1201,11 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
1197 unlock_page(page); 1201 unlock_page(page);
1198 } 1202 }
1199 } 1203 }
1200 if (page == NULL || !PageUptodate(page)) { 1204 if (page == NULL) {
1205 ret = -ENOMEM;
1206 goto err;
1207 }
1208 if (!PageUptodate(page)) {
1201 ret = -EIO; 1209 ret = -EIO;
1202 goto err; 1210 goto err;
1203 } 1211 }
@@ -5008,6 +5016,8 @@ error_return:
5008 */ 5016 */
5009static int ext4_trim_extent(struct super_block *sb, int start, int count, 5017static int ext4_trim_extent(struct super_block *sb, int start, int count,
5010 ext4_group_t group, struct ext4_buddy *e4b) 5018 ext4_group_t group, struct ext4_buddy *e4b)
5019__releases(bitlock)
5020__acquires(bitlock)
5011{ 5021{
5012 struct ext4_free_extent ex; 5022 struct ext4_free_extent ex;
5013 int ret = 0; 5023 int ret = 0;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index ab95508e3d40..c18d95b50540 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -308,13 +308,14 @@ static void ext4_end_bio(struct bio *bio, int error)
308 if (error) { 308 if (error) {
309 struct inode *inode = io_end->inode; 309 struct inode *inode = io_end->inode;
310 310
311 ext4_warning(inode->i_sb, "I/O error writing to inode %lu " 311 ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
312 "(offset %llu size %ld starting block %llu)", 312 "(offset %llu size %ld starting block %llu)",
313 inode->i_ino, 313 error, inode->i_ino,
314 (unsigned long long) io_end->offset, 314 (unsigned long long) io_end->offset,
315 (long) io_end->size, 315 (long) io_end->size,
316 (unsigned long long) 316 (unsigned long long)
317 bi_sector >> (inode->i_blkbits - 9)); 317 bi_sector >> (inode->i_blkbits - 9));
318 mapping_set_error(inode->i_mapping, error);
318 } 319 }
319 320
320 if (io_end->flag & EXT4_IO_END_UNWRITTEN) { 321 if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f3c667091618..6f9e6fadac04 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3869,19 +3869,38 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3869 goto failed_mount2; 3869 goto failed_mount2;
3870 } 3870 }
3871 } 3871 }
3872
3873 /*
3874 * set up enough so that it can read an inode,
3875 * and create new inode for buddy allocator
3876 */
3877 sbi->s_gdb_count = db_count;
3878 if (!test_opt(sb, NOLOAD) &&
3879 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
3880 sb->s_op = &ext4_sops;
3881 else
3882 sb->s_op = &ext4_nojournal_sops;
3883
3884 ext4_ext_init(sb);
3885 err = ext4_mb_init(sb);
3886 if (err) {
3887 ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
3888 err);
3889 goto failed_mount2;
3890 }
3891
3872 if (!ext4_check_descriptors(sb, &first_not_zeroed)) { 3892 if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
3873 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); 3893 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
3874 goto failed_mount2; 3894 goto failed_mount2a;
3875 } 3895 }
3876 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 3896 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
3877 if (!ext4_fill_flex_info(sb)) { 3897 if (!ext4_fill_flex_info(sb)) {
3878 ext4_msg(sb, KERN_ERR, 3898 ext4_msg(sb, KERN_ERR,
3879 "unable to initialize " 3899 "unable to initialize "
3880 "flex_bg meta info!"); 3900 "flex_bg meta info!");
3881 goto failed_mount2; 3901 goto failed_mount2a;
3882 } 3902 }
3883 3903
3884 sbi->s_gdb_count = db_count;
3885 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 3904 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
3886 spin_lock_init(&sbi->s_next_gen_lock); 3905 spin_lock_init(&sbi->s_next_gen_lock);
3887 3906
@@ -3916,14 +3935,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3916 sbi->s_stripe = ext4_get_stripe_size(sbi); 3935 sbi->s_stripe = ext4_get_stripe_size(sbi);
3917 sbi->s_extent_max_zeroout_kb = 32; 3936 sbi->s_extent_max_zeroout_kb = 32;
3918 3937
3919 /*
3920 * set up enough so that it can read an inode
3921 */
3922 if (!test_opt(sb, NOLOAD) &&
3923 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
3924 sb->s_op = &ext4_sops;
3925 else
3926 sb->s_op = &ext4_nojournal_sops;
3927 sb->s_export_op = &ext4_export_ops; 3938 sb->s_export_op = &ext4_export_ops;
3928 sb->s_xattr = ext4_xattr_handlers; 3939 sb->s_xattr = ext4_xattr_handlers;
3929#ifdef CONFIG_QUOTA 3940#ifdef CONFIG_QUOTA
@@ -4113,21 +4124,13 @@ no_journal:
4113 if (err) { 4124 if (err) {
4114 ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for " 4125 ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for "
4115 "reserved pool", ext4_calculate_resv_clusters(sb)); 4126 "reserved pool", ext4_calculate_resv_clusters(sb));
4116 goto failed_mount4a; 4127 goto failed_mount5;
4117 } 4128 }
4118 4129
4119 err = ext4_setup_system_zone(sb); 4130 err = ext4_setup_system_zone(sb);
4120 if (err) { 4131 if (err) {
4121 ext4_msg(sb, KERN_ERR, "failed to initialize system " 4132 ext4_msg(sb, KERN_ERR, "failed to initialize system "
4122 "zone (%d)", err); 4133 "zone (%d)", err);
4123 goto failed_mount4a;
4124 }
4125
4126 ext4_ext_init(sb);
4127 err = ext4_mb_init(sb);
4128 if (err) {
4129 ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
4130 err);
4131 goto failed_mount5; 4134 goto failed_mount5;
4132 } 4135 }
4133 4136
@@ -4204,11 +4207,8 @@ failed_mount8:
4204failed_mount7: 4207failed_mount7:
4205 ext4_unregister_li_request(sb); 4208 ext4_unregister_li_request(sb);
4206failed_mount6: 4209failed_mount6:
4207 ext4_mb_release(sb);
4208failed_mount5:
4209 ext4_ext_release(sb);
4210 ext4_release_system_zone(sb); 4210 ext4_release_system_zone(sb);
4211failed_mount4a: 4211failed_mount5:
4212 dput(sb->s_root); 4212 dput(sb->s_root);
4213 sb->s_root = NULL; 4213 sb->s_root = NULL;
4214failed_mount4: 4214failed_mount4:
@@ -4232,11 +4232,14 @@ failed_mount3:
4232 percpu_counter_destroy(&sbi->s_extent_cache_cnt); 4232 percpu_counter_destroy(&sbi->s_extent_cache_cnt);
4233 if (sbi->s_mmp_tsk) 4233 if (sbi->s_mmp_tsk)
4234 kthread_stop(sbi->s_mmp_tsk); 4234 kthread_stop(sbi->s_mmp_tsk);
4235failed_mount2a:
4236 ext4_mb_release(sb);
4235failed_mount2: 4237failed_mount2:
4236 for (i = 0; i < db_count; i++) 4238 for (i = 0; i < db_count; i++)
4237 brelse(sbi->s_group_desc[i]); 4239 brelse(sbi->s_group_desc[i]);
4238 ext4_kvfree(sbi->s_group_desc); 4240 ext4_kvfree(sbi->s_group_desc);
4239failed_mount: 4241failed_mount:
4242 ext4_ext_release(sb);
4240 if (sbi->s_chksum_driver) 4243 if (sbi->s_chksum_driver)
4241 crypto_free_shash(sbi->s_chksum_driver); 4244 crypto_free_shash(sbi->s_chksum_driver);
4242 if (sbi->s_proc) { 4245 if (sbi->s_proc) {
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 1f5cf5880718..4eec399ec807 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -520,8 +520,8 @@ static void ext4_xattr_update_super_block(handle_t *handle,
520} 520}
521 521
522/* 522/*
523 * Release the xattr block BH: If the reference count is > 1, decrement 523 * Release the xattr block BH: If the reference count is > 1, decrement it;
524 * it; otherwise free the block. 524 * otherwise free the block.
525 */ 525 */
526static void 526static void
527ext4_xattr_release_block(handle_t *handle, struct inode *inode, 527ext4_xattr_release_block(handle_t *handle, struct inode *inode,
@@ -542,16 +542,31 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
542 if (ce) 542 if (ce)
543 mb_cache_entry_free(ce); 543 mb_cache_entry_free(ce);
544 get_bh(bh); 544 get_bh(bh);
545 unlock_buffer(bh);
545 ext4_free_blocks(handle, inode, bh, 0, 1, 546 ext4_free_blocks(handle, inode, bh, 0, 1,
546 EXT4_FREE_BLOCKS_METADATA | 547 EXT4_FREE_BLOCKS_METADATA |
547 EXT4_FREE_BLOCKS_FORGET); 548 EXT4_FREE_BLOCKS_FORGET);
548 unlock_buffer(bh);
549 } else { 549 } else {
550 le32_add_cpu(&BHDR(bh)->h_refcount, -1); 550 le32_add_cpu(&BHDR(bh)->h_refcount, -1);
551 if (ce) 551 if (ce)
552 mb_cache_entry_release(ce); 552 mb_cache_entry_release(ce);
553 /*
554 * Beware of this ugliness: Releasing of xattr block references
555 * from different inodes can race and so we have to protect
556 * from a race where someone else frees the block (and releases
557 * its journal_head) before we are done dirtying the buffer. In
558 * nojournal mode this race is harmless and we actually cannot
559 * call ext4_handle_dirty_xattr_block() with locked buffer as
560 * that function can call sync_dirty_buffer() so for that case
561 * we handle the dirtying after unlocking the buffer.
562 */
563 if (ext4_handle_valid(handle))
564 error = ext4_handle_dirty_xattr_block(handle, inode,
565 bh);
553 unlock_buffer(bh); 566 unlock_buffer(bh);
554 error = ext4_handle_dirty_xattr_block(handle, inode, bh); 567 if (!ext4_handle_valid(handle))
568 error = ext4_handle_dirty_xattr_block(handle, inode,
569 bh);
555 if (IS_SYNC(inode)) 570 if (IS_SYNC(inode))
556 ext4_handle_sync(handle); 571 ext4_handle_sync(handle);
557 dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1)); 572 dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1));
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 9ead1596399a..72c82f69b01b 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -274,15 +274,15 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
274 break; 274 break;
275#if BITS_PER_LONG != 32 275#if BITS_PER_LONG != 32
276 /* 32-bit arches must use fcntl64() */ 276 /* 32-bit arches must use fcntl64() */
277 case F_GETLKP: 277 case F_OFD_GETLK:
278#endif 278#endif
279 case F_GETLK: 279 case F_GETLK:
280 err = fcntl_getlk(filp, cmd, (struct flock __user *) arg); 280 err = fcntl_getlk(filp, cmd, (struct flock __user *) arg);
281 break; 281 break;
282#if BITS_PER_LONG != 32 282#if BITS_PER_LONG != 32
283 /* 32-bit arches must use fcntl64() */ 283 /* 32-bit arches must use fcntl64() */
284 case F_SETLKP: 284 case F_OFD_SETLK:
285 case F_SETLKPW: 285 case F_OFD_SETLKW:
286#endif 286#endif
287 /* Fallthrough */ 287 /* Fallthrough */
288 case F_SETLK: 288 case F_SETLK:
@@ -399,13 +399,13 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
399 399
400 switch (cmd) { 400 switch (cmd) {
401 case F_GETLK64: 401 case F_GETLK64:
402 case F_GETLKP: 402 case F_OFD_GETLK:
403 err = fcntl_getlk64(f.file, cmd, (struct flock64 __user *) arg); 403 err = fcntl_getlk64(f.file, cmd, (struct flock64 __user *) arg);
404 break; 404 break;
405 case F_SETLK64: 405 case F_SETLK64:
406 case F_SETLKW64: 406 case F_SETLKW64:
407 case F_SETLKP: 407 case F_OFD_SETLK:
408 case F_SETLKPW: 408 case F_OFD_SETLKW:
409 err = fcntl_setlk64(fd, f.file, cmd, 409 err = fcntl_setlk64(fd, f.file, cmd,
410 (struct flock64 __user *) arg); 410 (struct flock64 __user *) arg);
411 break; 411 break;
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 78f3403300af..ac127cd008bf 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -232,9 +232,6 @@ static int kernfs_link_sibling(struct kernfs_node *kn)
232 struct rb_node **node = &kn->parent->dir.children.rb_node; 232 struct rb_node **node = &kn->parent->dir.children.rb_node;
233 struct rb_node *parent = NULL; 233 struct rb_node *parent = NULL;
234 234
235 if (kernfs_type(kn) == KERNFS_DIR)
236 kn->parent->dir.subdirs++;
237
238 while (*node) { 235 while (*node) {
239 struct kernfs_node *pos; 236 struct kernfs_node *pos;
240 int result; 237 int result;
@@ -249,9 +246,15 @@ static int kernfs_link_sibling(struct kernfs_node *kn)
249 else 246 else
250 return -EEXIST; 247 return -EEXIST;
251 } 248 }
249
252 /* add new node and rebalance the tree */ 250 /* add new node and rebalance the tree */
253 rb_link_node(&kn->rb, parent, node); 251 rb_link_node(&kn->rb, parent, node);
254 rb_insert_color(&kn->rb, &kn->parent->dir.children); 252 rb_insert_color(&kn->rb, &kn->parent->dir.children);
253
254 /* successfully added, account subdir number */
255 if (kernfs_type(kn) == KERNFS_DIR)
256 kn->parent->dir.subdirs++;
257
255 return 0; 258 return 0;
256} 259}
257 260
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 8034706a7af8..e01ea4a14a01 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -484,6 +484,8 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
484 484
485 ops = kernfs_ops(of->kn); 485 ops = kernfs_ops(of->kn);
486 rc = ops->mmap(of, vma); 486 rc = ops->mmap(of, vma);
487 if (rc)
488 goto out_put;
487 489
488 /* 490 /*
489 * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup() 491 * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
diff --git a/fs/locks.c b/fs/locks.c
index 13fc7a6d380a..e663aeac579e 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -135,7 +135,7 @@
135#define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) 135#define IS_POSIX(fl) (fl->fl_flags & FL_POSIX)
136#define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK) 136#define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK)
137#define IS_LEASE(fl) (fl->fl_flags & (FL_LEASE|FL_DELEG)) 137#define IS_LEASE(fl) (fl->fl_flags & (FL_LEASE|FL_DELEG))
138#define IS_FILE_PVT(fl) (fl->fl_flags & FL_FILE_PVT) 138#define IS_OFDLCK(fl) (fl->fl_flags & FL_OFDLCK)
139 139
140static bool lease_breaking(struct file_lock *fl) 140static bool lease_breaking(struct file_lock *fl)
141{ 141{
@@ -564,7 +564,7 @@ static void __locks_insert_block(struct file_lock *blocker,
564 BUG_ON(!list_empty(&waiter->fl_block)); 564 BUG_ON(!list_empty(&waiter->fl_block));
565 waiter->fl_next = blocker; 565 waiter->fl_next = blocker;
566 list_add_tail(&waiter->fl_block, &blocker->fl_block); 566 list_add_tail(&waiter->fl_block, &blocker->fl_block);
567 if (IS_POSIX(blocker) && !IS_FILE_PVT(blocker)) 567 if (IS_POSIX(blocker) && !IS_OFDLCK(blocker))
568 locks_insert_global_blocked(waiter); 568 locks_insert_global_blocked(waiter);
569} 569}
570 570
@@ -759,12 +759,12 @@ EXPORT_SYMBOL(posix_test_lock);
759 * of tasks (such as posix threads) sharing the same open file table. 759 * of tasks (such as posix threads) sharing the same open file table.
760 * To handle those cases, we just bail out after a few iterations. 760 * To handle those cases, we just bail out after a few iterations.
761 * 761 *
762 * For FL_FILE_PVT locks, the owner is the filp, not the files_struct. 762 * For FL_OFDLCK locks, the owner is the filp, not the files_struct.
763 * Because the owner is not even nominally tied to a thread of 763 * Because the owner is not even nominally tied to a thread of
764 * execution, the deadlock detection below can't reasonably work well. Just 764 * execution, the deadlock detection below can't reasonably work well. Just
765 * skip it for those. 765 * skip it for those.
766 * 766 *
767 * In principle, we could do a more limited deadlock detection on FL_FILE_PVT 767 * In principle, we could do a more limited deadlock detection on FL_OFDLCK
768 * locks that just checks for the case where two tasks are attempting to 768 * locks that just checks for the case where two tasks are attempting to
769 * upgrade from read to write locks on the same inode. 769 * upgrade from read to write locks on the same inode.
770 */ 770 */
@@ -791,9 +791,9 @@ static int posix_locks_deadlock(struct file_lock *caller_fl,
791 791
792 /* 792 /*
793 * This deadlock detector can't reasonably detect deadlocks with 793 * This deadlock detector can't reasonably detect deadlocks with
794 * FL_FILE_PVT locks, since they aren't owned by a process, per-se. 794 * FL_OFDLCK locks, since they aren't owned by a process, per-se.
795 */ 795 */
796 if (IS_FILE_PVT(caller_fl)) 796 if (IS_OFDLCK(caller_fl))
797 return 0; 797 return 0;
798 798
799 while ((block_fl = what_owner_is_waiting_for(block_fl))) { 799 while ((block_fl = what_owner_is_waiting_for(block_fl))) {
@@ -1391,11 +1391,10 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
1391 1391
1392restart: 1392restart:
1393 break_time = flock->fl_break_time; 1393 break_time = flock->fl_break_time;
1394 if (break_time != 0) { 1394 if (break_time != 0)
1395 break_time -= jiffies; 1395 break_time -= jiffies;
1396 if (break_time == 0) 1396 if (break_time == 0)
1397 break_time++; 1397 break_time++;
1398 }
1399 locks_insert_block(flock, new_fl); 1398 locks_insert_block(flock, new_fl);
1400 spin_unlock(&inode->i_lock); 1399 spin_unlock(&inode->i_lock);
1401 error = wait_event_interruptible_timeout(new_fl->fl_wait, 1400 error = wait_event_interruptible_timeout(new_fl->fl_wait,
@@ -1891,7 +1890,7 @@ EXPORT_SYMBOL_GPL(vfs_test_lock);
1891 1890
1892static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl) 1891static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)
1893{ 1892{
1894 flock->l_pid = IS_FILE_PVT(fl) ? -1 : fl->fl_pid; 1893 flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid;
1895#if BITS_PER_LONG == 32 1894#if BITS_PER_LONG == 32
1896 /* 1895 /*
1897 * Make sure we can represent the posix lock via 1896 * Make sure we can represent the posix lock via
@@ -1913,7 +1912,7 @@ static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)
1913#if BITS_PER_LONG == 32 1912#if BITS_PER_LONG == 32
1914static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl) 1913static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl)
1915{ 1914{
1916 flock->l_pid = IS_FILE_PVT(fl) ? -1 : fl->fl_pid; 1915 flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid;
1917 flock->l_start = fl->fl_start; 1916 flock->l_start = fl->fl_start;
1918 flock->l_len = fl->fl_end == OFFSET_MAX ? 0 : 1917 flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :
1919 fl->fl_end - fl->fl_start + 1; 1918 fl->fl_end - fl->fl_start + 1;
@@ -1942,13 +1941,13 @@ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l)
1942 if (error) 1941 if (error)
1943 goto out; 1942 goto out;
1944 1943
1945 if (cmd == F_GETLKP) { 1944 if (cmd == F_OFD_GETLK) {
1946 error = -EINVAL; 1945 error = -EINVAL;
1947 if (flock.l_pid != 0) 1946 if (flock.l_pid != 0)
1948 goto out; 1947 goto out;
1949 1948
1950 cmd = F_GETLK; 1949 cmd = F_GETLK;
1951 file_lock.fl_flags |= FL_FILE_PVT; 1950 file_lock.fl_flags |= FL_OFDLCK;
1952 file_lock.fl_owner = (fl_owner_t)filp; 1951 file_lock.fl_owner = (fl_owner_t)filp;
1953 } 1952 }
1954 1953
@@ -2074,25 +2073,25 @@ again:
2074 2073
2075 /* 2074 /*
2076 * If the cmd is requesting file-private locks, then set the 2075 * If the cmd is requesting file-private locks, then set the
2077 * FL_FILE_PVT flag and override the owner. 2076 * FL_OFDLCK flag and override the owner.
2078 */ 2077 */
2079 switch (cmd) { 2078 switch (cmd) {
2080 case F_SETLKP: 2079 case F_OFD_SETLK:
2081 error = -EINVAL; 2080 error = -EINVAL;
2082 if (flock.l_pid != 0) 2081 if (flock.l_pid != 0)
2083 goto out; 2082 goto out;
2084 2083
2085 cmd = F_SETLK; 2084 cmd = F_SETLK;
2086 file_lock->fl_flags |= FL_FILE_PVT; 2085 file_lock->fl_flags |= FL_OFDLCK;
2087 file_lock->fl_owner = (fl_owner_t)filp; 2086 file_lock->fl_owner = (fl_owner_t)filp;
2088 break; 2087 break;
2089 case F_SETLKPW: 2088 case F_OFD_SETLKW:
2090 error = -EINVAL; 2089 error = -EINVAL;
2091 if (flock.l_pid != 0) 2090 if (flock.l_pid != 0)
2092 goto out; 2091 goto out;
2093 2092
2094 cmd = F_SETLKW; 2093 cmd = F_SETLKW;
2095 file_lock->fl_flags |= FL_FILE_PVT; 2094 file_lock->fl_flags |= FL_OFDLCK;
2096 file_lock->fl_owner = (fl_owner_t)filp; 2095 file_lock->fl_owner = (fl_owner_t)filp;
2097 /* Fallthrough */ 2096 /* Fallthrough */
2098 case F_SETLKW: 2097 case F_SETLKW:
@@ -2144,13 +2143,13 @@ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
2144 if (error) 2143 if (error)
2145 goto out; 2144 goto out;
2146 2145
2147 if (cmd == F_GETLKP) { 2146 if (cmd == F_OFD_GETLK) {
2148 error = -EINVAL; 2147 error = -EINVAL;
2149 if (flock.l_pid != 0) 2148 if (flock.l_pid != 0)
2150 goto out; 2149 goto out;
2151 2150
2152 cmd = F_GETLK64; 2151 cmd = F_GETLK64;
2153 file_lock.fl_flags |= FL_FILE_PVT; 2152 file_lock.fl_flags |= FL_OFDLCK;
2154 file_lock.fl_owner = (fl_owner_t)filp; 2153 file_lock.fl_owner = (fl_owner_t)filp;
2155 } 2154 }
2156 2155
@@ -2209,25 +2208,25 @@ again:
2209 2208
2210 /* 2209 /*
2211 * If the cmd is requesting file-private locks, then set the 2210 * If the cmd is requesting file-private locks, then set the
2212 * FL_FILE_PVT flag and override the owner. 2211 * FL_OFDLCK flag and override the owner.
2213 */ 2212 */
2214 switch (cmd) { 2213 switch (cmd) {
2215 case F_SETLKP: 2214 case F_OFD_SETLK:
2216 error = -EINVAL; 2215 error = -EINVAL;
2217 if (flock.l_pid != 0) 2216 if (flock.l_pid != 0)
2218 goto out; 2217 goto out;
2219 2218
2220 cmd = F_SETLK64; 2219 cmd = F_SETLK64;
2221 file_lock->fl_flags |= FL_FILE_PVT; 2220 file_lock->fl_flags |= FL_OFDLCK;
2222 file_lock->fl_owner = (fl_owner_t)filp; 2221 file_lock->fl_owner = (fl_owner_t)filp;
2223 break; 2222 break;
2224 case F_SETLKPW: 2223 case F_OFD_SETLKW:
2225 error = -EINVAL; 2224 error = -EINVAL;
2226 if (flock.l_pid != 0) 2225 if (flock.l_pid != 0)
2227 goto out; 2226 goto out;
2228 2227
2229 cmd = F_SETLKW64; 2228 cmd = F_SETLKW64;
2230 file_lock->fl_flags |= FL_FILE_PVT; 2229 file_lock->fl_flags |= FL_OFDLCK;
2231 file_lock->fl_owner = (fl_owner_t)filp; 2230 file_lock->fl_owner = (fl_owner_t)filp;
2232 /* Fallthrough */ 2231 /* Fallthrough */
2233 case F_SETLKW64: 2232 case F_SETLKW64:
@@ -2413,8 +2412,8 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
2413 if (IS_POSIX(fl)) { 2412 if (IS_POSIX(fl)) {
2414 if (fl->fl_flags & FL_ACCESS) 2413 if (fl->fl_flags & FL_ACCESS)
2415 seq_printf(f, "ACCESS"); 2414 seq_printf(f, "ACCESS");
2416 else if (IS_FILE_PVT(fl)) 2415 else if (IS_OFDLCK(fl))
2417 seq_printf(f, "FLPVT "); 2416 seq_printf(f, "OFDLCK");
2418 else 2417 else
2419 seq_printf(f, "POSIX "); 2418 seq_printf(f, "POSIX ");
2420 2419
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 39c8ef875f91..2c73cae9899d 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -654,9 +654,11 @@ static struct rpc_clnt *create_backchannel_client(struct rpc_create_args *args)
654 654
655static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses) 655static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses)
656{ 656{
657 int maxtime = max_cb_time(clp->net);
657 struct rpc_timeout timeparms = { 658 struct rpc_timeout timeparms = {
658 .to_initval = max_cb_time(clp->net), 659 .to_initval = maxtime,
659 .to_retries = 0, 660 .to_retries = 0,
661 .to_maxval = maxtime,
660 }; 662 };
661 struct rpc_create_args args = { 663 struct rpc_create_args args = {
662 .net = clp->net, 664 .net = clp->net,
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 2723c1badd01..18881f34737a 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3627,14 +3627,6 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
3627 /* nfsd4_check_resp_size guarantees enough room for error status */ 3627 /* nfsd4_check_resp_size guarantees enough room for error status */
3628 if (!op->status) 3628 if (!op->status)
3629 op->status = nfsd4_check_resp_size(resp, 0); 3629 op->status = nfsd4_check_resp_size(resp, 0);
3630 if (op->status == nfserr_resource && nfsd4_has_session(&resp->cstate)) {
3631 struct nfsd4_slot *slot = resp->cstate.slot;
3632
3633 if (slot->sl_flags & NFSD4_SLOT_CACHETHIS)
3634 op->status = nfserr_rep_too_big_to_cache;
3635 else
3636 op->status = nfserr_rep_too_big;
3637 }
3638 if (so) { 3630 if (so) {
3639 so->so_replay.rp_status = op->status; 3631 so->so_replay.rp_status = op->status;
3640 so->so_replay.rp_buflen = (char *)resp->p - (char *)(statp+1); 3632 so->so_replay.rp_buflen = (char *)resp->p - (char *)(statp+1);
diff --git a/fs/open.c b/fs/open.c
index 3d30eb1fc95e..9d64679cec73 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -254,17 +254,22 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
254 return -EBADF; 254 return -EBADF;
255 255
256 /* 256 /*
257 * It's not possible to punch hole or perform collapse range 257 * We can only allow pure fallocate on append only files
258 * on append only file
259 */ 258 */
260 if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE) 259 if ((mode & ~FALLOC_FL_KEEP_SIZE) && IS_APPEND(inode))
261 && IS_APPEND(inode))
262 return -EPERM; 260 return -EPERM;
263 261
264 if (IS_IMMUTABLE(inode)) 262 if (IS_IMMUTABLE(inode))
265 return -EPERM; 263 return -EPERM;
266 264
267 /* 265 /*
266 * We can not allow to do any fallocate operation on an active
267 * swapfile
268 */
269 if (IS_SWAPFILE(inode))
270 ret = -ETXTBSY;
271
272 /*
268 * Revalidate the write permissions, in case security policy has 273 * Revalidate the write permissions, in case security policy has
269 * changed since the files were opened. 274 * changed since the files were opened.
270 */ 275 */
@@ -286,14 +291,6 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
286 if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0)) 291 if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
287 return -EFBIG; 292 return -EFBIG;
288 293
289 /*
290 * There is no need to overlap collapse range with EOF, in which case
291 * it is effectively a truncate operation
292 */
293 if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
294 (offset + len >= i_size_read(inode)))
295 return -EINVAL;
296
297 if (!file->f_op->fallocate) 294 if (!file->f_op->fallocate)
298 return -EOPNOTSUPP; 295 return -EOPNOTSUPP;
299 296
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 82afdcb33183..951a2321ee01 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -841,7 +841,15 @@ xfs_file_fallocate(
841 goto out_unlock; 841 goto out_unlock;
842 } 842 }
843 843
844 ASSERT(offset + len < i_size_read(inode)); 844 /*
845 * There is no need to overlap collapse range with EOF,
846 * in which case it is effectively a truncate operation
847 */
848 if (offset + len >= i_size_read(inode)) {
849 error = -EINVAL;
850 goto out_unlock;
851 }
852
845 new_size = i_size_read(inode) - len; 853 new_size = i_size_read(inode) - len;
846 854
847 error = xfs_collapse_file_space(ip, offset, len); 855 error = xfs_collapse_file_space(ip, offset, len);