path: root/fs
Diffstat (limited to 'fs')
-rw-r--r--  fs/affs/super.c                       2
-rw-r--r--  fs/aio.c                             42
-rw-r--r--  fs/autofs4/root.c                     4
-rw-r--r--  fs/btrfs/ctree.h                     14
-rw-r--r--  fs/btrfs/disk-io.c                   10
-rw-r--r--  fs/btrfs/extent-tree.c                6
-rw-r--r--  fs/btrfs/file.c                       8
-rw-r--r--  fs/btrfs/inode-map.c                 24
-rw-r--r--  fs/btrfs/ioctl.c                      4
-rw-r--r--  fs/btrfs/send.c                       5
-rw-r--r--  fs/btrfs/super.c                     22
-rw-r--r--  fs/ceph/caps.c                        2
-rw-r--r--  fs/ceph/dir.c                        33
-rw-r--r--  fs/ceph/file.c                        3
-rw-r--r--  fs/ceph/inode.c                      71
-rw-r--r--  fs/ceph/ioctl.c                       3
-rw-r--r--  fs/ceph/locks.c                       1
-rw-r--r--  fs/ceph/super.h                       1
-rw-r--r--  fs/cifs/cifsfs.c                     14
-rw-r--r--  fs/cifs/cifsglob.h                    8
-rw-r--r--  fs/cifs/cifsproto.h                   3
-rw-r--r--  fs/cifs/cifssmb.c                     3
-rw-r--r--  fs/cifs/file.c                       35
-rw-r--r--  fs/cifs/misc.c                       74
-rw-r--r--  fs/cifs/smb1ops.c                    11
-rw-r--r--  fs/cifs/smb2misc.c                   18
-rw-r--r--  fs/cifs/smb2ops.c                    14
-rw-r--r--  fs/cifs/smb2pdu.c                     2
-rw-r--r--  fs/compat.c                          14
-rw-r--r--  fs/coredump.c                         7
-rw-r--r--  fs/dcache.c                         318
-rw-r--r--  fs/ext4/balloc.c                      2
-rw-r--r--  fs/ext4/ext4.h                       17
-rw-r--r--  fs/ext4/extents.c                   109
-rw-r--r--  fs/ext4/extents_status.c              2
-rw-r--r--  fs/ext4/file.c                        2
-rw-r--r--  fs/ext4/inode.c                      53
-rw-r--r--  fs/ext4/mballoc.c                    18
-rw-r--r--  fs/ext4/page-io.c                     5
-rw-r--r--  fs/ext4/super.c                      51
-rw-r--r--  fs/ext4/xattr.c                      23
-rw-r--r--  fs/fcntl.c                           12
-rw-r--r--  fs/fuse/control.c                     2
-rw-r--r--  fs/fuse/dir.c                       146
-rw-r--r--  fs/fuse/file.c                       84
-rw-r--r--  fs/fuse/fuse_i.h                     10
-rw-r--r--  fs/fuse/inode.c                      16
-rw-r--r--  fs/hugetlbfs/inode.c                  5
-rw-r--r--  fs/kernfs/dir.c                       9
-rw-r--r--  fs/kernfs/file.c                      2
-rw-r--r--  fs/kernfs/inode.c                    14
-rw-r--r--  fs/locks.c                           55
-rw-r--r--  fs/namei.c                            6
-rw-r--r--  fs/nfsd/nfs4callback.c                4
-rw-r--r--  fs/nfsd/nfs4xdr.c                     8
-rw-r--r--  fs/notify/fanotify/fanotify_user.c    2
-rw-r--r--  fs/open.c                            21
-rw-r--r--  fs/posix_acl.c                        6
-rw-r--r--  fs/super.c                            5
-rw-r--r--  fs/sysfs/file.c                      92
-rw-r--r--  fs/ubifs/super.c                      2
-rw-r--r--  fs/xfs/xfs_aops.c                    51
-rw-r--r--  fs/xfs/xfs_attr.c                    24
-rw-r--r--  fs/xfs/xfs_attr_leaf.c               21
-rw-r--r--  fs/xfs/xfs_attr_list.c                1
-rw-r--r--  fs/xfs/xfs_attr_remote.c              8
-rw-r--r--  fs/xfs/xfs_bmap.c                    17
-rw-r--r--  fs/xfs/xfs_bmap_util.c               13
-rw-r--r--  fs/xfs/xfs_buf.c                     16
-rw-r--r--  fs/xfs/xfs_da_btree.h                 2
-rw-r--r--  fs/xfs/xfs_file.c                    12
-rw-r--r--  fs/xfs/xfs_inode.c                    5
-rw-r--r--  fs/xfs/xfs_inode.h                    2
-rw-r--r--  fs/xfs/xfs_iops.c                    41
-rw-r--r--  fs/xfs/xfs_log.c                     63
-rw-r--r--  fs/xfs/xfs_mount.c                    2
-rw-r--r--  fs/xfs/xfs_sb.c                       4
-rw-r--r--  fs/xfs/xfs_trace.h                    1
78 files changed, 1049 insertions(+), 793 deletions(-)
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 6d589f28bf9b..895ac7dc9dbf 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -340,8 +340,6 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
 			   &blocksize,&sbi->s_prefix,
 			   sbi->s_volume, &mount_flags)) {
 		printk(KERN_ERR "AFFS: Error parsing options\n");
-		kfree(sbi->s_prefix);
-		kfree(sbi);
 		return -EINVAL;
 	}
 	/* N.B. after this point s_prefix must be released */
diff --git a/fs/aio.c b/fs/aio.c
index 12a3de0ee6da..a0ed6c7d2cd2 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -112,6 +112,11 @@ struct kioctx {
 
 	struct work_struct	free_work;
 
+	/*
+	 * signals when all in-flight requests are done
+	 */
+	struct completion *requests_done;
+
 	struct {
 		/*
 		 * This counts the number of available slots in the ringbuffer,
@@ -508,6 +513,10 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
 {
 	struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
 
+	/* At this point we know that there are no any in-flight requests */
+	if (ctx->requests_done)
+		complete(ctx->requests_done);
+
 	INIT_WORK(&ctx->free_work, free_ioctx);
 	schedule_work(&ctx->free_work);
 }
@@ -718,7 +727,8 @@ err:
  * when the processes owning a context have all exited to encourage
  * the rapid destruction of the kioctx.
  */
-static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx)
+static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
+		       struct completion *requests_done)
 {
 	if (!atomic_xchg(&ctx->dead, 1)) {
 		struct kioctx_table *table;
@@ -747,7 +757,11 @@ static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx)
 		if (ctx->mmap_size)
 			vm_munmap(ctx->mmap_base, ctx->mmap_size);
 
+		ctx->requests_done = requests_done;
 		percpu_ref_kill(&ctx->users);
+	} else {
+		if (requests_done)
+			complete(requests_done);
 	}
 }
 
@@ -809,7 +823,7 @@ void exit_aio(struct mm_struct *mm)
 		 */
 		ctx->mmap_size = 0;
 
-		kill_ioctx(mm, ctx);
+		kill_ioctx(mm, ctx, NULL);
 	}
 }
 
@@ -1185,7 +1199,7 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
 	if (!IS_ERR(ioctx)) {
 		ret = put_user(ioctx->user_id, ctxp);
 		if (ret)
-			kill_ioctx(current->mm, ioctx);
+			kill_ioctx(current->mm, ioctx, NULL);
 		percpu_ref_put(&ioctx->users);
 	}
 
@@ -1203,8 +1217,22 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
 {
 	struct kioctx *ioctx = lookup_ioctx(ctx);
 	if (likely(NULL != ioctx)) {
-		kill_ioctx(current->mm, ioctx);
+		struct completion requests_done =
+			COMPLETION_INITIALIZER_ONSTACK(requests_done);
+
+		/* Pass requests_done to kill_ioctx() where it can be set
+		 * in a thread-safe way. If we try to set it here then we have
+		 * a race condition if two io_destroy() called simultaneously.
+		 */
+		kill_ioctx(current->mm, ioctx, &requests_done);
 		percpu_ref_put(&ioctx->users);
+
+		/* Wait until all IO for the context are done. Otherwise kernel
+		 * keep using user-space buffers even if user thinks the context
+		 * is destroyed.
+		 */
+		wait_for_completion(&requests_done);
+
 		return 0;
 	}
 	pr_debug("EINVAL: io_destroy: invalid context id\n");
@@ -1299,10 +1327,8 @@ rw_common:
 						&iovec, compat)
 			: aio_setup_single_vector(req, rw, buf, &nr_segs,
 						  iovec);
-		if (ret)
-			return ret;
-
-		ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes);
+		if (!ret)
+			ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes);
 		if (ret < 0) {
 			if (iovec != &inline_vec)
 				kfree(iovec);
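
Editor's note: the io_destroy() change above uses the kernel's on-stack completion idiom. A minimal sketch of the pattern for orientation only; teardown_ctx, last_request_finished and destroy_and_wait are hypothetical names, not part of this patch:

	#include <linux/completion.h>

	struct teardown_ctx {
		struct completion *all_done;	/* published before teardown starts */
	};

	/* runs when the final in-flight request completes */
	static void last_request_finished(struct teardown_ctx *ctx)
	{
		if (ctx->all_done)
			complete(ctx->all_done);
	}

	static void destroy_and_wait(struct teardown_ctx *ctx)
	{
		DECLARE_COMPLETION_ONSTACK(all_done);

		/* set inside the teardown path, as kill_ioctx() does above */
		ctx->all_done = &all_done;
		/* ... kill the context's percpu ref here ... */
		wait_for_completion(&all_done);	/* the stack frame outlives the signaler */
	}

Passing the completion into the teardown path, rather than assigning it at the call site, avoids exactly the race the patch comment describes: two concurrent io_destroy() calls must not both publish their own completion.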
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 2caf36ac3e93..cc87c1abac97 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -179,7 +179,7 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
 		spin_lock(&active->d_lock);
 
 		/* Already gone? */
-		if (!d_count(active))
+		if ((int) d_count(active) <= 0)
 			goto next;
 
 		qstr = &active->d_name;
@@ -230,7 +230,7 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
 
 		spin_lock(&expiring->d_lock);
 
-		/* Bad luck, we've already been dentry_iput */
+		/* We've already been dentry_iput or unlinked */
 		if (!expiring->d_inode)
 			goto next;
 
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4c48df572bd6..ba6b88528dc7 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2058,6 +2058,20 @@ struct btrfs_ioctl_defrag_range_args {
 #define btrfs_raw_test_opt(o, opt)	((o) & BTRFS_MOUNT_##opt)
 #define btrfs_test_opt(root, opt)	((root)->fs_info->mount_opt & \
 					 BTRFS_MOUNT_##opt)
+#define btrfs_set_and_info(root, opt, fmt, args...) \
+{ \
+	if (!btrfs_test_opt(root, opt)) \
+		btrfs_info(root->fs_info, fmt, ##args); \
+	btrfs_set_opt(root->fs_info->mount_opt, opt); \
+}
+
+#define btrfs_clear_and_info(root, opt, fmt, args...) \
+{ \
+	if (btrfs_test_opt(root, opt)) \
+		btrfs_info(root->fs_info, fmt, ##args); \
+	btrfs_clear_opt(root->fs_info->mount_opt, opt); \
+}
+
 /*
  * Inode flags
  */
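
Editor's note: these macros set or clear a mount option and log only when the state actually changes. For context, a sketch of how a caller in the mount-option parser reads (the exact option and message text here are illustrative, not a verbatim quote of super.c):

	case Opt_ssd:
		btrfs_set_and_info(root, SSD, "use ssd allocation scheme");
		break;
	case Opt_nossd:
		btrfs_clear_and_info(root, SSD, "not using ssd allocation scheme");
		break;

Hoisting them from fs/btrfs/super.c into ctree.h (see the super.c hunk below) lets other files, such as inode-map.c, use the same log-on-transition behavior.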
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 029d46c2e170..983314932af3 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2861,7 +2861,7 @@ retry_root_backup:
 			printk(KERN_ERR "BTRFS: failed to read log tree\n");
 			free_extent_buffer(log_tree_root->node);
 			kfree(log_tree_root);
-			goto fail_trans_kthread;
+			goto fail_qgroup;
 		}
 		/* returns with log_tree_root freed on success */
 		ret = btrfs_recover_log_trees(log_tree_root);
@@ -2870,24 +2870,24 @@ retry_root_backup:
 				    "Failed to recover log tree");
 			free_extent_buffer(log_tree_root->node);
 			kfree(log_tree_root);
-			goto fail_trans_kthread;
+			goto fail_qgroup;
 		}
 
 		if (sb->s_flags & MS_RDONLY) {
 			ret = btrfs_commit_super(tree_root);
 			if (ret)
-				goto fail_trans_kthread;
+				goto fail_qgroup;
 		}
 	}
 
 	ret = btrfs_find_orphan_roots(tree_root);
 	if (ret)
-		goto fail_trans_kthread;
+		goto fail_qgroup;
 
 	if (!(sb->s_flags & MS_RDONLY)) {
 		ret = btrfs_cleanup_fs_roots(fs_info);
 		if (ret)
-			goto fail_trans_kthread;
+			goto fail_qgroup;
 
 		ret = btrfs_recover_relocation(tree_root);
 		if (ret < 0) {
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1306487c82cf..5590af92094b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1542,6 +1542,7 @@ again:
 		ret = 0;
 	}
 	if (ret) {
+		key.objectid = bytenr;
 		key.type = BTRFS_EXTENT_ITEM_KEY;
 		key.offset = num_bytes;
 		btrfs_release_path(path);
@@ -3542,11 +3543,13 @@ static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
 	return extended_to_chunk(flags | tmp);
 }
 
-static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
+static u64 get_alloc_profile(struct btrfs_root *root, u64 orig_flags)
 {
 	unsigned seq;
+	u64 flags;
 
 	do {
+		flags = orig_flags;
 		seq = read_seqbegin(&root->fs_info->profiles_lock);
 
 		if (flags & BTRFS_BLOCK_GROUP_DATA)
@@ -5719,6 +5722,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 
 	if (ret > 0 && skinny_metadata) {
 		skinny_metadata = false;
+		key.objectid = bytenr;
 		key.type = BTRFS_EXTENT_ITEM_KEY;
 		key.offset = num_bytes;
 		btrfs_release_path(path);
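
Editor's note: the get_alloc_profile() hunk fixes a classic seqlock-reader bug: the loop body mutated flags, so a retry restarted from an already-modified value. Any scratch state derived inside a read_seqbegin()/read_seqretry() section must be recomputed from pristine inputs on every pass. An illustrative kernel-style sketch (read_profile is a hypothetical name):

	#include <linux/seqlock.h>

	static u64 read_profile(seqlock_t *lock, u64 orig_flags)
	{
		unsigned seq;
		u64 flags;

		do {
			flags = orig_flags;	/* reset before each attempt */
			seq = read_seqbegin(lock);
			/* ... transform flags from data protected by 'lock' ... */
		} while (read_seqretry(lock, seq));

		return flags;
	}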
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index eb742c07e7a4..ae6af072b635 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -800,7 +800,7 @@ next_slot:
 		if (start > key.offset && end < extent_end) {
 			BUG_ON(del_nr > 0);
 			if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
-				ret = -EINVAL;
+				ret = -EOPNOTSUPP;
 				break;
 			}
 
@@ -846,7 +846,7 @@ next_slot:
 		 */
 		if (start <= key.offset && end < extent_end) {
 			if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
-				ret = -EINVAL;
+				ret = -EOPNOTSUPP;
 				break;
 			}
 
@@ -872,7 +872,7 @@ next_slot:
 		if (start > key.offset && end >= extent_end) {
 			BUG_ON(del_nr > 0);
 			if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
-				ret = -EINVAL;
+				ret = -EOPNOTSUPP;
 				break;
 			}
 
@@ -1777,7 +1777,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 	start_pos = round_down(pos, root->sectorsize);
 	if (start_pos > i_size_read(inode)) {
 		/* Expand hole size to cover write data, preventing empty gap */
-		end_pos = round_up(pos + iov->iov_len, root->sectorsize);
+		end_pos = round_up(pos + count, root->sectorsize);
 		err = btrfs_cont_expand(inode, i_size_read(inode), end_pos);
 		if (err) {
 			mutex_unlock(&inode->i_mutex);
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index cc8ca193d830..86935f5ae291 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -176,7 +176,11 @@ static void start_caching(struct btrfs_root *root)
 
 	tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu\n",
 			  root->root_key.objectid);
-	BUG_ON(IS_ERR(tsk)); /* -ENOMEM */
+	if (IS_ERR(tsk)) {
+		btrfs_warn(root->fs_info, "failed to start inode caching task");
+		btrfs_clear_and_info(root, CHANGE_INODE_CACHE,
+				"disabling inode map caching");
+	}
 }
 
 int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid)
@@ -205,24 +209,14 @@ again:
 
 void btrfs_return_ino(struct btrfs_root *root, u64 objectid)
 {
-	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
 	struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;
 
 	if (!btrfs_test_opt(root, INODE_MAP_CACHE))
 		return;
-
again:
 	if (root->cached == BTRFS_CACHE_FINISHED) {
-		__btrfs_add_free_space(ctl, objectid, 1);
+		__btrfs_add_free_space(pinned, objectid, 1);
 	} else {
-		/*
-		 * If we are in the process of caching free ino chunks,
-		 * to avoid adding the same inode number to the free_ino
-		 * tree twice due to cross transaction, we'll leave it
-		 * in the pinned tree until a transaction is committed
-		 * or the caching work is done.
-		 */
-
 		down_write(&root->fs_info->commit_root_sem);
 		spin_lock(&root->cache_lock);
 		if (root->cached == BTRFS_CACHE_FINISHED) {
@@ -234,11 +228,7 @@ again:
 
 		start_caching(root);
 
-		if (objectid <= root->cache_progress ||
-		    objectid >= root->highest_objectid)
-			__btrfs_add_free_space(ctl, objectid, 1);
-		else
-			__btrfs_add_free_space(pinned, objectid, 1);
+		__btrfs_add_free_space(pinned, objectid, 1);
 
 		up_write(&root->fs_info->commit_root_sem);
 	}
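
Editor's note: the start_caching() hunk works because kthread_run() never returns NULL; on failure it returns an ERR_PTR value (typically -ENOMEM), which is why the graceful fallback tests IS_ERR() instead of a NULL check. A minimal sketch of that shape (start_worker and worker_fn are hypothetical):

	#include <linux/kthread.h>
	#include <linux/err.h>
	#include <linux/printk.h>

	static int worker_fn(void *data)
	{
		return 0;	/* real work would go here */
	}

	static void start_worker(void *data)
	{
		struct task_struct *tsk;

		tsk = kthread_run(worker_fn, data, "worker-%d", 0);
		if (IS_ERR(tsk))	/* degrade gracefully instead of BUG_ON() */
			pr_warn("worker failed to start: %ld\n", PTR_ERR(tsk));
	}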
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index e79ff6b90cb7..2ad7de94efef 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3066,7 +3066,7 @@ process_slot:
 						 new_key.offset + datal,
 						 1);
 			if (ret) {
-				if (ret != -EINVAL)
+				if (ret != -EOPNOTSUPP)
 					btrfs_abort_transaction(trans,
 							root, ret);
 				btrfs_end_transaction(trans, root);
@@ -3141,7 +3141,7 @@ process_slot:
 						 new_key.offset + datal,
 						 1);
 			if (ret) {
-				if (ret != -EINVAL)
+				if (ret != -EOPNOTSUPP)
 					btrfs_abort_transaction(trans,
 							root, ret);
 				btrfs_end_transaction(trans, root);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 1ac3ca98c429..eb6537a08c1b 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -349,6 +349,11 @@ static int fs_path_ensure_buf(struct fs_path *p, int len)
 	if (p->buf_len >= len)
 		return 0;
 
+	if (len > PATH_MAX) {
+		WARN_ON(1);
+		return -ENOMEM;
+	}
+
 	path_len = p->end - p->start;
 	old_buf_len = p->buf_len;
 
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 5011aadacab8..9601d25a4607 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -385,20 +385,6 @@ static match_table_t tokens = {
 	{Opt_err, NULL},
 };
 
-#define btrfs_set_and_info(root, opt, fmt, args...) \
-{ \
-	if (!btrfs_test_opt(root, opt)) \
-		btrfs_info(root->fs_info, fmt, ##args); \
-	btrfs_set_opt(root->fs_info->mount_opt, opt); \
-}
-
-#define btrfs_clear_and_info(root, opt, fmt, args...) \
-{ \
-	if (btrfs_test_opt(root, opt)) \
-		btrfs_info(root->fs_info, fmt, ##args); \
-	btrfs_clear_opt(root->fs_info->mount_opt, opt); \
-}
-
 /*
  * Regular mount options parser. Everything that is needed only when
  * reading in a new superblock is parsed here.
@@ -1186,7 +1172,6 @@ static struct dentry *mount_subvol(const char *subvol_name, int flags,
 		return ERR_PTR(-ENOMEM);
 	mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name,
 			     newargs);
-	kfree(newargs);
 
 	if (PTR_RET(mnt) == -EBUSY) {
 		if (flags & MS_RDONLY) {
@@ -1196,17 +1181,22 @@ static struct dentry *mount_subvol(const char *subvol_name, int flags,
 			int r;
 			mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY, device_name,
 					     newargs);
-			if (IS_ERR(mnt))
+			if (IS_ERR(mnt)) {
+				kfree(newargs);
 				return ERR_CAST(mnt);
+			}
 
 			r = btrfs_remount(mnt->mnt_sb, &flags, NULL);
 			if (r < 0) {
 				/* FIXME: release vfsmount mnt ??*/
+				kfree(newargs);
 				return ERR_PTR(r);
 			}
 		}
 	}
 
+	kfree(newargs);
+
 	if (IS_ERR(mnt))
 		return ERR_CAST(mnt);
 
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 2e5e648eb5c3..c561b628ebce 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3261,7 +3261,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
 			rel->seq = cpu_to_le32(cap->seq);
 			rel->issue_seq = cpu_to_le32(cap->issue_seq),
 			rel->mseq = cpu_to_le32(cap->mseq);
-			rel->caps = cpu_to_le32(cap->issued);
+			rel->caps = cpu_to_le32(cap->implemented);
 			rel->wanted = cpu_to_le32(cap->mds_wanted);
 			rel->dname_len = 0;
 			rel->dname_seq = 0;
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 766410a12c2c..c29d6ae68874 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -141,7 +141,7 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx,
 
 	/* start at beginning? */
 	if (ctx->pos == 2 || last == NULL ||
-	    ctx->pos < ceph_dentry(last)->offset) {
+	    fpos_cmp(ctx->pos, ceph_dentry(last)->offset) < 0) {
 		if (list_empty(&parent->d_subdirs))
 			goto out_unlock;
 		p = parent->d_subdirs.prev;
@@ -182,9 +182,16 @@ more:
 	spin_unlock(&dentry->d_lock);
 	spin_unlock(&parent->d_lock);
 
+	/* make sure a dentry wasn't dropped while we didn't have parent lock */
+	if (!ceph_dir_is_complete(dir)) {
+		dout(" lost dir complete on %p; falling back to mds\n", dir);
+		dput(dentry);
+		err = -EAGAIN;
+		goto out;
+	}
+
 	dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, ctx->pos,
 	     dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
-	ctx->pos = di->offset;
 	if (!dir_emit(ctx, dentry->d_name.name,
 		      dentry->d_name.len,
 		      ceph_translate_ino(dentry->d_sb, dentry->d_inode->i_ino),
@@ -198,19 +205,12 @@ more:
 		return 0;
 	}
 
+	ctx->pos = di->offset + 1;
+
 	if (last)
 		dput(last);
 	last = dentry;
 
-	ctx->pos++;
-
-	/* make sure a dentry wasn't dropped while we didn't have parent lock */
-	if (!ceph_dir_is_complete(dir)) {
-		dout(" lost dir complete on %p; falling back to mds\n", dir);
-		err = -EAGAIN;
-		goto out;
-	}
-
 	spin_lock(&parent->d_lock);
 	p = p->prev;	/* advance to next dentry */
 	goto more;
@@ -296,6 +296,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
 		err = __dcache_readdir(file, ctx, shared_gen);
 		if (err != -EAGAIN)
 			return err;
+		frag = fpos_frag(ctx->pos);
+		off = fpos_off(ctx->pos);
 	} else {
 		spin_unlock(&ci->i_ceph_lock);
 	}
@@ -446,7 +448,6 @@ more:
 	if (atomic_read(&ci->i_release_count) == fi->dir_release_count) {
 		dout(" marking %p complete\n", inode);
 		__ceph_dir_set_complete(ci, fi->dir_release_count);
-		ci->i_max_offset = ctx->pos;
 	}
 	spin_unlock(&ci->i_ceph_lock);
 
@@ -935,14 +936,16 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
 		 * to do it here.
 		 */
 
-		/* d_move screws up d_subdirs order */
-		ceph_dir_clear_complete(new_dir);
-
 		d_move(old_dentry, new_dentry);
 
 		/* ensure target dentry is invalidated, despite
 		   rehashing bug in vfs_rename_dir */
 		ceph_invalidate_dentry_lease(new_dentry);
+
+		/* d_move screws up sibling dentries' offsets */
+		ceph_dir_clear_complete(old_dir);
+		ceph_dir_clear_complete(new_dir);
+
 	}
 	ceph_mdsc_put_request(req);
 	return err;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 39da1c2efa50..88a6df4cbe6d 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1221,9 +1221,6 @@ static long ceph_fallocate(struct file *file, int mode,
 	if (!S_ISREG(inode->i_mode))
 		return -EOPNOTSUPP;
 
-	if (IS_SWAPFILE(inode))
-		return -ETXTBSY;
-
 	mutex_lock(&inode->i_mutex);
 
 	if (ceph_snap(inode) != CEPH_NOSNAP) {
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 0b0728e5be2d..233c6f96910a 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -744,7 +744,6 @@ static int fill_inode(struct inode *inode,
 		    !__ceph_dir_is_complete(ci)) {
 			dout(" marking %p complete (empty)\n", inode);
 			__ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count));
-			ci->i_max_offset = 2;
 		}
 no_change:
 	/* only update max_size on auth cap */
@@ -890,41 +889,6 @@ out_unlock:
 }
 
 /*
- * Set dentry's directory position based on the current dir's max, and
- * order it in d_subdirs, so that dcache_readdir behaves.
- *
- * Always called under directory's i_mutex.
- */
-static void ceph_set_dentry_offset(struct dentry *dn)
-{
-	struct dentry *dir = dn->d_parent;
-	struct inode *inode = dir->d_inode;
-	struct ceph_inode_info *ci;
-	struct ceph_dentry_info *di;
-
-	BUG_ON(!inode);
-
-	ci = ceph_inode(inode);
-	di = ceph_dentry(dn);
-
-	spin_lock(&ci->i_ceph_lock);
-	if (!__ceph_dir_is_complete(ci)) {
-		spin_unlock(&ci->i_ceph_lock);
-		return;
-	}
-	di->offset = ceph_inode(inode)->i_max_offset++;
-	spin_unlock(&ci->i_ceph_lock);
-
-	spin_lock(&dir->d_lock);
-	spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
-	list_move(&dn->d_u.d_child, &dir->d_subdirs);
-	dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset,
-	     dn->d_u.d_child.prev, dn->d_u.d_child.next);
-	spin_unlock(&dn->d_lock);
-	spin_unlock(&dir->d_lock);
-}
-
-/*
  * splice a dentry to an inode.
  * caller must hold directory i_mutex for this to be safe.
  *
@@ -933,7 +897,7 @@ static void ceph_set_dentry_offset(struct dentry *dn)
  * the caller) if we fail.
  */
 static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
-				    bool *prehash, bool set_offset)
+				    bool *prehash)
 {
 	struct dentry *realdn;
 
@@ -965,8 +929,6 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
 	}
 	if ((!prehash || *prehash) && d_unhashed(dn))
 		d_rehash(dn);
-	if (set_offset)
-		ceph_set_dentry_offset(dn);
out:
 	return dn;
 }
@@ -987,7 +949,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 {
 	struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
 	struct inode *in = NULL;
-	struct ceph_mds_reply_inode *ininfo;
 	struct ceph_vino vino;
 	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
 	int err = 0;
@@ -1161,6 +1122,9 @@ retry_lookup:
 
 	/* rename? */
 	if (req->r_old_dentry && req->r_op == CEPH_MDS_OP_RENAME) {
+		struct inode *olddir = req->r_old_dentry_dir;
+		BUG_ON(!olddir);
+
 		dout(" src %p '%.*s' dst %p '%.*s'\n",
 		     req->r_old_dentry,
 		     req->r_old_dentry->d_name.len,
@@ -1180,13 +1144,10 @@ retry_lookup:
 		   rehashing bug in vfs_rename_dir */
 		ceph_invalidate_dentry_lease(dn);
 
-		/*
-		 * d_move() puts the renamed dentry at the end of
-		 * d_subdirs. We need to assign it an appropriate
-		 * directory offset so we can behave when dir is
-		 * complete.
-		 */
-		ceph_set_dentry_offset(req->r_old_dentry);
+		/* d_move screws up sibling dentries' offsets */
+		ceph_dir_clear_complete(dir);
+		ceph_dir_clear_complete(olddir);
+
 		dout("dn %p gets new offset %lld\n", req->r_old_dentry,
 		     ceph_dentry(req->r_old_dentry)->offset);
 
@@ -1213,8 +1174,9 @@ retry_lookup:
 
 	/* attach proper inode */
 	if (!dn->d_inode) {
+		ceph_dir_clear_complete(dir);
 		ihold(in);
-		dn = splice_dentry(dn, in, &have_lease, true);
+		dn = splice_dentry(dn, in, &have_lease);
 		if (IS_ERR(dn)) {
 			err = PTR_ERR(dn);
 			goto done;
@@ -1235,17 +1197,16 @@ retry_lookup:
 		   (req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
 		    req->r_op == CEPH_MDS_OP_MKSNAP)) {
 		struct dentry *dn = req->r_dentry;
+		struct inode *dir = req->r_locked_dir;
 
 		/* fill out a snapdir LOOKUPSNAP dentry */
 		BUG_ON(!dn);
-		BUG_ON(!req->r_locked_dir);
-		BUG_ON(ceph_snap(req->r_locked_dir) != CEPH_SNAPDIR);
-		ininfo = rinfo->targeti.in;
-		vino.ino = le64_to_cpu(ininfo->ino);
-		vino.snap = le64_to_cpu(ininfo->snapid);
+		BUG_ON(!dir);
+		BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR);
 		dout(" linking snapped dir %p to dn %p\n", in, dn);
+		ceph_dir_clear_complete(dir);
 		ihold(in);
-		dn = splice_dentry(dn, in, NULL, true);
+		dn = splice_dentry(dn, in, NULL);
 		if (IS_ERR(dn)) {
 			err = PTR_ERR(dn);
 			goto done;
@@ -1407,7 +1368,7 @@ retry_lookup:
 		}
 
 		if (!dn->d_inode) {
-			dn = splice_dentry(dn, in, NULL, false);
+			dn = splice_dentry(dn, in, NULL);
 			if (IS_ERR(dn)) {
 				err = PTR_ERR(dn);
 				dn = NULL;
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index fdf941b44ff1..a822a6e58290 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -109,6 +109,8 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
 		return PTR_ERR(req);
 	req->r_inode = inode;
 	ihold(inode);
+	req->r_num_caps = 1;
+
 	req->r_inode_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL;
 
 	req->r_args.setlayout.layout.fl_stripe_unit =
@@ -153,6 +155,7 @@ static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg)
 		return PTR_ERR(req);
 	req->r_inode = inode;
 	ihold(inode);
+	req->r_num_caps = 1;
 
 	req->r_args.setlayout.layout.fl_stripe_unit =
 		cpu_to_le32(l.stripe_unit);
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index d94ba0df9f4d..191398852a2e 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -45,6 +45,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
 		return PTR_ERR(req);
 	req->r_inode = inode;
 	ihold(inode);
+	req->r_num_caps = 1;
 
 	/* mds requires start and length rather than start and end */
 	if (LLONG_MAX == fl->fl_end)
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 7866cd05a6bb..ead05cc1f447 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -266,7 +266,6 @@ struct ceph_inode_info {
 	struct timespec i_rctime;
 	u64 i_rbytes, i_rfiles, i_rsubdirs;
 	u64 i_files, i_subdirs;
-	u64 i_max_offset;  /* largest readdir offset, set with complete dir */
 
 	struct rb_root i_fragtree;
 	struct mutex i_fragtree_mutex;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index df9c9141c099..5be1f997ecde 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -253,6 +253,11 @@ cifs_alloc_inode(struct super_block *sb)
 	cifs_set_oplock_level(cifs_inode, 0);
 	cifs_inode->delete_pending = false;
 	cifs_inode->invalid_mapping = false;
+	clear_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cifs_inode->flags);
+	clear_bit(CIFS_INODE_PENDING_WRITERS, &cifs_inode->flags);
+	clear_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cifs_inode->flags);
+	spin_lock_init(&cifs_inode->writers_lock);
+	cifs_inode->writers = 0;
 	cifs_inode->vfs_inode.i_blkbits = 14;  /* 2**14 = CIFS_MAX_MSGSIZE */
 	cifs_inode->server_eof = 0;
 	cifs_inode->uniqueid = 0;
@@ -732,19 +737,26 @@ static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 				   unsigned long nr_segs, loff_t pos)
 {
 	struct inode *inode = file_inode(iocb->ki_filp);
+	struct cifsInodeInfo *cinode = CIFS_I(inode);
 	ssize_t written;
 	int rc;
 
+	written = cifs_get_writer(cinode);
+	if (written)
+		return written;
+
 	written = generic_file_aio_write(iocb, iov, nr_segs, pos);
 
 	if (CIFS_CACHE_WRITE(CIFS_I(inode)))
-		return written;
+		goto out;
 
 	rc = filemap_fdatawrite(inode->i_mapping);
 	if (rc)
 		cifs_dbg(FYI, "cifs_file_aio_write: %d rc on %p inode\n",
 			 rc, inode);
 
+out:
+	cifs_put_writer(cinode);
 	return written;
 }
 
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index c0f3718b77a8..30f6e9251a4a 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -228,6 +228,8 @@ struct smb_version_operations {
 	/* verify the message */
 	int (*check_message)(char *, unsigned int);
 	bool (*is_oplock_break)(char *, struct TCP_Server_Info *);
+	void (*downgrade_oplock)(struct TCP_Server_Info *,
+				 struct cifsInodeInfo *, bool);
 	/* process transaction2 response */
 	bool (*check_trans2)(struct mid_q_entry *, struct TCP_Server_Info *,
 			     char *, int);
@@ -1113,6 +1115,12 @@ struct cifsInodeInfo {
 	unsigned int epoch;	/* used to track lease state changes */
 	bool delete_pending;	/* DELETE_ON_CLOSE is set */
 	bool invalid_mapping;	/* pagecache is invalid */
+	unsigned long flags;
+#define CIFS_INODE_PENDING_OPLOCK_BREAK   (0) /* oplock break in progress */
+#define CIFS_INODE_PENDING_WRITERS	  (1) /* Writes in progress */
+#define CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2 (2) /* Downgrade oplock to L2 */
+	spinlock_t writers_lock;
+	unsigned int writers; /* Number of writers on this inode */
 	unsigned long time;	/* jiffies of last update of inode */
 	u64  server_eof;	/* current file size on server -- protected by i_lock */
 	u64  uniqueid;		/* server inode number */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index acc4ee8ed075..ca7980a1e303 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -127,6 +127,9 @@ extern u64 cifs_UnixTimeToNT(struct timespec);
 extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time,
 				      int offset);
 extern void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock);
+extern int cifs_get_writer(struct cifsInodeInfo *cinode);
+extern void cifs_put_writer(struct cifsInodeInfo *cinode);
+extern void cifs_done_oplock_break(struct cifsInodeInfo *cinode);
 extern int cifs_unlock_range(struct cifsFileInfo *cfile,
 			     struct file_lock *flock, const unsigned int xid);
 extern int cifs_push_mandatory_locks(struct cifsFileInfo *cfile);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index f3264bd7a83d..6ce4e0954b98 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -6197,6 +6197,9 @@ QAllEAsRetry:
 	cifs_dbg(FYI, "ea length %d\n", list_len);
 	if (list_len <= 8) {
 		cifs_dbg(FYI, "empty EA list returned from server\n");
+		/* didn't find the named attribute */
+		if (ea_name)
+			rc = -ENODATA;
 		goto QAllEAsOut;
 	}
 
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 8add25538a3b..5ed03e0b8b40 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2599,7 +2599,7 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov,
 			ssize_t err;
 
 			err = generic_write_sync(file, iocb->ki_pos - rc, rc);
-			if (rc < 0)
+			if (err < 0)
 				rc = err;
 		}
 	} else {
@@ -2621,12 +2621,20 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
 	ssize_t written;
 
+	written = cifs_get_writer(cinode);
+	if (written)
+		return written;
+
 	if (CIFS_CACHE_WRITE(cinode)) {
 		if (cap_unix(tcon->ses) &&
 		    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
-		    && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
-			return generic_file_aio_write(iocb, iov, nr_segs, pos);
-		return cifs_writev(iocb, iov, nr_segs, pos);
+		    && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
+			written = generic_file_aio_write(
+					iocb, iov, nr_segs, pos);
+			goto out;
+		}
+		written = cifs_writev(iocb, iov, nr_segs, pos);
+		goto out;
 	}
 	/*
 	 * For non-oplocked files in strict cache mode we need to write the data
@@ -2646,6 +2654,8 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
 			 inode);
 		cinode->oplock = 0;
 	}
+out:
+	cifs_put_writer(cinode);
 	return written;
 }
 
@@ -2872,7 +2882,7 @@ ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
 					     cifs_uncached_readv_complete);
 		if (!rdata) {
 			rc = -ENOMEM;
-			goto error;
+			break;
 		}
 
 		rc = cifs_read_allocate_pages(rdata, npages);
@@ -3621,6 +3631,13 @@ static int cifs_launder_page(struct page *page)
 	return rc;
 }
 
+static int
+cifs_pending_writers_wait(void *unused)
+{
+	schedule();
+	return 0;
+}
+
 void cifs_oplock_break(struct work_struct *work)
 {
 	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
@@ -3628,8 +3645,15 @@ void cifs_oplock_break(struct work_struct *work)
 	struct inode *inode = cfile->dentry->d_inode;
 	struct cifsInodeInfo *cinode = CIFS_I(inode);
 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
+	struct TCP_Server_Info *server = tcon->ses->server;
 	int rc = 0;
 
+	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
+			cifs_pending_writers_wait, TASK_UNINTERRUPTIBLE);
+
+	server->ops->downgrade_oplock(server, cinode,
+		test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
+
 	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
 	    cifs_has_mand_locks(cinode)) {
 		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
@@ -3666,6 +3690,7 @@ void cifs_oplock_break(struct work_struct *work)
 			 cinode);
 		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
 	}
+	cifs_done_oplock_break(cinode);
 }
 
 /*
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 2f9f3790679d..3b0c62e622da 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -466,8 +466,22 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
 				cifs_dbg(FYI, "file id match, oplock break\n");
 				pCifsInode = CIFS_I(netfile->dentry->d_inode);
 
-				cifs_set_oplock_level(pCifsInode,
-					pSMB->OplockLevel ? OPLOCK_READ : 0);
+				set_bit(CIFS_INODE_PENDING_OPLOCK_BREAK,
+					&pCifsInode->flags);
+
+				/*
+				 * Set flag if the server downgrades the oplock
+				 * to L2 else clear.
+				 */
+				if (pSMB->OplockLevel)
+					set_bit(
+					   CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
+					   &pCifsInode->flags);
+				else
+					clear_bit(
+					   CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
+					   &pCifsInode->flags);
+
 				queue_work(cifsiod_wq,
 					   &netfile->oplock_break);
 				netfile->oplock_break_cancelled = false;
@@ -551,6 +565,62 @@ void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock)
 		cinode->oplock = 0;
 }
 
+static int
+cifs_oplock_break_wait(void *unused)
+{
+	schedule();
+	return signal_pending(current) ? -ERESTARTSYS : 0;
+}
+
+/*
+ * We wait for oplock breaks to be processed before we attempt to perform
+ * writes.
+ */
+int cifs_get_writer(struct cifsInodeInfo *cinode)
+{
+	int rc;
+
+start:
+	rc = wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_OPLOCK_BREAK,
+			 cifs_oplock_break_wait, TASK_KILLABLE);
+	if (rc)
+		return rc;
+
+	spin_lock(&cinode->writers_lock);
+	if (!cinode->writers)
+		set_bit(CIFS_INODE_PENDING_WRITERS, &cinode->flags);
+	cinode->writers++;
+	/* Check to see if we have started servicing an oplock break */
+	if (test_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cinode->flags)) {
+		cinode->writers--;
+		if (cinode->writers == 0) {
+			clear_bit(CIFS_INODE_PENDING_WRITERS, &cinode->flags);
+			wake_up_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS);
+		}
+		spin_unlock(&cinode->writers_lock);
+		goto start;
+	}
+	spin_unlock(&cinode->writers_lock);
+	return 0;
+}
+
+void cifs_put_writer(struct cifsInodeInfo *cinode)
+{
+	spin_lock(&cinode->writers_lock);
+	cinode->writers--;
+	if (cinode->writers == 0) {
+		clear_bit(CIFS_INODE_PENDING_WRITERS, &cinode->flags);
+		wake_up_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS);
+	}
+	spin_unlock(&cinode->writers_lock);
+}
+
+void cifs_done_oplock_break(struct cifsInodeInfo *cinode)
+{
+	clear_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cinode->flags);
+	wake_up_bit(&cinode->flags, CIFS_INODE_PENDING_OPLOCK_BREAK);
+}
+
 bool
 backup_cred(struct cifs_sb_info *cifs_sb)
 {
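
Editor's note: with the helpers above, every write path is expected to bracket itself between cifs_get_writer() and cifs_put_writer(), as the cifs_file_aio_write() and cifs_strict_writev() hunks do. A condensed sketch of the pairing; guarded_write is a hypothetical caller, not part of the patch:

	static ssize_t guarded_write(struct cifsInodeInfo *cinode)
	{
		ssize_t rc;

		rc = cifs_get_writer(cinode);	/* sleeps while an oplock break
						   is being serviced */
		if (rc)
			return rc;

		/* ... issue the write ... */

		cifs_put_writer(cinode);	/* last writer out wakes the
						   oplock-break worker */
		return 0;
	}

The oplock-break worker, in turn, waits on CIFS_INODE_PENDING_WRITERS before downgrading the oplock, so in-flight writes drain before the cache level drops.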
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 526fb89f9230..d1fdfa848703 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -372,6 +372,16 @@ coalesce_t2(char *second_buf, struct smb_hdr *target_hdr)
 	return 0;
 }
 
+static void
+cifs_downgrade_oplock(struct TCP_Server_Info *server,
+			struct cifsInodeInfo *cinode, bool set_level2)
+{
+	if (set_level2)
+		cifs_set_oplock_level(cinode, OPLOCK_READ);
+	else
+		cifs_set_oplock_level(cinode, 0);
+}
+
 static bool
 cifs_check_trans2(struct mid_q_entry *mid, struct TCP_Server_Info *server,
 		  char *buf, int malformed)
@@ -1019,6 +1029,7 @@ struct smb_version_operations smb1_operations = {
 	.clear_stats = cifs_clear_stats,
 	.print_stats = cifs_print_stats,
 	.is_oplock_break = is_valid_oplock_break,
+	.downgrade_oplock = cifs_downgrade_oplock,
 	.check_trans2 = cifs_check_trans2,
 	.need_neg = cifs_need_neg,
 	.negotiate = cifs_negotiate,
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index fb3966265b6e..b8021fde987d 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -575,9 +575,21 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
 				else
 					cfile->oplock_break_cancelled = false;
 
-				server->ops->set_oplock_level(cinode,
-				  rsp->OplockLevel ? SMB2_OPLOCK_LEVEL_II : 0,
-				  0, NULL);
+				set_bit(CIFS_INODE_PENDING_OPLOCK_BREAK,
+					&cinode->flags);
+
+				/*
+				 * Set flag if the server downgrades the oplock
+				 * to L2 else clear.
+				 */
+				if (rsp->OplockLevel)
+					set_bit(
+					   CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
+					   &cinode->flags);
+				else
+					clear_bit(
+					   CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
+					   &cinode->flags);
 
 				queue_work(cifsiod_wq, &cfile->oplock_break);
 
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 192f51a12cf1..35ddc3ed119d 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -905,6 +905,17 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
 }
 
 static void
+smb2_downgrade_oplock(struct TCP_Server_Info *server,
+			struct cifsInodeInfo *cinode, bool set_level2)
+{
+	if (set_level2)
+		server->ops->set_oplock_level(cinode, SMB2_OPLOCK_LEVEL_II,
+						0, NULL);
+	else
+		server->ops->set_oplock_level(cinode, 0, 0, NULL);
+}
+
+static void
 smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
 		      unsigned int epoch, bool *purge_cache)
 {
@@ -1110,6 +1121,7 @@ struct smb_version_operations smb20_operations = {
 	.clear_stats = smb2_clear_stats,
 	.print_stats = smb2_print_stats,
 	.is_oplock_break = smb2_is_valid_oplock_break,
+	.downgrade_oplock = smb2_downgrade_oplock,
 	.need_neg = smb2_need_neg,
 	.negotiate = smb2_negotiate,
 	.negotiate_wsize = smb2_negotiate_wsize,
@@ -1184,6 +1196,7 @@ struct smb_version_operations smb21_operations = {
 	.clear_stats = smb2_clear_stats,
 	.print_stats = smb2_print_stats,
 	.is_oplock_break = smb2_is_valid_oplock_break,
+	.downgrade_oplock = smb2_downgrade_oplock,
 	.need_neg = smb2_need_neg,
 	.negotiate = smb2_negotiate,
 	.negotiate_wsize = smb2_negotiate_wsize,
@@ -1259,6 +1272,7 @@ struct smb_version_operations smb30_operations = {
 	.print_stats = smb2_print_stats,
 	.dump_share_caps = smb2_dump_share_caps,
 	.is_oplock_break = smb2_is_valid_oplock_break,
+	.downgrade_oplock = smb2_downgrade_oplock,
 	.need_neg = smb2_need_neg,
 	.negotiate = smb2_negotiate,
 	.negotiate_wsize = smb2_negotiate_wsize,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 860344701067..3802f8c94acc 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1352,7 +1352,6 @@ SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
 		     u64 persistent_fid, u64 volatile_fid)
 {
 	int rc;
-	char *res_key = NULL;
 	struct compress_ioctl fsctl_input;
 	char *ret_data = NULL;
 
@@ -1365,7 +1364,6 @@ SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
 			2 /* in data len */, &ret_data /* out data */, NULL);
 
 	cifs_dbg(FYI, "set compression rc %d\n", rc);
-	kfree(res_key);
 
 	return rc;
 }
diff --git a/fs/compat.c b/fs/compat.c
index ca926ad0430c..66d3d3c6b4b2 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -457,9 +457,9 @@ COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
 	case F_GETLK64:
 	case F_SETLK64:
 	case F_SETLKW64:
-	case F_GETLKP:
-	case F_SETLKP:
-	case F_SETLKPW:
+	case F_OFD_GETLK:
+	case F_OFD_SETLK:
+	case F_OFD_SETLKW:
 		ret = get_compat_flock64(&f, compat_ptr(arg));
 		if (ret != 0)
 			break;
@@ -468,7 +468,7 @@ COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
 		conv_cmd = convert_fcntl_cmd(cmd);
 		ret = sys_fcntl(fd, conv_cmd, (unsigned long)&f);
 		set_fs(old_fs);
-		if ((conv_cmd == F_GETLK || conv_cmd == F_GETLKP) && ret == 0) {
+		if ((conv_cmd == F_GETLK || conv_cmd == F_OFD_GETLK) && ret == 0) {
 			/* need to return lock information - see above for commentary */
 			if (f.l_start > COMPAT_LOFF_T_MAX)
 				ret = -EOVERFLOW;
@@ -493,9 +493,9 @@ COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
 	case F_GETLK64:
 	case F_SETLK64:
 	case F_SETLKW64:
-	case F_GETLKP:
-	case F_SETLKP:
-	case F_SETLKPW:
+	case F_OFD_GETLK:
+	case F_OFD_SETLK:
+	case F_OFD_SETLKW:
 		return -EINVAL;
 	}
 	return compat_sys_fcntl64(fd, cmd, arg);
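
Editor's note: this hunk renames the new file-private lock commands to their final F_OFD_* ("open file description") names. From userspace an OFD lock looks like a classic POSIX lock except that l_pid must be zero and ownership follows the open file description rather than the process. A hedged userspace sketch (assumes a kernel and libc new enough to define the F_OFD_* constants):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <unistd.h>

	static int lock_first_byte(int fd)
	{
		struct flock fl = {
			.l_type   = F_WRLCK,
			.l_whence = SEEK_SET,
			.l_start  = 0,
			.l_len    = 1,
			.l_pid    = 0,	/* must be 0 for OFD locks */
		};

		/* the lock is owned by this open file description, so it
		 * survives fork() with the fd and is released on last close */
		return fcntl(fd, F_OFD_SETLK, &fl);
	}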
diff --git a/fs/coredump.c b/fs/coredump.c
index e3ad709a4232..0b2528fb640e 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -73,10 +73,15 @@ static int expand_corename(struct core_name *cn, int size)
 static int cn_vprintf(struct core_name *cn, const char *fmt, va_list arg)
 {
 	int free, need;
+	va_list arg_copy;
 
again:
 	free = cn->size - cn->used;
-	need = vsnprintf(cn->corename + cn->used, free, fmt, arg);
+
+	va_copy(arg_copy, arg);
+	need = vsnprintf(cn->corename + cn->used, free, fmt, arg_copy);
+	va_end(arg_copy);
+
 	if (need < free) {
 		cn->used += need;
 		return 0;
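
Editor's note: the fix above exists because a va_list consumed by vsnprintf() is indeterminate afterwards, so the retry after expand_corename() was re-reading a dead list. The portable idiom, shown as a standalone userspace sketch (format_once is a hypothetical name):

	#include <stdarg.h>
	#include <stdio.h>

	static int format_once(char *buf, size_t size, const char *fmt, va_list args)
	{
		va_list copy;
		int need;

		va_copy(copy, args);	/* work on a private copy... */
		need = vsnprintf(buf, size, fmt, copy);
		va_end(copy);

		/* ...so 'args' stays valid if the caller grows buf and retries */
		return need;
	}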
diff --git a/fs/dcache.c b/fs/dcache.c
index 40707d88a945..42ae01eefc07 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -246,16 +246,8 @@ static void __d_free(struct rcu_head *head)
 	kmem_cache_free(dentry_cache, dentry);
 }
 
-/*
- * no locks, please.
- */
-static void d_free(struct dentry *dentry)
+static void dentry_free(struct dentry *dentry)
 {
-	BUG_ON((int)dentry->d_lockref.count > 0);
-	this_cpu_dec(nr_dentry);
-	if (dentry->d_op && dentry->d_op->d_release)
-		dentry->d_op->d_release(dentry);
-
 	/* if dentry was never visible to RCU, immediate free is OK */
 	if (!(dentry->d_flags & DCACHE_RCUACCESS))
 		__d_free(&dentry->d_u.d_rcu);
@@ -403,56 +395,6 @@ static void dentry_lru_add(struct dentry *dentry)
 		d_lru_add(dentry);
 }
 
-/*
- * Remove a dentry with references from the LRU.
- *
- * If we are on the shrink list, then we can get to try_prune_one_dentry() and
- * lose our last reference through the parent walk. In this case, we need to
- * remove ourselves from the shrink list, not the LRU.
- */
-static void dentry_lru_del(struct dentry *dentry)
-{
-	if (dentry->d_flags & DCACHE_LRU_LIST) {
-		if (dentry->d_flags & DCACHE_SHRINK_LIST)
-			return d_shrink_del(dentry);
-		d_lru_del(dentry);
-	}
-}
-
-/**
- * d_kill - kill dentry and return parent
- * @dentry: dentry to kill
- * @parent: parent dentry
- *
- * The dentry must already be unhashed and removed from the LRU.
- *
- * If this is the root of the dentry tree, return NULL.
- *
- * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by
- * d_kill.
- */
-static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
-	__releases(dentry->d_lock)
-	__releases(parent->d_lock)
-	__releases(dentry->d_inode->i_lock)
-{
-	list_del(&dentry->d_u.d_child);
-	/*
-	 * Inform d_walk() that we are no longer attached to the
-	 * dentry tree
-	 */
-	dentry->d_flags |= DCACHE_DENTRY_KILLED;
-	if (parent)
-		spin_unlock(&parent->d_lock);
-	dentry_iput(dentry);
-	/*
-	 * dentry_iput drops the locks, at which point nobody (except
-	 * transient RCU lookups) can reach this dentry.
-	 */
-	d_free(dentry);
-	return parent;
-}
-
 /**
  * d_drop - drop a dentry
  * @dentry: dentry to drop
@@ -510,7 +452,14 @@ dentry_kill(struct dentry *dentry, int unlock_on_failure)
 	__releases(dentry->d_lock)
 {
 	struct inode *inode;
-	struct dentry *parent;
+	struct dentry *parent = NULL;
+	bool can_free = true;
+
+	if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) {
+		can_free = dentry->d_flags & DCACHE_MAY_FREE;
+		spin_unlock(&dentry->d_lock);
+		goto out;
+	}
 
 	inode = dentry->d_inode;
 	if (inode && !spin_trylock(&inode->i_lock)) {
@@ -521,9 +470,7 @@ relock:
 		}
 		return dentry; /* try again with same dentry */
 	}
-	if (IS_ROOT(dentry))
-		parent = NULL;
-	else
+	if (!IS_ROOT(dentry))
 		parent = dentry->d_parent;
 	if (parent && !spin_trylock(&parent->d_lock)) {
 		if (inode)
@@ -543,10 +490,40 @@ relock:
 	if ((dentry->d_flags & DCACHE_OP_PRUNE) && !d_unhashed(dentry))
 		dentry->d_op->d_prune(dentry);
 
-	dentry_lru_del(dentry);
+	if (dentry->d_flags & DCACHE_LRU_LIST) {
494 if (!(dentry->d_flags & DCACHE_SHRINK_LIST))
495 d_lru_del(dentry);
496 }
547 /* if it was on the hash then remove it */ 497 /* if it was on the hash then remove it */
548 __d_drop(dentry); 498 __d_drop(dentry);
549 return d_kill(dentry, parent); 499 list_del(&dentry->d_u.d_child);
500 /*
501 * Inform d_walk() that we are no longer attached to the
502 * dentry tree
503 */
504 dentry->d_flags |= DCACHE_DENTRY_KILLED;
505 if (parent)
506 spin_unlock(&parent->d_lock);
507 dentry_iput(dentry);
508 /*
509 * dentry_iput drops the locks, at which point nobody (except
510 * transient RCU lookups) can reach this dentry.
511 */
512 BUG_ON((int)dentry->d_lockref.count > 0);
513 this_cpu_dec(nr_dentry);
514 if (dentry->d_op && dentry->d_op->d_release)
515 dentry->d_op->d_release(dentry);
516
517 spin_lock(&dentry->d_lock);
518 if (dentry->d_flags & DCACHE_SHRINK_LIST) {
519 dentry->d_flags |= DCACHE_MAY_FREE;
520 can_free = false;
521 }
522 spin_unlock(&dentry->d_lock);
523out:
524 if (likely(can_free))
525 dentry_free(dentry);
526 return parent;
550} 527}
551 528
552/* 529/*
@@ -815,65 +792,13 @@ restart:
815} 792}
816EXPORT_SYMBOL(d_prune_aliases); 793EXPORT_SYMBOL(d_prune_aliases);
817 794
818/*
819 * Try to throw away a dentry - free the inode, dput the parent.
820 * Requires dentry->d_lock is held, and dentry->d_count == 0.
821 * Releases dentry->d_lock.
822 *
823 * This may fail if locks cannot be acquired no problem, just try again.
824 */
825static struct dentry * try_prune_one_dentry(struct dentry *dentry)
826 __releases(dentry->d_lock)
827{
828 struct dentry *parent;
829
830 parent = dentry_kill(dentry, 0);
831 /*
832 * If dentry_kill returns NULL, we have nothing more to do.
833 * if it returns the same dentry, trylocks failed. In either
834 * case, just loop again.
835 *
836 * Otherwise, we need to prune ancestors too. This is necessary
837 * to prevent quadratic behavior of shrink_dcache_parent(), but
838 * is also expected to be beneficial in reducing dentry cache
839 * fragmentation.
840 */
841 if (!parent)
842 return NULL;
843 if (parent == dentry)
844 return dentry;
845
846 /* Prune ancestors. */
847 dentry = parent;
848 while (dentry) {
849 if (lockref_put_or_lock(&dentry->d_lockref))
850 return NULL;
851 dentry = dentry_kill(dentry, 1);
852 }
853 return NULL;
854}
855
856static void shrink_dentry_list(struct list_head *list) 795static void shrink_dentry_list(struct list_head *list)
857{ 796{
858 struct dentry *dentry; 797 struct dentry *dentry, *parent;
859 798
860 rcu_read_lock(); 799 while (!list_empty(list)) {
861 for (;;) { 800 dentry = list_entry(list->prev, struct dentry, d_lru);
862 dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
863 if (&dentry->d_lru == list)
864 break; /* empty */
865
866 /*
867 * Get the dentry lock, and re-verify that the dentry is
868 * this on the shrinking list. If it is, we know that
869 * DCACHE_SHRINK_LIST and DCACHE_LRU_LIST are set.
870 */
871 spin_lock(&dentry->d_lock); 801 spin_lock(&dentry->d_lock);
872 if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
873 spin_unlock(&dentry->d_lock);
874 continue;
875 }
876
877 /* 802 /*
878 * The dispose list is isolated and dentries are not accounted 803 * The dispose list is isolated and dentries are not accounted
879 * to the LRU here, so we can simply remove it from the list 804 * to the LRU here, so we can simply remove it from the list
@@ -885,30 +810,38 @@ static void shrink_dentry_list(struct list_head *list)
885 * We found an inuse dentry which was not removed from 810 * We found an inuse dentry which was not removed from
886 * the LRU because of laziness during lookup. Do not free it. 811 * the LRU because of laziness during lookup. Do not free it.
887 */ 812 */
888 if (dentry->d_lockref.count) { 813 if ((int)dentry->d_lockref.count > 0) {
889 spin_unlock(&dentry->d_lock); 814 spin_unlock(&dentry->d_lock);
890 continue; 815 continue;
891 } 816 }
892 rcu_read_unlock();
893 817
818 parent = dentry_kill(dentry, 0);
894 /* 819 /*
895 * If 'try_to_prune()' returns a dentry, it will 820 * If dentry_kill returns NULL, we have nothing more to do.
896 * be the same one we passed in, and d_lock will
897 * have been held the whole time, so it will not
898 * have been added to any other lists. We failed
899 * to get the inode lock.
900 *
901 * We just add it back to the shrink list.
902 */ 821 */
903 dentry = try_prune_one_dentry(dentry); 822 if (!parent)
823 continue;
904 824
905 rcu_read_lock(); 825 if (unlikely(parent == dentry)) {
906 if (dentry) { 826 /*
827 * trylocks have failed and d_lock has been held the
828 * whole time, so it could not have been added to any
829 * other lists. Just add it back to the shrink list.
830 */
907 d_shrink_add(dentry, list); 831 d_shrink_add(dentry, list);
908 spin_unlock(&dentry->d_lock); 832 spin_unlock(&dentry->d_lock);
833 continue;
909 } 834 }
835 /*
836 * We need to prune ancestors too. This is necessary to prevent
837 * quadratic behavior of shrink_dcache_parent(), but is also
838 * expected to be beneficial in reducing dentry cache
839 * fragmentation.
840 */
841 dentry = parent;
842 while (dentry && !lockref_put_or_lock(&dentry->d_lockref))
843 dentry = dentry_kill(dentry, 1);
910 } 844 }
911 rcu_read_unlock();
912} 845}
913 846
914static enum lru_status 847static enum lru_status
@@ -1261,34 +1194,23 @@ static enum d_walk_ret select_collect(void *_data, struct dentry *dentry)
1261 if (data->start == dentry) 1194 if (data->start == dentry)
1262 goto out; 1195 goto out;
1263 1196
1264 /* 1197 if (dentry->d_flags & DCACHE_SHRINK_LIST) {
1265 * move only zero ref count dentries to the dispose list.
1266 *
1267 * Those which are presently on the shrink list, being processed
1268 * by shrink_dentry_list(), shouldn't be moved. Otherwise the
1269 * loop in shrink_dcache_parent() might not make any progress
1270 * and loop forever.
1271 */
1272 if (dentry->d_lockref.count) {
1273 dentry_lru_del(dentry);
1274 } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
1275 /*
1276 * We can't use d_lru_shrink_move() because we
1277 * need to get the global LRU lock and do the
1278 * LRU accounting.
1279 */
1280 d_lru_del(dentry);
1281 d_shrink_add(dentry, &data->dispose);
1282 data->found++; 1198 data->found++;
1283 ret = D_WALK_NORETRY; 1199 } else {
1200 if (dentry->d_flags & DCACHE_LRU_LIST)
1201 d_lru_del(dentry);
1202 if (!dentry->d_lockref.count) {
1203 d_shrink_add(dentry, &data->dispose);
1204 data->found++;
1205 }
1284 } 1206 }
1285 /* 1207 /*
1286 * We can return to the caller if we have found some (this 1208 * We can return to the caller if we have found some (this
1287 * ensures forward progress). We'll be coming back to find 1209 * ensures forward progress). We'll be coming back to find
1288 * the rest. 1210 * the rest.
1289 */ 1211 */
1290 if (data->found && need_resched()) 1212 if (!list_empty(&data->dispose))
1291 ret = D_WALK_QUIT; 1213 ret = need_resched() ? D_WALK_QUIT : D_WALK_NORETRY;
1292out: 1214out:
1293 return ret; 1215 return ret;
1294} 1216}
@@ -1318,45 +1240,35 @@ void shrink_dcache_parent(struct dentry *parent)
1318} 1240}
1319EXPORT_SYMBOL(shrink_dcache_parent); 1241EXPORT_SYMBOL(shrink_dcache_parent);
1320 1242
1321static enum d_walk_ret umount_collect(void *_data, struct dentry *dentry) 1243static enum d_walk_ret umount_check(void *_data, struct dentry *dentry)
1322{ 1244{
1323 struct select_data *data = _data; 1245 /* it has busy descendents; complain about those instead */
1324 enum d_walk_ret ret = D_WALK_CONTINUE; 1246 if (!list_empty(&dentry->d_subdirs))
1247 return D_WALK_CONTINUE;
1325 1248
1326 if (dentry->d_lockref.count) { 1249 /* root with refcount 1 is fine */
1327 dentry_lru_del(dentry); 1250 if (dentry == _data && dentry->d_lockref.count == 1)
1328 if (likely(!list_empty(&dentry->d_subdirs))) 1251 return D_WALK_CONTINUE;
1329 goto out; 1252
1330 if (dentry == data->start && dentry->d_lockref.count == 1) 1253 printk(KERN_ERR "BUG: Dentry %p{i=%lx,n=%pd} "
1331 goto out; 1254 " still in use (%d) [unmount of %s %s]\n",
1332 printk(KERN_ERR
1333 "BUG: Dentry %p{i=%lx,n=%s}"
1334 " still in use (%d)"
1335 " [unmount of %s %s]\n",
1336 dentry, 1255 dentry,
1337 dentry->d_inode ? 1256 dentry->d_inode ?
1338 dentry->d_inode->i_ino : 0UL, 1257 dentry->d_inode->i_ino : 0UL,
1339 dentry->d_name.name, 1258 dentry,
1340 dentry->d_lockref.count, 1259 dentry->d_lockref.count,
1341 dentry->d_sb->s_type->name, 1260 dentry->d_sb->s_type->name,
1342 dentry->d_sb->s_id); 1261 dentry->d_sb->s_id);
1343 BUG(); 1262 WARN_ON(1);
1344 } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { 1263 return D_WALK_CONTINUE;
1345 /* 1264}
1346 * We can't use d_lru_shrink_move() because we 1265
1347 * need to get the global LRU lock and do the 1266static void do_one_tree(struct dentry *dentry)
1348 * LRU accounting. 1267{
1349 */ 1268 shrink_dcache_parent(dentry);
1350 if (dentry->d_flags & DCACHE_LRU_LIST) 1269 d_walk(dentry, dentry, umount_check, NULL);
1351 d_lru_del(dentry); 1270 d_drop(dentry);
1352 d_shrink_add(dentry, &data->dispose); 1271 dput(dentry);
1353 data->found++;
1354 ret = D_WALK_NORETRY;
1355 }
1356out:
1357 if (data->found && need_resched())
1358 ret = D_WALK_QUIT;
1359 return ret;
1360} 1272}
1361 1273
1362/* 1274/*
@@ -1366,40 +1278,15 @@ void shrink_dcache_for_umount(struct super_block *sb)
1366{ 1278{
1367 struct dentry *dentry; 1279 struct dentry *dentry;
1368 1280
1369 if (down_read_trylock(&sb->s_umount)) 1281 WARN(down_read_trylock(&sb->s_umount), "s_umount should've been locked");
1370 BUG();
1371 1282
1372 dentry = sb->s_root; 1283 dentry = sb->s_root;
1373 sb->s_root = NULL; 1284 sb->s_root = NULL;
1374 for (;;) { 1285 do_one_tree(dentry);
1375 struct select_data data;
1376
1377 INIT_LIST_HEAD(&data.dispose);
1378 data.start = dentry;
1379 data.found = 0;
1380
1381 d_walk(dentry, &data, umount_collect, NULL);
1382 if (!data.found)
1383 break;
1384
1385 shrink_dentry_list(&data.dispose);
1386 cond_resched();
1387 }
1388 d_drop(dentry);
1389 dput(dentry);
1390 1286
1391 while (!hlist_bl_empty(&sb->s_anon)) { 1287 while (!hlist_bl_empty(&sb->s_anon)) {
1392 struct select_data data; 1288 dentry = dget(hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash));
1393 dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash); 1289 do_one_tree(dentry);
1394
1395 INIT_LIST_HEAD(&data.dispose);
1396 data.start = NULL;
1397 data.found = 0;
1398
1399 d_walk(dentry, &data, umount_collect, NULL);
1400 if (data.found)
1401 shrink_dentry_list(&data.dispose);
1402 cond_resched();
1403 } 1290 }
1404} 1291}
1405 1292
@@ -1647,8 +1534,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1647 unsigned add_flags = d_flags_for_inode(inode); 1534 unsigned add_flags = d_flags_for_inode(inode);
1648 1535
1649 spin_lock(&dentry->d_lock); 1536 spin_lock(&dentry->d_lock);
1650 dentry->d_flags &= ~DCACHE_ENTRY_TYPE; 1537 __d_set_type(dentry, add_flags);
1651 dentry->d_flags |= add_flags;
1652 if (inode) 1538 if (inode)
1653 hlist_add_head(&dentry->d_alias, &inode->i_dentry); 1539 hlist_add_head(&dentry->d_alias, &inode->i_dentry);
1654 dentry->d_inode = inode; 1540 dentry->d_inode = inode;
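The bulk of this dcache rework folds d_free(), dentry_lru_del() and d_kill() into dentry_kill() and replaces the old RCU retry dance in shrink_dentry_list() with a handshake: the killer marks the dentry DCACHE_DENTRY_KILLED, and if a shrinker still holds it on a private shrink list, only DCACHE_MAY_FREE is set and whichever side finishes last calls dentry_free(). A reduced model of that "last one out frees" pattern, with invented names and a plain mutex standing in for d_lock:

#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

struct obj {
	pthread_mutex_t lock;
	bool killed;	/* tear-down has run (DCACHE_DENTRY_KILLED) */
	bool on_list;	/* a scanner holds it on its private list */
	bool may_free;	/* killer finished while the scanner held it */
};

/* killer side: tear the object down, free it only if no scanner
 * still holds it */
static void obj_kill(struct obj *o)
{
	bool can_free = true;

	/* ... unhash, detach from the tree, etc. ... */

	pthread_mutex_lock(&o->lock);
	o->killed = true;
	if (o->on_list) {
		o->may_free = true;	/* scanner frees it later */
		can_free = false;
	}
	pthread_mutex_unlock(&o->lock);

	if (can_free)
		free(o);
}

/* scanner side: drop the object from its list, freeing it only if
 * the killer already ran and left it behind for us */
static void scan_drop(struct obj *o)
{
	bool can_free;

	pthread_mutex_lock(&o->lock);
	o->on_list = false;
	can_free = o->killed && o->may_free;
	pthread_mutex_unlock(&o->lock);

	if (can_free)
		free(o);
}

Both decisions are made under the same lock, so exactly one path observes can_free == true and a double free is impossible.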
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 6ea7b1436bbc..5c56785007e0 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -667,7 +667,7 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
667 continue; 667 continue;
668 668
669 x = ext4_count_free(bitmap_bh->b_data, 669 x = ext4_count_free(bitmap_bh->b_data,
670 EXT4_BLOCKS_PER_GROUP(sb) / 8); 670 EXT4_CLUSTERS_PER_GROUP(sb) / 8);
671 printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n", 671 printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n",
672 i, ext4_free_group_clusters(sb, gdp), x); 672 i, ext4_free_group_clusters(sb, gdp), x);
673 bitmap_count += x; 673 bitmap_count += x;
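The balloc.c one-liner matters only for bigalloc filesystems, where each bitmap bit covers a cluster of 2^N blocks: the real bitmap is EXT4_CLUSTERS_PER_GROUP(sb)/8 bytes, so counting over EXT4_BLOCKS_PER_GROUP(sb)/8 bytes scans 2^N times past its end. Rough numbers, purely illustrative:

	unsigned cluster_bits       = 4;	/* 16 blocks per cluster, say */
	unsigned clusters_per_group = 32768;
	unsigned blocks_per_group   = clusters_per_group << cluster_bits;

	unsigned bitmap_bytes  = clusters_per_group / 8;	/*  4096: the real bitmap */
	unsigned scanned_bytes = blocks_per_group / 8;		/* 65536: 16x overread */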
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index f1c65dc7cc0a..66946aa62127 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2466,23 +2466,6 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
2466 up_write(&EXT4_I(inode)->i_data_sem); 2466 up_write(&EXT4_I(inode)->i_data_sem);
2467} 2467}
2468 2468
2469/*
2470 * Update i_disksize after writeback has been started. Races with truncate
2471 * are avoided by checking i_size under i_data_sem.
2472 */
2473static inline void ext4_wb_update_i_disksize(struct inode *inode, loff_t newsize)
2474{
2475 loff_t i_size;
2476
2477 down_write(&EXT4_I(inode)->i_data_sem);
2478 i_size = i_size_read(inode);
2479 if (newsize > i_size)
2480 newsize = i_size;
2481 if (newsize > EXT4_I(inode)->i_disksize)
2482 EXT4_I(inode)->i_disksize = newsize;
2483 up_write(&EXT4_I(inode)->i_data_sem);
2484}
2485
2486struct ext4_group_info { 2469struct ext4_group_info {
2487 unsigned long bb_state; 2470 unsigned long bb_state;
2488 struct rb_root bb_free_root; 2471 struct rb_root bb_free_root;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 82df3ce9874a..01b0c208f625 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3313,6 +3313,11 @@ static int ext4_split_extent(handle_t *handle,
3313 return PTR_ERR(path); 3313 return PTR_ERR(path);
3314 depth = ext_depth(inode); 3314 depth = ext_depth(inode);
3315 ex = path[depth].p_ext; 3315 ex = path[depth].p_ext;
3316 if (!ex) {
3317 EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
3318 (unsigned long) map->m_lblk);
3319 return -EIO;
3320 }
3316 uninitialized = ext4_ext_is_uninitialized(ex); 3321 uninitialized = ext4_ext_is_uninitialized(ex);
3317 split_flag1 = 0; 3322 split_flag1 = 0;
3318 3323
@@ -3694,6 +3699,12 @@ static int ext4_convert_initialized_extents(handle_t *handle,
3694 } 3699 }
3695 depth = ext_depth(inode); 3700 depth = ext_depth(inode);
3696 ex = path[depth].p_ext; 3701 ex = path[depth].p_ext;
3702 if (!ex) {
3703 EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
3704 (unsigned long) map->m_lblk);
3705 err = -EIO;
3706 goto out;
3707 }
3697 } 3708 }
3698 3709
3699 err = ext4_ext_get_access(handle, inode, path + depth); 3710 err = ext4_ext_get_access(handle, inode, path + depth);
@@ -4730,6 +4741,9 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4730 4741
4731 trace_ext4_zero_range(inode, offset, len, mode); 4742 trace_ext4_zero_range(inode, offset, len, mode);
4732 4743
4744 if (!S_ISREG(inode->i_mode))
4745 return -EINVAL;
4746
4733 /* 4747 /*
4734 * Write out all dirty pages to avoid race conditions 4748 * Write out all dirty pages to avoid race conditions
4735 * Then release them. 4749 * Then release them.
@@ -4878,9 +4892,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4878 if (mode & FALLOC_FL_PUNCH_HOLE) 4892 if (mode & FALLOC_FL_PUNCH_HOLE)
4879 return ext4_punch_hole(inode, offset, len); 4893 return ext4_punch_hole(inode, offset, len);
4880 4894
4881 if (mode & FALLOC_FL_COLLAPSE_RANGE)
4882 return ext4_collapse_range(inode, offset, len);
4883
4884 ret = ext4_convert_inline_data(inode); 4895 ret = ext4_convert_inline_data(inode);
4885 if (ret) 4896 if (ret)
4886 return ret; 4897 return ret;
@@ -4892,6 +4903,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4892 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 4903 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
4893 return -EOPNOTSUPP; 4904 return -EOPNOTSUPP;
4894 4905
4906 if (mode & FALLOC_FL_COLLAPSE_RANGE)
4907 return ext4_collapse_range(inode, offset, len);
4908
4895 if (mode & FALLOC_FL_ZERO_RANGE) 4909 if (mode & FALLOC_FL_ZERO_RANGE)
4896 return ext4_zero_range(file, offset, len, mode); 4910 return ext4_zero_range(file, offset, len, mode);
4897 4911
@@ -5229,18 +5243,19 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
5229 if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) 5243 if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr))
5230 update = 1; 5244 update = 1;
5231 5245
5232 *start = ex_last->ee_block + 5246 *start = le32_to_cpu(ex_last->ee_block) +
5233 ext4_ext_get_actual_len(ex_last); 5247 ext4_ext_get_actual_len(ex_last);
5234 5248
5235 while (ex_start <= ex_last) { 5249 while (ex_start <= ex_last) {
5236 ex_start->ee_block -= shift; 5250 le32_add_cpu(&ex_start->ee_block, -shift);
5237 if (ex_start > 5251 /* Try to merge to the left. */
5238 EXT_FIRST_EXTENT(path[depth].p_hdr)) { 5252 if ((ex_start >
5239 if (ext4_ext_try_to_merge_right(inode, 5253 EXT_FIRST_EXTENT(path[depth].p_hdr)) &&
5240 path, ex_start - 1)) 5254 ext4_ext_try_to_merge_right(inode,
5241 ex_last--; 5255 path, ex_start - 1))
5242 } 5256 ex_last--;
5243 ex_start++; 5257 else
5258 ex_start++;
5244 } 5259 }
5245 err = ext4_ext_dirty(handle, inode, path + depth); 5260 err = ext4_ext_dirty(handle, inode, path + depth);
5246 if (err) 5261 if (err)
@@ -5255,7 +5270,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
5255 if (err) 5270 if (err)
5256 goto out; 5271 goto out;
5257 5272
5258 path[depth].p_idx->ei_block -= shift; 5273 le32_add_cpu(&path[depth].p_idx->ei_block, -shift);
5259 err = ext4_ext_dirty(handle, inode, path + depth); 5274 err = ext4_ext_dirty(handle, inode, path + depth);
5260 if (err) 5275 if (err)
5261 goto out; 5276 goto out;
@@ -5300,7 +5315,8 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
5300 return ret; 5315 return ret;
5301 } 5316 }
5302 5317
5303 stop_block = extent->ee_block + ext4_ext_get_actual_len(extent); 5318 stop_block = le32_to_cpu(extent->ee_block) +
5319 ext4_ext_get_actual_len(extent);
5304 ext4_ext_drop_refs(path); 5320 ext4_ext_drop_refs(path);
5305 kfree(path); 5321 kfree(path);
5306 5322
@@ -5313,10 +5329,18 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
5313 * enough to accomodate the shift. 5329 * enough to accomodate the shift.
5314 */ 5330 */
5315 path = ext4_ext_find_extent(inode, start - 1, NULL, 0); 5331 path = ext4_ext_find_extent(inode, start - 1, NULL, 0);
5332 if (IS_ERR(path))
5333 return PTR_ERR(path);
5316 depth = path->p_depth; 5334 depth = path->p_depth;
5317 extent = path[depth].p_ext; 5335 extent = path[depth].p_ext;
5318 ex_start = extent->ee_block; 5336 if (extent) {
5319 ex_end = extent->ee_block + ext4_ext_get_actual_len(extent); 5337 ex_start = le32_to_cpu(extent->ee_block);
5338 ex_end = le32_to_cpu(extent->ee_block) +
5339 ext4_ext_get_actual_len(extent);
5340 } else {
5341 ex_start = 0;
5342 ex_end = 0;
5343 }
5320 ext4_ext_drop_refs(path); 5344 ext4_ext_drop_refs(path);
5321 kfree(path); 5345 kfree(path);
5322 5346
@@ -5331,7 +5355,13 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
5331 return PTR_ERR(path); 5355 return PTR_ERR(path);
5332 depth = path->p_depth; 5356 depth = path->p_depth;
5333 extent = path[depth].p_ext; 5357 extent = path[depth].p_ext;
5334 current_block = extent->ee_block; 5358 if (!extent) {
5359 EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
5360 (unsigned long) start);
5361 return -EIO;
5362 }
5363
5364 current_block = le32_to_cpu(extent->ee_block);
5335 if (start > current_block) { 5365 if (start > current_block) {
5336 /* Hole, move to the next extent */ 5366 /* Hole, move to the next extent */
5337 ret = mext_next_extent(inode, path, &extent); 5367 ret = mext_next_extent(inode, path, &extent);
@@ -5365,17 +5395,18 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
5365 ext4_lblk_t punch_start, punch_stop; 5395 ext4_lblk_t punch_start, punch_stop;
5366 handle_t *handle; 5396 handle_t *handle;
5367 unsigned int credits; 5397 unsigned int credits;
5368 loff_t new_size; 5398 loff_t new_size, ioffset;
5369 int ret; 5399 int ret;
5370 5400
5371 BUG_ON(offset + len > i_size_read(inode));
5372
5373 /* Collapse range works only on fs block size aligned offsets. */ 5401 /* Collapse range works only on fs block size aligned offsets. */
5374 if (offset & (EXT4_BLOCK_SIZE(sb) - 1) || 5402 if (offset & (EXT4_BLOCK_SIZE(sb) - 1) ||
5375 len & (EXT4_BLOCK_SIZE(sb) - 1)) 5403 len & (EXT4_BLOCK_SIZE(sb) - 1))
5376 return -EINVAL; 5404 return -EINVAL;
5377 5405
5378 if (!S_ISREG(inode->i_mode)) 5406 if (!S_ISREG(inode->i_mode))
5407 return -EINVAL;
5408
5409 if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1)
5379 return -EOPNOTSUPP; 5410 return -EOPNOTSUPP;
5380 5411
5381 trace_ext4_collapse_range(inode, offset, len); 5412 trace_ext4_collapse_range(inode, offset, len);
@@ -5383,22 +5414,34 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
5383 punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb); 5414 punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
5384 punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb); 5415 punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb);
5385 5416
5417 /* Call ext4_force_commit to flush all data in case of data=journal. */
5418 if (ext4_should_journal_data(inode)) {
5419 ret = ext4_force_commit(inode->i_sb);
5420 if (ret)
5421 return ret;
5422 }
5423
5424 /*
5425 * Need to round down offset to be aligned with page size boundary
5426 * for page size > block size.
5427 */
5428 ioffset = round_down(offset, PAGE_SIZE);
5429
5386 /* Write out all dirty pages */ 5430 /* Write out all dirty pages */
5387 ret = filemap_write_and_wait_range(inode->i_mapping, offset, -1); 5431 ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
5432 LLONG_MAX);
5388 if (ret) 5433 if (ret)
5389 return ret; 5434 return ret;
5390 5435
5391 /* Take mutex lock */ 5436 /* Take mutex lock */
5392 mutex_lock(&inode->i_mutex); 5437 mutex_lock(&inode->i_mutex);
5393 5438
5394 /* It's not possible punch hole on append only file */ 5439 /*
5395 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) { 5440 * There is no need to overlap collapse range with EOF, in which case
5396 ret = -EPERM; 5441 * it is effectively a truncate operation
5397 goto out_mutex; 5442 */
5398 } 5443 if (offset + len >= i_size_read(inode)) {
5399 5444 ret = -EINVAL;
5400 if (IS_SWAPFILE(inode)) {
5401 ret = -ETXTBSY;
5402 goto out_mutex; 5445 goto out_mutex;
5403 } 5446 }
5404 5447
@@ -5408,7 +5451,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
5408 goto out_mutex; 5451 goto out_mutex;
5409 } 5452 }
5410 5453
5411 truncate_pagecache_range(inode, offset, -1); 5454 truncate_pagecache(inode, ioffset);
5412 5455
5413 /* Wait for existing dio to complete */ 5456 /* Wait for existing dio to complete */
5414 ext4_inode_block_unlocked_dio(inode); 5457 ext4_inode_block_unlocked_dio(inode);
@@ -5425,7 +5468,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
5425 ext4_discard_preallocations(inode); 5468 ext4_discard_preallocations(inode);
5426 5469
5427 ret = ext4_es_remove_extent(inode, punch_start, 5470 ret = ext4_es_remove_extent(inode, punch_start,
5428 EXT_MAX_BLOCKS - punch_start - 1); 5471 EXT_MAX_BLOCKS - punch_start);
5429 if (ret) { 5472 if (ret) {
5430 up_write(&EXT4_I(inode)->i_data_sem); 5473 up_write(&EXT4_I(inode)->i_data_sem);
5431 goto out_stop; 5474 goto out_stop;
@@ -5436,6 +5479,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
5436 up_write(&EXT4_I(inode)->i_data_sem); 5479 up_write(&EXT4_I(inode)->i_data_sem);
5437 goto out_stop; 5480 goto out_stop;
5438 } 5481 }
5482 ext4_discard_preallocations(inode);
5439 5483
5440 ret = ext4_ext_shift_extents(inode, handle, punch_stop, 5484 ret = ext4_ext_shift_extents(inode, handle, punch_stop,
5441 punch_stop - punch_start); 5485 punch_stop - punch_start);
@@ -5445,10 +5489,9 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
5445 } 5489 }
5446 5490
5447 new_size = i_size_read(inode) - len; 5491 new_size = i_size_read(inode) - len;
5448 truncate_setsize(inode, new_size); 5492 i_size_write(inode, new_size);
5449 EXT4_I(inode)->i_disksize = new_size; 5493 EXT4_I(inode)->i_disksize = new_size;
5450 5494
5451 ext4_discard_preallocations(inode);
5452 up_write(&EXT4_I(inode)->i_data_sem); 5495 up_write(&EXT4_I(inode)->i_data_sem);
5453 if (IS_SYNC(inode)) 5496 if (IS_SYNC(inode))
5454 ext4_handle_sync(handle); 5497 ext4_handle_sync(handle);
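Several hunks here (ee_block, ei_block) replace raw arithmetic on on-disk fields with le32_to_cpu()/le32_add_cpu(). ext4 metadata is little-endian on disk, so `extent->ee_block + len` happens to be right on x86 but computes garbage on big-endian machines. What the helpers do, written out as portable userspace C operating byte-wise (correct on hosts of either endianness; the kernel generates its versions from the byteorder headers):

#include <stdint.h>

static inline uint32_t le32_get(const uint8_t b[4])
{
	return (uint32_t)b[0] | ((uint32_t)b[1] << 8) |
	       ((uint32_t)b[2] << 16) | ((uint32_t)b[3] << 24);
}

static inline void le32_put(uint32_t v, uint8_t b[4])
{
	b[0] = (uint8_t)v;
	b[1] = (uint8_t)(v >> 8);
	b[2] = (uint8_t)(v >> 16);
	b[3] = (uint8_t)(v >> 24);
}

/* le32_add_cpu(): decode, adjust in native order, re-encode */
static inline void le32_add(uint8_t b[4], int32_t delta)
{
	le32_put(le32_get(b) + (uint32_t)delta, b);
}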
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 0a014a7194b2..0ebc21204b51 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -810,7 +810,7 @@ retry:
810 810
811 newes.es_lblk = end + 1; 811 newes.es_lblk = end + 1;
812 newes.es_len = len2; 812 newes.es_len = len2;
813 block = 0x7FDEADBEEF; 813 block = 0x7FDEADBEEFULL;
814 if (ext4_es_is_written(&orig_es) || 814 if (ext4_es_is_written(&orig_es) ||
815 ext4_es_is_unwritten(&orig_es)) 815 ext4_es_is_unwritten(&orig_es))
816 block = ext4_es_pblock(&orig_es) + 816 block = ext4_es_pblock(&orig_es) +
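A small build fix: `block` is an ext4_fsblk_t (unsigned long long) and 0x7FDEADBEEF needs 39 bits, so on 32-bit targets the unsuffixed constant drew "integer constant is too large" warnings in the dialect the kernel builds with. The ULL suffix names the intended type explicitly:

#include <stdio.h>

int main(void)
{
	/* without a suffix the constant's type is picked by the
	 * compiler from int/long/long long; ULL pins it down */
	unsigned long long sentinel = 0x7FDEADBEEFULL;

	printf("%llx\n", sentinel);	/* prints 7fdeadbeef */
	return 0;
}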
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index ca7502d89fde..063fc1538355 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -82,7 +82,7 @@ ext4_unaligned_aio(struct inode *inode, const struct iovec *iov,
82 size_t count = iov_length(iov, nr_segs); 82 size_t count = iov_length(iov, nr_segs);
83 loff_t final_size = pos + count; 83 loff_t final_size = pos + count;
84 84
85 if (pos >= inode->i_size) 85 if (pos >= i_size_read(inode))
86 return 0; 86 return 0;
87 87
88 if ((pos & blockmask) || (final_size & blockmask)) 88 if ((pos & blockmask) || (final_size & blockmask))
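Using i_size_read() instead of a bare inode->i_size load matters on 32-bit SMP kernels, where a loff_t is two machine words and a racing truncate or extend can be observed half-updated. The helper of this era was a seqcount retry loop, roughly as below (shape of include/linux/fs.h reproduced from memory, so treat details as approximate):

static inline loff_t i_size_read(const struct inode *inode)
{
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
	loff_t i_size;
	unsigned int seq;

	do {
		seq = read_seqcount_begin(&inode->i_size_seqcount);
		i_size = inode->i_size;
	} while (read_seqcount_retry(&inode->i_size_seqcount, seq));
	return i_size;
#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
	loff_t i_size;

	preempt_disable();
	i_size = inode->i_size;
	preempt_enable();
	return i_size;
#else
	return inode->i_size;	/* 64-bit: a plain load is atomic */
#endif
}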
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5b0d2c7d5408..d7b7462a0e13 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -522,6 +522,10 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
522 if (unlikely(map->m_len > INT_MAX)) 522 if (unlikely(map->m_len > INT_MAX))
523 map->m_len = INT_MAX; 523 map->m_len = INT_MAX;
524 524
525 /* We can handle the block number less than EXT_MAX_BLOCKS */
526 if (unlikely(map->m_lblk >= EXT_MAX_BLOCKS))
527 return -EIO;
528
525 /* Lookup extent status tree firstly */ 529 /* Lookup extent status tree firstly */
526 if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { 530 if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
527 ext4_es_lru_add(inode); 531 ext4_es_lru_add(inode);
@@ -2243,13 +2247,23 @@ static int mpage_map_and_submit_extent(handle_t *handle,
2243 return err; 2247 return err;
2244 } while (map->m_len); 2248 } while (map->m_len);
2245 2249
2246 /* Update on-disk size after IO is submitted */ 2250 /*
2251 * Update on-disk size after IO is submitted. Races with
2252 * truncate are avoided by checking i_size under i_data_sem.
2253 */
2247 disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT; 2254 disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT;
2248 if (disksize > EXT4_I(inode)->i_disksize) { 2255 if (disksize > EXT4_I(inode)->i_disksize) {
2249 int err2; 2256 int err2;
2250 2257 loff_t i_size;
2251 ext4_wb_update_i_disksize(inode, disksize); 2258
2259 down_write(&EXT4_I(inode)->i_data_sem);
2260 i_size = i_size_read(inode);
2261 if (disksize > i_size)
2262 disksize = i_size;
2263 if (disksize > EXT4_I(inode)->i_disksize)
2264 EXT4_I(inode)->i_disksize = disksize;
2252 err2 = ext4_mark_inode_dirty(handle, inode); 2265 err2 = ext4_mark_inode_dirty(handle, inode);
2266 up_write(&EXT4_I(inode)->i_data_sem);
2253 if (err2) 2267 if (err2)
2254 ext4_error(inode->i_sb, 2268 ext4_error(inode->i_sb,
2255 "Failed to mark inode %lu dirty", 2269 "Failed to mark inode %lu dirty",
@@ -3527,15 +3541,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
3527 } 3541 }
3528 3542
3529 mutex_lock(&inode->i_mutex); 3543 mutex_lock(&inode->i_mutex);
3530 /* It's not possible punch hole on append only file */
3531 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
3532 ret = -EPERM;
3533 goto out_mutex;
3534 }
3535 if (IS_SWAPFILE(inode)) {
3536 ret = -ETXTBSY;
3537 goto out_mutex;
3538 }
3539 3544
3540 /* No need to punch hole beyond i_size */ 3545 /* No need to punch hole beyond i_size */
3541 if (offset >= inode->i_size) 3546 if (offset >= inode->i_size)
@@ -3616,7 +3621,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
3616 ret = ext4_free_hole_blocks(handle, inode, first_block, 3621 ret = ext4_free_hole_blocks(handle, inode, first_block,
3617 stop_block); 3622 stop_block);
3618 3623
3619 ext4_discard_preallocations(inode);
3620 up_write(&EXT4_I(inode)->i_data_sem); 3624 up_write(&EXT4_I(inode)->i_data_sem);
3621 if (IS_SYNC(inode)) 3625 if (IS_SYNC(inode))
3622 ext4_handle_sync(handle); 3626 ext4_handle_sync(handle);
@@ -4423,21 +4427,20 @@ out_brelse:
4423 * 4427 *
4424 * We are called from a few places: 4428 * We are called from a few places:
4425 * 4429 *
4426 * - Within generic_file_write() for O_SYNC files. 4430 * - Within generic_file_aio_write() -> generic_write_sync() for O_SYNC files.
4427 * Here, there will be no transaction running. We wait for any running 4431 * Here, there will be no transaction running. We wait for any running
4428 * transaction to commit. 4432 * transaction to commit.
4429 * 4433 *
4430 * - Within sys_sync(), kupdate and such. 4434 * - Within flush work (sys_sync(), kupdate and such).
4431 * We wait on commit, if tol to. 4435 * We wait on commit, if told to.
4432 * 4436 *
4433 * - Within prune_icache() (PF_MEMALLOC == true) 4437 * - Within iput_final() -> write_inode_now()
4434 * Here we simply return. We can't afford to block kswapd on the 4438 * We wait on commit, if told to.
4435 * journal commit.
4436 * 4439 *
4437 * In all cases it is actually safe for us to return without doing anything, 4440 * In all cases it is actually safe for us to return without doing anything,
4438 * because the inode has been copied into a raw inode buffer in 4441 * because the inode has been copied into a raw inode buffer in
4439 * ext4_mark_inode_dirty(). This is a correctness thing for O_SYNC and for 4442 * ext4_mark_inode_dirty(). This is a correctness thing for WB_SYNC_ALL
4440 * knfsd. 4443 * writeback.
4441 * 4444 *
4442 * Note that we are absolutely dependent upon all inode dirtiers doing the 4445 * Note that we are absolutely dependent upon all inode dirtiers doing the
4443 * right thing: they *must* call mark_inode_dirty() after dirtying info in 4446 * right thing: they *must* call mark_inode_dirty() after dirtying info in
@@ -4449,15 +4452,15 @@ out_brelse:
4449 * stuff(); 4452 * stuff();
4450 * inode->i_size = expr; 4453 * inode->i_size = expr;
4451 * 4454 *
4452 * is in error because a kswapd-driven write_inode() could occur while 4455 * is in error because write_inode() could occur while `stuff()' is running,
4453 * `stuff()' is running, and the new i_size will be lost. Plus the inode 4456 * and the new i_size will be lost. Plus the inode will no longer be on the
4454 * will no longer be on the superblock's dirty inode list. 4457 * superblock's dirty inode list.
4455 */ 4458 */
4456int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) 4459int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
4457{ 4460{
4458 int err; 4461 int err;
4459 4462
4460 if (current->flags & PF_MEMALLOC) 4463 if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
4461 return 0; 4464 return 0;
4462 4465
4463 if (EXT4_SB(inode->i_sb)->s_journal) { 4466 if (EXT4_SB(inode->i_sb)->s_journal) {
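The mpage_map_and_submit_extent() hunk above open-codes the just-deleted ext4_wb_update_i_disksize() and, more importantly, moves ext4_mark_inode_dirty() inside i_data_sem: the on-disk size must be clamped against i_size and published under the same lock truncate takes, or writeback could record a size past what a concurrent truncate left. A schematic analogue of the rule, with a pthread rwlock standing in for i_data_sem and invented names throughout:

#include <pthread.h>

static pthread_rwlock_t data_sem = PTHREAD_RWLOCK_INITIALIZER;
static long long i_size;	/* logical size, shrunk by truncate */
static long long i_disksize;	/* size recorded on disk */

/* writeback: never push i_disksize past what truncate left behind */
static void update_disksize(long long newsize)
{
	pthread_rwlock_wrlock(&data_sem);
	if (newsize > i_size)
		newsize = i_size;	/* truncate won the race */
	if (newsize > i_disksize)
		i_disksize = newsize;
	/* marking the inode dirty must also happen before the lock
	 * drops, or a racing truncate's update can be overwritten */
	pthread_rwlock_unlock(&data_sem);
}

/* truncate runs under the same lock */
static void do_truncate(long long newsize)
{
	pthread_rwlock_wrlock(&data_sem);
	i_size = newsize;
	if (i_disksize > newsize)
		i_disksize = newsize;
	pthread_rwlock_unlock(&data_sem);
}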
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index a888cac76e9c..c8238a26818c 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -989,7 +989,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
989 poff = block % blocks_per_page; 989 poff = block % blocks_per_page;
990 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); 990 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
991 if (!page) 991 if (!page)
992 return -EIO; 992 return -ENOMEM;
993 BUG_ON(page->mapping != inode->i_mapping); 993 BUG_ON(page->mapping != inode->i_mapping);
994 e4b->bd_bitmap_page = page; 994 e4b->bd_bitmap_page = page;
995 e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize); 995 e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
@@ -1003,7 +1003,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
1003 pnum = block / blocks_per_page; 1003 pnum = block / blocks_per_page;
1004 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); 1004 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
1005 if (!page) 1005 if (!page)
1006 return -EIO; 1006 return -ENOMEM;
1007 BUG_ON(page->mapping != inode->i_mapping); 1007 BUG_ON(page->mapping != inode->i_mapping);
1008 e4b->bd_buddy_page = page; 1008 e4b->bd_buddy_page = page;
1009 return 0; 1009 return 0;
@@ -1168,7 +1168,11 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
1168 unlock_page(page); 1168 unlock_page(page);
1169 } 1169 }
1170 } 1170 }
1171 if (page == NULL || !PageUptodate(page)) { 1171 if (page == NULL) {
1172 ret = -ENOMEM;
1173 goto err;
1174 }
1175 if (!PageUptodate(page)) {
1172 ret = -EIO; 1176 ret = -EIO;
1173 goto err; 1177 goto err;
1174 } 1178 }
@@ -1197,7 +1201,11 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
1197 unlock_page(page); 1201 unlock_page(page);
1198 } 1202 }
1199 } 1203 }
1200 if (page == NULL || !PageUptodate(page)) { 1204 if (page == NULL) {
1205 ret = -ENOMEM;
1206 goto err;
1207 }
1208 if (!PageUptodate(page)) {
1201 ret = -EIO; 1209 ret = -EIO;
1202 goto err; 1210 goto err;
1203 } 1211 }
@@ -5008,6 +5016,8 @@ error_return:
5008 */ 5016 */
5009static int ext4_trim_extent(struct super_block *sb, int start, int count, 5017static int ext4_trim_extent(struct super_block *sb, int start, int count,
5010 ext4_group_t group, struct ext4_buddy *e4b) 5018 ext4_group_t group, struct ext4_buddy *e4b)
5019__releases(bitlock)
5020__acquires(bitlock)
5011{ 5021{
5012 struct ext4_free_extent ex; 5022 struct ext4_free_extent ex;
5013 int ret = 0; 5023 int ret = 0;
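Two independent cleanups in mballoc.c: failed page allocations now return -ENOMEM rather than -EIO, letting callers tell memory pressure from on-disk corruption, and ext4_trim_extent() gains __releases()/__acquires() so sparse knows that dropping and retaking the group lock inside the function is deliberate. The annotations compile away; under sparse they become context attributes, roughly (compiler.h shape from memory; the lock type and names below are illustrative):

#ifdef __CHECKER__
# define __acquires(x)	__attribute__((context(x, 0, 1)))
# define __releases(x)	__attribute__((context(x, 1, 0)))
# define __must_hold(x)	__attribute__((context(x, 1, 1)))
#else
# define __acquires(x)
# define __releases(x)
# define __must_hold(x)
#endif

#include <pthread.h>

struct bgroup { pthread_mutex_t lock; };

/* caller holds grp->lock on entry and on return, but we unlock
 * around a blocking operation, exactly like the hunk above */
static int trim_one_range(struct bgroup *grp)
	__releases(grp->lock)
	__acquires(grp->lock)
{
	pthread_mutex_unlock(&grp->lock);
	/* ... issue the discard, which may sleep ... */
	pthread_mutex_lock(&grp->lock);
	return 0;
}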
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index ab95508e3d40..c18d95b50540 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -308,13 +308,14 @@ static void ext4_end_bio(struct bio *bio, int error)
308 if (error) { 308 if (error) {
309 struct inode *inode = io_end->inode; 309 struct inode *inode = io_end->inode;
310 310
311 ext4_warning(inode->i_sb, "I/O error writing to inode %lu " 311 ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
312 "(offset %llu size %ld starting block %llu)", 312 "(offset %llu size %ld starting block %llu)",
313 inode->i_ino, 313 error, inode->i_ino,
314 (unsigned long long) io_end->offset, 314 (unsigned long long) io_end->offset,
315 (long) io_end->size, 315 (long) io_end->size,
316 (unsigned long long) 316 (unsigned long long)
317 bi_sector >> (inode->i_blkbits - 9)); 317 bi_sector >> (inode->i_blkbits - 9));
318 mapping_set_error(inode->i_mapping, error);
318 } 319 }
319 320
320 if (io_end->flag & EXT4_IO_END_UNWRITTEN) { 321 if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
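Beyond printing the error code, the added mapping_set_error() call latches the failure into the address_space flags (AS_ENOSPC for -ENOSPC, AS_EIO for everything else) so a later fsync() on the file actually returns the error; before this it was visible only in the kernel log. The latch-and-collect pattern reduced to its core (the kernel uses per-mapping flag bits rather than a plain int):

#include <errno.h>

struct mapping {
	int error;	/* first unreported writeback error, 0 if none */
};

/* I/O completion side: remember the failure for later */
static void mapping_set_error_(struct mapping *m, int err)
{
	if (err && !m->error)
		m->error = (err == -ENOSPC) ? -ENOSPC : -EIO;
}

/* fsync() side: report the latched error once, then clear it */
static int filemap_check_errors_(struct mapping *m)
{
	int err = m->error;

	m->error = 0;
	return err;
}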
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f3c667091618..6f9e6fadac04 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3869,19 +3869,38 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3869 goto failed_mount2; 3869 goto failed_mount2;
3870 } 3870 }
3871 } 3871 }
3872
3873 /*
3874 * set up enough so that it can read an inode,
3875 * and create new inode for buddy allocator
3876 */
3877 sbi->s_gdb_count = db_count;
3878 if (!test_opt(sb, NOLOAD) &&
3879 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
3880 sb->s_op = &ext4_sops;
3881 else
3882 sb->s_op = &ext4_nojournal_sops;
3883
3884 ext4_ext_init(sb);
3885 err = ext4_mb_init(sb);
3886 if (err) {
3887 ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
3888 err);
3889 goto failed_mount2;
3890 }
3891
3872 if (!ext4_check_descriptors(sb, &first_not_zeroed)) { 3892 if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
3873 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); 3893 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
3874 goto failed_mount2; 3894 goto failed_mount2a;
3875 } 3895 }
3876 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 3896 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
3877 if (!ext4_fill_flex_info(sb)) { 3897 if (!ext4_fill_flex_info(sb)) {
3878 ext4_msg(sb, KERN_ERR, 3898 ext4_msg(sb, KERN_ERR,
3879 "unable to initialize " 3899 "unable to initialize "
3880 "flex_bg meta info!"); 3900 "flex_bg meta info!");
3881 goto failed_mount2; 3901 goto failed_mount2a;
3882 } 3902 }
3883 3903
3884 sbi->s_gdb_count = db_count;
3885 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 3904 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
3886 spin_lock_init(&sbi->s_next_gen_lock); 3905 spin_lock_init(&sbi->s_next_gen_lock);
3887 3906
@@ -3916,14 +3935,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3916 sbi->s_stripe = ext4_get_stripe_size(sbi); 3935 sbi->s_stripe = ext4_get_stripe_size(sbi);
3917 sbi->s_extent_max_zeroout_kb = 32; 3936 sbi->s_extent_max_zeroout_kb = 32;
3918 3937
3919 /*
3920 * set up enough so that it can read an inode
3921 */
3922 if (!test_opt(sb, NOLOAD) &&
3923 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
3924 sb->s_op = &ext4_sops;
3925 else
3926 sb->s_op = &ext4_nojournal_sops;
3927 sb->s_export_op = &ext4_export_ops; 3938 sb->s_export_op = &ext4_export_ops;
3928 sb->s_xattr = ext4_xattr_handlers; 3939 sb->s_xattr = ext4_xattr_handlers;
3929#ifdef CONFIG_QUOTA 3940#ifdef CONFIG_QUOTA
@@ -4113,21 +4124,13 @@ no_journal:
4113 if (err) { 4124 if (err) {
4114 ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for " 4125 ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for "
4115 "reserved pool", ext4_calculate_resv_clusters(sb)); 4126 "reserved pool", ext4_calculate_resv_clusters(sb));
4116 goto failed_mount4a; 4127 goto failed_mount5;
4117 } 4128 }
4118 4129
4119 err = ext4_setup_system_zone(sb); 4130 err = ext4_setup_system_zone(sb);
4120 if (err) { 4131 if (err) {
4121 ext4_msg(sb, KERN_ERR, "failed to initialize system " 4132 ext4_msg(sb, KERN_ERR, "failed to initialize system "
4122 "zone (%d)", err); 4133 "zone (%d)", err);
4123 goto failed_mount4a;
4124 }
4125
4126 ext4_ext_init(sb);
4127 err = ext4_mb_init(sb);
4128 if (err) {
4129 ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
4130 err);
4131 goto failed_mount5; 4134 goto failed_mount5;
4132 } 4135 }
4133 4136
@@ -4204,11 +4207,8 @@ failed_mount8:
4204failed_mount7: 4207failed_mount7:
4205 ext4_unregister_li_request(sb); 4208 ext4_unregister_li_request(sb);
4206failed_mount6: 4209failed_mount6:
4207 ext4_mb_release(sb);
4208failed_mount5:
4209 ext4_ext_release(sb);
4210 ext4_release_system_zone(sb); 4210 ext4_release_system_zone(sb);
4211failed_mount4a: 4211failed_mount5:
4212 dput(sb->s_root); 4212 dput(sb->s_root);
4213 sb->s_root = NULL; 4213 sb->s_root = NULL;
4214failed_mount4: 4214failed_mount4:
@@ -4232,11 +4232,14 @@ failed_mount3:
4232 percpu_counter_destroy(&sbi->s_extent_cache_cnt); 4232 percpu_counter_destroy(&sbi->s_extent_cache_cnt);
4233 if (sbi->s_mmp_tsk) 4233 if (sbi->s_mmp_tsk)
4234 kthread_stop(sbi->s_mmp_tsk); 4234 kthread_stop(sbi->s_mmp_tsk);
4235failed_mount2a:
4236 ext4_mb_release(sb);
4235failed_mount2: 4237failed_mount2:
4236 for (i = 0; i < db_count; i++) 4238 for (i = 0; i < db_count; i++)
4237 brelse(sbi->s_group_desc[i]); 4239 brelse(sbi->s_group_desc[i]);
4238 ext4_kvfree(sbi->s_group_desc); 4240 ext4_kvfree(sbi->s_group_desc);
4239failed_mount: 4241failed_mount:
4242 ext4_ext_release(sb);
4240 if (sbi->s_chksum_driver) 4243 if (sbi->s_chksum_driver)
4241 crypto_free_shash(sbi->s_chksum_driver); 4244 crypto_free_shash(sbi->s_chksum_driver);
4242 if (sbi->s_proc) { 4245 if (sbi->s_proc) {
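ext4_mb_init(), ext4_ext_init() and the sb->s_op assignment move ahead of ext4_check_descriptors() ("set up enough so that it can read an inode", per the added comment), which forces the error labels to be reshuffled: failed_mount2a now releases mballoc state for early failures and failed_mount4a collapses into failed_mount5. Kernel mount paths depend on the goto ladder staying an exact mirror of initialization order; the idiom in miniature (all names invented):

#include <errno.h>

int  init_groups(void);
int  init_mballoc(void);
int  init_journal(void);
void release_mballoc(void);
void release_groups(void);

static int fill_super(void)
{
	int err;

	err = init_groups();	/* step 1 */
	if (err)
		goto fail;
	err = init_mballoc();	/* step 2: if this moves, its label moves */
	if (err)
		goto fail_groups;
	err = init_journal();	/* step 3 */
	if (err)
		goto fail_mballoc;
	return 0;

	/* unwind strictly in reverse order of initialization */
fail_mballoc:
	release_mballoc();
fail_groups:
	release_groups();
fail:
	return err;
}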
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 1f5cf5880718..4eec399ec807 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -520,8 +520,8 @@ static void ext4_xattr_update_super_block(handle_t *handle,
520} 520}
521 521
522/* 522/*
523 * Release the xattr block BH: If the reference count is > 1, decrement 523 * Release the xattr block BH: If the reference count is > 1, decrement it;
524 * it; otherwise free the block. 524 * otherwise free the block.
525 */ 525 */
526static void 526static void
527ext4_xattr_release_block(handle_t *handle, struct inode *inode, 527ext4_xattr_release_block(handle_t *handle, struct inode *inode,
@@ -542,16 +542,31 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
542 if (ce) 542 if (ce)
543 mb_cache_entry_free(ce); 543 mb_cache_entry_free(ce);
544 get_bh(bh); 544 get_bh(bh);
545 unlock_buffer(bh);
545 ext4_free_blocks(handle, inode, bh, 0, 1, 546 ext4_free_blocks(handle, inode, bh, 0, 1,
546 EXT4_FREE_BLOCKS_METADATA | 547 EXT4_FREE_BLOCKS_METADATA |
547 EXT4_FREE_BLOCKS_FORGET); 548 EXT4_FREE_BLOCKS_FORGET);
548 unlock_buffer(bh);
549 } else { 549 } else {
550 le32_add_cpu(&BHDR(bh)->h_refcount, -1); 550 le32_add_cpu(&BHDR(bh)->h_refcount, -1);
551 if (ce) 551 if (ce)
552 mb_cache_entry_release(ce); 552 mb_cache_entry_release(ce);
553 /*
554 * Beware of this ugliness: Releasing of xattr block references
555 * from different inodes can race and so we have to protect
556 * from a race where someone else frees the block (and releases
557 * its journal_head) before we are done dirtying the buffer. In
558 * nojournal mode this race is harmless and we actually cannot
559 * call ext4_handle_dirty_xattr_block() with locked buffer as
560 * that function can call sync_dirty_buffer() so for that case
561 * we handle the dirtying after unlocking the buffer.
562 */
563 if (ext4_handle_valid(handle))
564 error = ext4_handle_dirty_xattr_block(handle, inode,
565 bh);
553 unlock_buffer(bh); 566 unlock_buffer(bh);
554 error = ext4_handle_dirty_xattr_block(handle, inode, bh); 567 if (!ext4_handle_valid(handle))
568 error = ext4_handle_dirty_xattr_block(handle, inode,
569 bh);
555 if (IS_SYNC(inode)) 570 if (IS_SYNC(inode))
556 ext4_handle_sync(handle); 571 ext4_handle_sync(handle);
557 dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1)); 572 dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1));
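Two ordering fixes in one xattr hunk: the free path now drops the buffer lock before calling ext4_free_blocks(), and on the shared-refcount path the block is dirtied while still locked when a journal is present (so a racing release from another inode cannot free the block and its journal_head mid-update), but only after unlocking in nojournal mode, where dirtying can call sync_dirty_buffer() and would self-deadlock on the buffer lock. The in-diff comment spells this out; the shape, reduced to a sketch with invented names and a mutex for the buffer lock:

#include <pthread.h>
#include <stdbool.h>

struct buffer {
	pthread_mutex_t lock;
	int refcount;
};

int dirty_metadata(struct buffer *bh);	/* may write synchronously */

static int drop_xattr_ref(struct buffer *bh, bool journaled)
{
	int err = 0;

	pthread_mutex_lock(&bh->lock);
	bh->refcount--;
	if (journaled)
		/* journal: dirty under the lock, so nobody frees the
		 * block (and its bookkeeping) before we are done */
		err = dirty_metadata(bh);
	pthread_mutex_unlock(&bh->lock);
	if (!journaled)
		/* nojournal: dirtying may take bh->lock itself, so it
		 * must run after the unlock */
		err = dirty_metadata(bh);
	return err;
}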
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 9ead1596399a..72c82f69b01b 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -274,15 +274,15 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
274 break; 274 break;
275#if BITS_PER_LONG != 32 275#if BITS_PER_LONG != 32
276 /* 32-bit arches must use fcntl64() */ 276 /* 32-bit arches must use fcntl64() */
277 case F_GETLKP: 277 case F_OFD_GETLK:
278#endif 278#endif
279 case F_GETLK: 279 case F_GETLK:
280 err = fcntl_getlk(filp, cmd, (struct flock __user *) arg); 280 err = fcntl_getlk(filp, cmd, (struct flock __user *) arg);
281 break; 281 break;
282#if BITS_PER_LONG != 32 282#if BITS_PER_LONG != 32
283 /* 32-bit arches must use fcntl64() */ 283 /* 32-bit arches must use fcntl64() */
284 case F_SETLKP: 284 case F_OFD_SETLK:
285 case F_SETLKPW: 285 case F_OFD_SETLKW:
286#endif 286#endif
287 /* Fallthrough */ 287 /* Fallthrough */
288 case F_SETLK: 288 case F_SETLK:
@@ -399,13 +399,13 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
399 399
400 switch (cmd) { 400 switch (cmd) {
401 case F_GETLK64: 401 case F_GETLK64:
402 case F_GETLKP: 402 case F_OFD_GETLK:
403 err = fcntl_getlk64(f.file, cmd, (struct flock64 __user *) arg); 403 err = fcntl_getlk64(f.file, cmd, (struct flock64 __user *) arg);
404 break; 404 break;
405 case F_SETLK64: 405 case F_SETLK64:
406 case F_SETLKW64: 406 case F_SETLKW64:
407 case F_SETLKP: 407 case F_OFD_SETLK:
408 case F_SETLKPW: 408 case F_OFD_SETLKW:
409 err = fcntl_setlk64(fd, f.file, cmd, 409 err = fcntl_setlk64(fd, f.file, cmd,
410 (struct flock64 __user *) arg); 410 (struct flock64 __user *) arg);
411 break; 411 break;
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index a0b0855d00a9..205e0d5d5307 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -348,7 +348,7 @@ int __init fuse_ctl_init(void)
348 return register_filesystem(&fuse_ctl_fs_type); 348 return register_filesystem(&fuse_ctl_fs_type);
349} 349}
350 350
351void fuse_ctl_cleanup(void) 351void __exit fuse_ctl_cleanup(void)
352{ 352{
353 unregister_filesystem(&fuse_ctl_fs_type); 353 unregister_filesystem(&fuse_ctl_fs_type);
354} 354}
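Marking fuse_ctl_cleanup() __exit places it in .exit.text, which is discarded outright when fuse is built into the kernel (a built-in can never be unloaded), and keeps modpost's section-mismatch checking honest since its only caller is fuse's own __exit routine. The standard pairing:

#include <linux/init.h>
#include <linux/module.h>

static int __init myfs_init(void)
{
	return 0;	/* runs once, then its memory is reclaimed */
}

static void __exit myfs_exit(void)
{
	/* dropped entirely for built-in code; kept only so that
	 * a modular build can be unloaded */
}

module_init(myfs_init);
module_exit(myfs_exit);
MODULE_LICENSE("GPL");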
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 5b4e035b364c..42198359fa1b 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -679,6 +679,14 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry,
679 return create_new_entry(fc, req, dir, entry, S_IFLNK); 679 return create_new_entry(fc, req, dir, entry, S_IFLNK);
680} 680}
681 681
682static inline void fuse_update_ctime(struct inode *inode)
683{
684 if (!IS_NOCMTIME(inode)) {
685 inode->i_ctime = current_fs_time(inode->i_sb);
686 mark_inode_dirty_sync(inode);
687 }
688}
689
682static int fuse_unlink(struct inode *dir, struct dentry *entry) 690static int fuse_unlink(struct inode *dir, struct dentry *entry)
683{ 691{
684 int err; 692 int err;
@@ -713,6 +721,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
713 fuse_invalidate_attr(inode); 721 fuse_invalidate_attr(inode);
714 fuse_invalidate_attr(dir); 722 fuse_invalidate_attr(dir);
715 fuse_invalidate_entry_cache(entry); 723 fuse_invalidate_entry_cache(entry);
724 fuse_update_ctime(inode);
716 } else if (err == -EINTR) 725 } else if (err == -EINTR)
717 fuse_invalidate_entry(entry); 726 fuse_invalidate_entry(entry);
718 return err; 727 return err;
@@ -743,23 +752,26 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
743 return err; 752 return err;
744} 753}
745 754
746static int fuse_rename(struct inode *olddir, struct dentry *oldent, 755static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
747 struct inode *newdir, struct dentry *newent) 756 struct inode *newdir, struct dentry *newent,
757 unsigned int flags, int opcode, size_t argsize)
748{ 758{
749 int err; 759 int err;
750 struct fuse_rename_in inarg; 760 struct fuse_rename2_in inarg;
751 struct fuse_conn *fc = get_fuse_conn(olddir); 761 struct fuse_conn *fc = get_fuse_conn(olddir);
752 struct fuse_req *req = fuse_get_req_nopages(fc); 762 struct fuse_req *req;
753 763
764 req = fuse_get_req_nopages(fc);
754 if (IS_ERR(req)) 765 if (IS_ERR(req))
755 return PTR_ERR(req); 766 return PTR_ERR(req);
756 767
757 memset(&inarg, 0, sizeof(inarg)); 768 memset(&inarg, 0, argsize);
758 inarg.newdir = get_node_id(newdir); 769 inarg.newdir = get_node_id(newdir);
759 req->in.h.opcode = FUSE_RENAME; 770 inarg.flags = flags;
771 req->in.h.opcode = opcode;
760 req->in.h.nodeid = get_node_id(olddir); 772 req->in.h.nodeid = get_node_id(olddir);
761 req->in.numargs = 3; 773 req->in.numargs = 3;
762 req->in.args[0].size = sizeof(inarg); 774 req->in.args[0].size = argsize;
763 req->in.args[0].value = &inarg; 775 req->in.args[0].value = &inarg;
764 req->in.args[1].size = oldent->d_name.len + 1; 776 req->in.args[1].size = oldent->d_name.len + 1;
765 req->in.args[1].value = oldent->d_name.name; 777 req->in.args[1].value = oldent->d_name.name;
@@ -771,15 +783,22 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent,
771 if (!err) { 783 if (!err) {
772 /* ctime changes */ 784 /* ctime changes */
773 fuse_invalidate_attr(oldent->d_inode); 785 fuse_invalidate_attr(oldent->d_inode);
786 fuse_update_ctime(oldent->d_inode);
787
788 if (flags & RENAME_EXCHANGE) {
789 fuse_invalidate_attr(newent->d_inode);
790 fuse_update_ctime(newent->d_inode);
791 }
774 792
775 fuse_invalidate_attr(olddir); 793 fuse_invalidate_attr(olddir);
776 if (olddir != newdir) 794 if (olddir != newdir)
777 fuse_invalidate_attr(newdir); 795 fuse_invalidate_attr(newdir);
778 796
779 /* newent will end up negative */ 797 /* newent will end up negative */
780 if (newent->d_inode) { 798 if (!(flags & RENAME_EXCHANGE) && newent->d_inode) {
781 fuse_invalidate_attr(newent->d_inode); 799 fuse_invalidate_attr(newent->d_inode);
782 fuse_invalidate_entry_cache(newent); 800 fuse_invalidate_entry_cache(newent);
801 fuse_update_ctime(newent->d_inode);
783 } 802 }
784 } else if (err == -EINTR) { 803 } else if (err == -EINTR) {
785 /* If request was interrupted, DEITY only knows if the 804 /* If request was interrupted, DEITY only knows if the
@@ -795,6 +814,36 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent,
795 return err; 814 return err;
796} 815}
797 816
817static int fuse_rename(struct inode *olddir, struct dentry *oldent,
818 struct inode *newdir, struct dentry *newent)
819{
820 return fuse_rename_common(olddir, oldent, newdir, newent, 0,
821 FUSE_RENAME, sizeof(struct fuse_rename_in));
822}
823
824static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
825 struct inode *newdir, struct dentry *newent,
826 unsigned int flags)
827{
828 struct fuse_conn *fc = get_fuse_conn(olddir);
829 int err;
830
831 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
832 return -EINVAL;
833
834 if (fc->no_rename2 || fc->minor < 23)
835 return -EINVAL;
836
837 err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
838 FUSE_RENAME2, sizeof(struct fuse_rename2_in));
839 if (err == -ENOSYS) {
840 fc->no_rename2 = 1;
841 err = -EINVAL;
842 }
843 return err;
844
845}
846
798static int fuse_link(struct dentry *entry, struct inode *newdir, 847static int fuse_link(struct dentry *entry, struct inode *newdir,
799 struct dentry *newent) 848 struct dentry *newent)
800{ 849{
@@ -829,6 +878,7 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
829 inc_nlink(inode); 878 inc_nlink(inode);
830 spin_unlock(&fc->lock); 879 spin_unlock(&fc->lock);
831 fuse_invalidate_attr(inode); 880 fuse_invalidate_attr(inode);
881 fuse_update_ctime(inode);
832 } else if (err == -EINTR) { 882 } else if (err == -EINTR) {
833 fuse_invalidate_attr(inode); 883 fuse_invalidate_attr(inode);
834 } 884 }
@@ -846,6 +896,8 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
846 attr->size = i_size_read(inode); 896 attr->size = i_size_read(inode);
847 attr->mtime = inode->i_mtime.tv_sec; 897 attr->mtime = inode->i_mtime.tv_sec;
848 attr->mtimensec = inode->i_mtime.tv_nsec; 898 attr->mtimensec = inode->i_mtime.tv_nsec;
899 attr->ctime = inode->i_ctime.tv_sec;
900 attr->ctimensec = inode->i_ctime.tv_nsec;
849 } 901 }
850 902
851 stat->dev = inode->i_sb->s_dev; 903 stat->dev = inode->i_sb->s_dev;
@@ -1504,7 +1556,7 @@ static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
1504} 1556}
1505 1557
1506static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg, 1558static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg,
1507 bool trust_local_mtime) 1559 bool trust_local_cmtime)
1508{ 1560{
1509 unsigned ivalid = iattr->ia_valid; 1561 unsigned ivalid = iattr->ia_valid;
1510 1562
@@ -1523,13 +1575,18 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg,
1523 if (!(ivalid & ATTR_ATIME_SET)) 1575 if (!(ivalid & ATTR_ATIME_SET))
1524 arg->valid |= FATTR_ATIME_NOW; 1576 arg->valid |= FATTR_ATIME_NOW;
1525 } 1577 }
1526 if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_mtime)) { 1578 if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
1527 arg->valid |= FATTR_MTIME; 1579 arg->valid |= FATTR_MTIME;
1528 arg->mtime = iattr->ia_mtime.tv_sec; 1580 arg->mtime = iattr->ia_mtime.tv_sec;
1529 arg->mtimensec = iattr->ia_mtime.tv_nsec; 1581 arg->mtimensec = iattr->ia_mtime.tv_nsec;
1530 if (!(ivalid & ATTR_MTIME_SET) && !trust_local_mtime) 1582 if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
1531 arg->valid |= FATTR_MTIME_NOW; 1583 arg->valid |= FATTR_MTIME_NOW;
1532 } 1584 }
1585 if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
1586 arg->valid |= FATTR_CTIME;
1587 arg->ctime = iattr->ia_ctime.tv_sec;
1588 arg->ctimensec = iattr->ia_ctime.tv_nsec;
1589 }
1533} 1590}
1534 1591
1535/* 1592/*
@@ -1597,39 +1654,38 @@ static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_req *req,
1597/* 1654/*
1598 * Flush inode->i_mtime to the server 1655 * Flush inode->i_mtime to the server
1599 */ 1656 */
1600int fuse_flush_mtime(struct file *file, bool nofail) 1657int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
1601{ 1658{
1602 struct inode *inode = file->f_mapping->host;
1603 struct fuse_inode *fi = get_fuse_inode(inode);
1604 struct fuse_conn *fc = get_fuse_conn(inode); 1659 struct fuse_conn *fc = get_fuse_conn(inode);
1605 struct fuse_req *req = NULL; 1660 struct fuse_req *req;
1606 struct fuse_setattr_in inarg; 1661 struct fuse_setattr_in inarg;
1607 struct fuse_attr_out outarg; 1662 struct fuse_attr_out outarg;
1608 int err; 1663 int err;
1609 1664
1610 if (nofail) { 1665 req = fuse_get_req_nopages(fc);
1611 req = fuse_get_req_nofail_nopages(fc, file); 1666 if (IS_ERR(req))
1612 } else { 1667 return PTR_ERR(req);
1613 req = fuse_get_req_nopages(fc);
1614 if (IS_ERR(req))
1615 return PTR_ERR(req);
1616 }
1617 1668
1618 memset(&inarg, 0, sizeof(inarg)); 1669 memset(&inarg, 0, sizeof(inarg));
1619 memset(&outarg, 0, sizeof(outarg)); 1670 memset(&outarg, 0, sizeof(outarg));
1620 1671
1621 inarg.valid |= FATTR_MTIME; 1672 inarg.valid = FATTR_MTIME;
1622 inarg.mtime = inode->i_mtime.tv_sec; 1673 inarg.mtime = inode->i_mtime.tv_sec;
1623 inarg.mtimensec = inode->i_mtime.tv_nsec; 1674 inarg.mtimensec = inode->i_mtime.tv_nsec;
1624 1675 if (fc->minor >= 23) {
1676 inarg.valid |= FATTR_CTIME;
1677 inarg.ctime = inode->i_ctime.tv_sec;
1678 inarg.ctimensec = inode->i_ctime.tv_nsec;
1679 }
1680 if (ff) {
1681 inarg.valid |= FATTR_FH;
1682 inarg.fh = ff->fh;
1683 }
1625 fuse_setattr_fill(fc, req, inode, &inarg, &outarg); 1684 fuse_setattr_fill(fc, req, inode, &inarg, &outarg);
1626 fuse_request_send(fc, req); 1685 fuse_request_send(fc, req);
1627 err = req->out.h.error; 1686 err = req->out.h.error;
1628 fuse_put_request(fc, req); 1687 fuse_put_request(fc, req);
1629 1688
1630 if (!err)
1631 clear_bit(FUSE_I_MTIME_DIRTY, &fi->state);
1632
1633 return err; 1689 return err;
1634} 1690}
1635 1691
@@ -1653,7 +1709,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
1653 bool is_wb = fc->writeback_cache; 1709 bool is_wb = fc->writeback_cache;
1654 loff_t oldsize; 1710 loff_t oldsize;
1655 int err; 1711 int err;
1656 bool trust_local_mtime = is_wb && S_ISREG(inode->i_mode); 1712 bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode);
1657 1713
1658 if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) 1714 if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
1659 attr->ia_valid |= ATTR_FORCE; 1715 attr->ia_valid |= ATTR_FORCE;
@@ -1678,11 +1734,13 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
1678 if (is_truncate) { 1734 if (is_truncate) {
1679 fuse_set_nowrite(inode); 1735 fuse_set_nowrite(inode);
1680 set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); 1736 set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1737 if (trust_local_cmtime && attr->ia_size != inode->i_size)
1738 attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
1681 } 1739 }
1682 1740
1683 memset(&inarg, 0, sizeof(inarg)); 1741 memset(&inarg, 0, sizeof(inarg));
1684 memset(&outarg, 0, sizeof(outarg)); 1742 memset(&outarg, 0, sizeof(outarg));
1685 iattr_to_fattr(attr, &inarg, trust_local_mtime); 1743 iattr_to_fattr(attr, &inarg, trust_local_cmtime);
1686 if (file) { 1744 if (file) {
1687 struct fuse_file *ff = file->private_data; 1745 struct fuse_file *ff = file->private_data;
1688 inarg.valid |= FATTR_FH; 1746 inarg.valid |= FATTR_FH;
@@ -1711,9 +1769,12 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
1711 1769
1712 spin_lock(&fc->lock); 1770 spin_lock(&fc->lock);
1713 /* the kernel maintains i_mtime locally */ 1771 /* the kernel maintains i_mtime locally */
1714 if (trust_local_mtime && (attr->ia_valid & ATTR_MTIME)) { 1772 if (trust_local_cmtime) {
1715 inode->i_mtime = attr->ia_mtime; 1773 if (attr->ia_valid & ATTR_MTIME)
1716 clear_bit(FUSE_I_MTIME_DIRTY, &fi->state); 1774 inode->i_mtime = attr->ia_mtime;
1775 if (attr->ia_valid & ATTR_CTIME)
1776 inode->i_ctime = attr->ia_ctime;
1777 /* FIXME: clear I_DIRTY_SYNC? */
1717 } 1778 }
1718 1779
1719 fuse_change_attributes_common(inode, &outarg.attr, 1780 fuse_change_attributes_common(inode, &outarg.attr,
@@ -1810,8 +1871,10 @@ static int fuse_setxattr(struct dentry *entry, const char *name,
1810 fc->no_setxattr = 1; 1871 fc->no_setxattr = 1;
1811 err = -EOPNOTSUPP; 1872 err = -EOPNOTSUPP;
1812 } 1873 }
1813 if (!err) 1874 if (!err) {
1814 fuse_invalidate_attr(inode); 1875 fuse_invalidate_attr(inode);
1876 fuse_update_ctime(inode);
1877 }
1815 return err; 1878 return err;
1816} 1879}
1817 1880
@@ -1941,20 +2004,11 @@ static int fuse_removexattr(struct dentry *entry, const char *name)
1941 fc->no_removexattr = 1; 2004 fc->no_removexattr = 1;
1942 err = -EOPNOTSUPP; 2005 err = -EOPNOTSUPP;
1943 } 2006 }
1944 if (!err) 2007 if (!err) {
1945 fuse_invalidate_attr(inode); 2008 fuse_invalidate_attr(inode);
1946 return err; 2009 fuse_update_ctime(inode);
1947}
1948
1949static int fuse_update_time(struct inode *inode, struct timespec *now,
1950 int flags)
1951{
1952 if (flags & S_MTIME) {
1953 inode->i_mtime = *now;
1954 set_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state);
1955 BUG_ON(!S_ISREG(inode->i_mode));
1956 } 2010 }
1957 return 0; 2011 return err;
1958} 2012}
1959 2013
1960static const struct inode_operations fuse_dir_inode_operations = { 2014static const struct inode_operations fuse_dir_inode_operations = {
@@ -1964,6 +2018,7 @@ static const struct inode_operations fuse_dir_inode_operations = {
1964 .unlink = fuse_unlink, 2018 .unlink = fuse_unlink,
1965 .rmdir = fuse_rmdir, 2019 .rmdir = fuse_rmdir,
1966 .rename = fuse_rename, 2020 .rename = fuse_rename,
2021 .rename2 = fuse_rename2,
1967 .link = fuse_link, 2022 .link = fuse_link,
1968 .setattr = fuse_setattr, 2023 .setattr = fuse_setattr,
1969 .create = fuse_create, 2024 .create = fuse_create,
@@ -1996,7 +2051,6 @@ static const struct inode_operations fuse_common_inode_operations = {
1996 .getxattr = fuse_getxattr, 2051 .getxattr = fuse_getxattr,
1997 .listxattr = fuse_listxattr, 2052 .listxattr = fuse_listxattr,
1998 .removexattr = fuse_removexattr, 2053 .removexattr = fuse_removexattr,
1999 .update_time = fuse_update_time,
2000}; 2054};
2001 2055
2002static const struct inode_operations fuse_symlink_inode_operations = { 2056static const struct inode_operations fuse_symlink_inode_operations = {
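
Note: the FATTR_MTIME_NOW / FATTR_CTIME handling above is driven by which flags the VFS sets on the iattr. A minimal userspace sketch of the two cases, assuming a FUSE filesystem mounted at the hypothetical path /mnt/fuse; when the connection trusts local c/mtime (writeback cache on a regular file) the explicit timestamp is sent either way.

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>

int main(void)
{
        struct timespec ts[2];

        /* explicit mtime: the VFS sets ATTR_MTIME_SET, so the request
         * carries the timestamp itself, never FATTR_MTIME_NOW */
        ts[0].tv_sec = 0;
        ts[0].tv_nsec = UTIME_OMIT;     /* leave atime alone */
        ts[1].tv_sec = 1400000000;      /* arbitrary example time */
        ts[1].tv_nsec = 0;
        if (utimensat(AT_FDCWD, "/mnt/fuse/file", ts, 0))
                perror("utimensat(explicit)");

        /* "touch": no ATTR_MTIME_SET, so without trust_local_cmtime the
         * server is asked to stamp its own current time */
        ts[1].tv_nsec = UTIME_NOW;
        if (utimensat(AT_FDCWD, "/mnt/fuse/file", ts, 0))
                perror("utimensat(now)");
        return 0;
}
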
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 13f8bdec5110..96d513e01a5d 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -223,6 +223,8 @@ void fuse_finish_open(struct inode *inode, struct file *file)
223 i_size_write(inode, 0); 223 i_size_write(inode, 0);
224 spin_unlock(&fc->lock); 224 spin_unlock(&fc->lock);
225 fuse_invalidate_attr(inode); 225 fuse_invalidate_attr(inode);
226 if (fc->writeback_cache)
227 file_update_time(file);
226 } 228 }
227 if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache) 229 if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
228 fuse_link_write_file(file); 230 fuse_link_write_file(file);
@@ -232,18 +234,26 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
232{ 234{
233 struct fuse_conn *fc = get_fuse_conn(inode); 235 struct fuse_conn *fc = get_fuse_conn(inode);
234 int err; 236 int err;
237 bool lock_inode = (file->f_flags & O_TRUNC) &&
238 fc->atomic_o_trunc &&
239 fc->writeback_cache;
235 240
236 err = generic_file_open(inode, file); 241 err = generic_file_open(inode, file);
237 if (err) 242 if (err)
238 return err; 243 return err;
239 244
245 if (lock_inode)
246 mutex_lock(&inode->i_mutex);
247
240 err = fuse_do_open(fc, get_node_id(inode), file, isdir); 248 err = fuse_do_open(fc, get_node_id(inode), file, isdir);
241 if (err)
242 return err;
243 249
244 fuse_finish_open(inode, file); 250 if (!err)
251 fuse_finish_open(inode, file);
245 252
246 return 0; 253 if (lock_inode)
254 mutex_unlock(&inode->i_mutex);
255
256 return err;
247} 257}
248 258
249static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode) 259static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode)
@@ -314,10 +324,7 @@ static int fuse_release(struct inode *inode, struct file *file)
314 324
315 /* see fuse_vma_close() for !writeback_cache case */ 325 /* see fuse_vma_close() for !writeback_cache case */
316 if (fc->writeback_cache) 326 if (fc->writeback_cache)
317 filemap_write_and_wait(file->f_mapping); 327 write_inode_now(inode, 1);
318
319 if (test_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state))
320 fuse_flush_mtime(file, true);
321 328
322 fuse_release_common(file, FUSE_RELEASE); 329 fuse_release_common(file, FUSE_RELEASE);
323 330
@@ -439,7 +446,7 @@ static int fuse_flush(struct file *file, fl_owner_t id)
439 if (fc->no_flush) 446 if (fc->no_flush)
440 return 0; 447 return 0;
441 448
442 err = filemap_write_and_wait(file->f_mapping); 449 err = write_inode_now(inode, 1);
443 if (err) 450 if (err)
444 return err; 451 return err;
445 452
@@ -480,13 +487,6 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
480 if (is_bad_inode(inode)) 487 if (is_bad_inode(inode))
481 return -EIO; 488 return -EIO;
482 489
483 err = filemap_write_and_wait_range(inode->i_mapping, start, end);
484 if (err)
485 return err;
486
487 if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
488 return 0;
489
490 mutex_lock(&inode->i_mutex); 490 mutex_lock(&inode->i_mutex);
491 491
492 /* 492 /*
@@ -494,17 +494,17 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
494 * wait for all outstanding writes, before sending the FSYNC 494 * wait for all outstanding writes, before sending the FSYNC
495 * request. 495 * request.
496 */ 496 */
497 err = write_inode_now(inode, 0); 497 err = filemap_write_and_wait_range(inode->i_mapping, start, end);
498 if (err) 498 if (err)
499 goto out; 499 goto out;
500 500
501 fuse_sync_writes(inode); 501 fuse_sync_writes(inode);
502 err = sync_inode_metadata(inode, 1);
503 if (err)
504 goto out;
502 505
503 if (test_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state)) { 506 if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
504 int err = fuse_flush_mtime(file, false); 507 goto out;
505 if (err)
506 goto out;
507 }
508 508
509 req = fuse_get_req_nopages(fc); 509 req = fuse_get_req_nopages(fc);
510 if (IS_ERR(req)) { 510 if (IS_ERR(req)) {
@@ -1659,13 +1659,13 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
1659 fuse_writepage_free(fc, req); 1659 fuse_writepage_free(fc, req);
1660} 1660}
1661 1661
1662static struct fuse_file *fuse_write_file_get(struct fuse_conn *fc, 1662static struct fuse_file *__fuse_write_file_get(struct fuse_conn *fc,
1663 struct fuse_inode *fi) 1663 struct fuse_inode *fi)
1664{ 1664{
1665 struct fuse_file *ff = NULL; 1665 struct fuse_file *ff = NULL;
1666 1666
1667 spin_lock(&fc->lock); 1667 spin_lock(&fc->lock);
1668 if (!WARN_ON(list_empty(&fi->write_files))) { 1668 if (!list_empty(&fi->write_files)) {
1669 ff = list_entry(fi->write_files.next, struct fuse_file, 1669 ff = list_entry(fi->write_files.next, struct fuse_file,
1670 write_entry); 1670 write_entry);
1671 fuse_file_get(ff); 1671 fuse_file_get(ff);
@@ -1675,6 +1675,29 @@ static struct fuse_file *fuse_write_file_get(struct fuse_conn *fc,
1675 return ff; 1675 return ff;
1676} 1676}
1677 1677
1678static struct fuse_file *fuse_write_file_get(struct fuse_conn *fc,
1679 struct fuse_inode *fi)
1680{
1681 struct fuse_file *ff = __fuse_write_file_get(fc, fi);
1682 WARN_ON(!ff);
1683 return ff;
1684}
1685
1686int fuse_write_inode(struct inode *inode, struct writeback_control *wbc)
1687{
1688 struct fuse_conn *fc = get_fuse_conn(inode);
1689 struct fuse_inode *fi = get_fuse_inode(inode);
1690 struct fuse_file *ff;
1691 int err;
1692
1693 ff = __fuse_write_file_get(fc, fi);
1694 err = fuse_flush_times(inode, ff);
1695 if (ff)
1696 fuse_file_put(ff, 0);
1697
1698 return err;
1699}
1700
1678static int fuse_writepage_locked(struct page *page) 1701static int fuse_writepage_locked(struct page *page)
1679{ 1702{
1680 struct address_space *mapping = page->mapping; 1703 struct address_space *mapping = page->mapping;
@@ -2972,6 +2995,9 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
2972 bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) || 2995 bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) ||
2973 (mode & FALLOC_FL_PUNCH_HOLE); 2996 (mode & FALLOC_FL_PUNCH_HOLE);
2974 2997
2998 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
2999 return -EOPNOTSUPP;
3000
2975 if (fc->no_fallocate) 3001 if (fc->no_fallocate)
2976 return -EOPNOTSUPP; 3002 return -EOPNOTSUPP;
2977 3003
@@ -3017,12 +3043,8 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
3017 if (!(mode & FALLOC_FL_KEEP_SIZE)) { 3043 if (!(mode & FALLOC_FL_KEEP_SIZE)) {
3018 bool changed = fuse_write_update_size(inode, offset + length); 3044 bool changed = fuse_write_update_size(inode, offset + length);
3019 3045
3020 if (changed && fc->writeback_cache) { 3046 if (changed && fc->writeback_cache)
3021 struct fuse_inode *fi = get_fuse_inode(inode); 3047 file_update_time(file);
3022
3023 inode->i_mtime = current_fs_time(inode->i_sb);
3024 set_bit(FUSE_I_MTIME_DIRTY, &fi->state);
3025 }
3026 } 3048 }
3027 3049
3028 if (mode & FALLOC_FL_PUNCH_HOLE) 3050 if (mode & FALLOC_FL_PUNCH_HOLE)
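
Note: with the new mode check in fuse_file_fallocate(), modes the FUSE protocol has not negotiated (such as FALLOC_FL_COLLAPSE_RANGE, new in this cycle) now fail up front instead of being forwarded to the server with flags it never agreed to handle. A hedged userspace sketch against a hypothetical FUSE mount; on older glibc the macro comes from <linux/falloc.h> rather than <fcntl.h>.

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
        int fd = open("/mnt/fuse/file", O_RDWR);   /* hypothetical mount */

        if (fd < 0)
                return 1;
        if (fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 0, 4096) == -1)
                printf("collapse range: %s\n", strerror(errno));
        /* expected: "collapse range: Operation not supported" */
        return 0;
}
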
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index a257ed8ebee6..7aa5c75e0de1 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -119,8 +119,6 @@ enum {
119 FUSE_I_INIT_RDPLUS, 119 FUSE_I_INIT_RDPLUS,
120 /** An operation changing file size is in progress */ 120 /** An operation changing file size is in progress */
121 FUSE_I_SIZE_UNSTABLE, 121 FUSE_I_SIZE_UNSTABLE,
122 /** i_mtime has been updated locally; a flush to userspace needed */
123 FUSE_I_MTIME_DIRTY,
124}; 122};
125 123
126struct fuse_conn; 124struct fuse_conn;
@@ -544,6 +542,9 @@ struct fuse_conn {
544 /** Is fallocate not implemented by fs? */ 542 /** Is fallocate not implemented by fs? */
545 unsigned no_fallocate:1; 543 unsigned no_fallocate:1;
546 544
545 /** Is rename with flags implemented by fs? */
546 unsigned no_rename2:1;
547
547 /** Use enhanced/automatic page cache invalidation. */ 548 /** Use enhanced/automatic page cache invalidation. */
548 unsigned auto_inval_data:1; 549 unsigned auto_inval_data:1;
549 550
@@ -725,7 +726,7 @@ int fuse_dev_init(void);
725void fuse_dev_cleanup(void); 726void fuse_dev_cleanup(void);
726 727
727int fuse_ctl_init(void); 728int fuse_ctl_init(void);
728void fuse_ctl_cleanup(void); 729void __exit fuse_ctl_cleanup(void);
729 730
730/** 731/**
731 * Allocate a request 732 * Allocate a request
@@ -891,7 +892,8 @@ int fuse_dev_release(struct inode *inode, struct file *file);
891 892
892bool fuse_write_update_size(struct inode *inode, loff_t pos); 893bool fuse_write_update_size(struct inode *inode, loff_t pos);
893 894
894int fuse_flush_mtime(struct file *file, bool nofail); 895int fuse_flush_times(struct inode *inode, struct fuse_file *ff);
896int fuse_write_inode(struct inode *inode, struct writeback_control *wbc);
895 897
896int fuse_do_setattr(struct inode *inode, struct iattr *attr, 898int fuse_do_setattr(struct inode *inode, struct iattr *attr,
897 struct file *file); 899 struct file *file);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 8d611696fcad..754dcf23de8a 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -175,9 +175,9 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
175 if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) { 175 if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) {
176 inode->i_mtime.tv_sec = attr->mtime; 176 inode->i_mtime.tv_sec = attr->mtime;
177 inode->i_mtime.tv_nsec = attr->mtimensec; 177 inode->i_mtime.tv_nsec = attr->mtimensec;
178 inode->i_ctime.tv_sec = attr->ctime;
179 inode->i_ctime.tv_nsec = attr->ctimensec;
178 } 180 }
179 inode->i_ctime.tv_sec = attr->ctime;
180 inode->i_ctime.tv_nsec = attr->ctimensec;
181 181
182 if (attr->blksize != 0) 182 if (attr->blksize != 0)
183 inode->i_blkbits = ilog2(attr->blksize); 183 inode->i_blkbits = ilog2(attr->blksize);
@@ -256,6 +256,8 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
256 inode->i_size = attr->size; 256 inode->i_size = attr->size;
257 inode->i_mtime.tv_sec = attr->mtime; 257 inode->i_mtime.tv_sec = attr->mtime;
258 inode->i_mtime.tv_nsec = attr->mtimensec; 258 inode->i_mtime.tv_nsec = attr->mtimensec;
259 inode->i_ctime.tv_sec = attr->ctime;
260 inode->i_ctime.tv_nsec = attr->ctimensec;
259 if (S_ISREG(inode->i_mode)) { 261 if (S_ISREG(inode->i_mode)) {
260 fuse_init_common(inode); 262 fuse_init_common(inode);
261 fuse_init_file_inode(inode); 263 fuse_init_file_inode(inode);
@@ -303,7 +305,7 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
303 305
304 if ((inode->i_state & I_NEW)) { 306 if ((inode->i_state & I_NEW)) {
305 inode->i_flags |= S_NOATIME; 307 inode->i_flags |= S_NOATIME;
306 if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) 308 if (!fc->writeback_cache || !S_ISREG(attr->mode))
307 inode->i_flags |= S_NOCMTIME; 309 inode->i_flags |= S_NOCMTIME;
308 inode->i_generation = generation; 310 inode->i_generation = generation;
309 inode->i_data.backing_dev_info = &fc->bdi; 311 inode->i_data.backing_dev_info = &fc->bdi;
@@ -788,6 +790,7 @@ static const struct super_operations fuse_super_operations = {
788 .alloc_inode = fuse_alloc_inode, 790 .alloc_inode = fuse_alloc_inode,
789 .destroy_inode = fuse_destroy_inode, 791 .destroy_inode = fuse_destroy_inode,
790 .evict_inode = fuse_evict_inode, 792 .evict_inode = fuse_evict_inode,
793 .write_inode = fuse_write_inode,
791 .drop_inode = generic_delete_inode, 794 .drop_inode = generic_delete_inode,
792 .remount_fs = fuse_remount_fs, 795 .remount_fs = fuse_remount_fs,
793 .put_super = fuse_put_super, 796 .put_super = fuse_put_super,
@@ -890,6 +893,11 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
890 fc->async_dio = 1; 893 fc->async_dio = 1;
891 if (arg->flags & FUSE_WRITEBACK_CACHE) 894 if (arg->flags & FUSE_WRITEBACK_CACHE)
892 fc->writeback_cache = 1; 895 fc->writeback_cache = 1;
896 if (arg->time_gran && arg->time_gran <= 1000000000)
897 fc->sb->s_time_gran = arg->time_gran;
898 else
899 fc->sb->s_time_gran = 1000000000;
900
893 } else { 901 } else {
894 ra_pages = fc->max_read / PAGE_CACHE_SIZE; 902 ra_pages = fc->max_read / PAGE_CACHE_SIZE;
895 fc->no_lock = 1; 903 fc->no_lock = 1;
@@ -996,7 +1004,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
996 if (sb->s_flags & MS_MANDLOCK) 1004 if (sb->s_flags & MS_MANDLOCK)
997 goto err; 1005 goto err;
998 1006
999 sb->s_flags &= ~MS_NOSEC; 1007 sb->s_flags &= ~(MS_NOSEC | MS_I_VERSION);
1000 1008
1001 if (!parse_fuse_opt((char *) data, &d, is_bdev)) 1009 if (!parse_fuse_opt((char *) data, &d, is_bdev))
1002 goto err; 1010 goto err;
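
Note: the s_time_gran negotiation above lets a server whose backing store only keeps, say, whole-second timestamps advertise that granularity (in nanoseconds) in its FUSE_INIT reply; a missing value, or one coarser than a second, falls back to one second. A standalone restatement of the clamping rule, not kernel code:

#include <stdio.h>

static unsigned int effective_time_gran(unsigned int advertised)
{
        if (advertised && advertised <= 1000000000)
                return advertised;
        return 1000000000;              /* default: whole seconds */
}

int main(void)
{
        printf("%u\n", effective_time_gran(0));          /* 1000000000 */
        printf("%u\n", effective_time_gran(1));          /* 1: ns resolution */
        printf("%u\n", effective_time_gran(2000000000)); /* clamped to 1e9 */
        return 0;
}
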
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 204027520937..e19d4c0cacae 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -1030,6 +1030,11 @@ static int __init init_hugetlbfs_fs(void)
1030 int error; 1030 int error;
1031 int i; 1031 int i;
1032 1032
1033 if (!hugepages_supported()) {
1034 pr_info("hugetlbfs: disabling because there are no supported hugepage sizes\n");
1035 return -ENOTSUPP;
1036 }
1037
1033 error = bdi_init(&hugetlbfs_backing_dev_info); 1038 error = bdi_init(&hugetlbfs_backing_dev_info);
1034 if (error) 1039 if (error)
1035 return error; 1040 return error;
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 78f3403300af..ac127cd008bf 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -232,9 +232,6 @@ static int kernfs_link_sibling(struct kernfs_node *kn)
232 struct rb_node **node = &kn->parent->dir.children.rb_node; 232 struct rb_node **node = &kn->parent->dir.children.rb_node;
233 struct rb_node *parent = NULL; 233 struct rb_node *parent = NULL;
234 234
235 if (kernfs_type(kn) == KERNFS_DIR)
236 kn->parent->dir.subdirs++;
237
238 while (*node) { 235 while (*node) {
239 struct kernfs_node *pos; 236 struct kernfs_node *pos;
240 int result; 237 int result;
@@ -249,9 +246,15 @@ static int kernfs_link_sibling(struct kernfs_node *kn)
249 else 246 else
250 return -EEXIST; 247 return -EEXIST;
251 } 248 }
249
252 /* add new node and rebalance the tree */ 250 /* add new node and rebalance the tree */
253 rb_link_node(&kn->rb, parent, node); 251 rb_link_node(&kn->rb, parent, node);
254 rb_insert_color(&kn->rb, &kn->parent->dir.children); 252 rb_insert_color(&kn->rb, &kn->parent->dir.children);
253
254 /* successfully added, account subdir number */
255 if (kernfs_type(kn) == KERNFS_DIR)
256 kn->parent->dir.subdirs++;
257
255 return 0; 258 return 0;
256} 259}
257 260
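
Note: the kernfs_link_sibling() change is a plain ordering fix: the subdirs counter used to be bumped before the rbtree insert, so an -EEXIST return left the accounting off by one. A toy standalone illustration of the corrected pattern, with a flat array standing in for the rbtree (all names made up):

#include <stdio.h>
#include <string.h>

#define MAX 8
static const char *names[MAX];
static int count;
static int subdirs;     /* accounting that must track the container */

static int link_node(const char *name, int is_dir)
{
        int i;

        for (i = 0; i < count; i++)
                if (strcmp(names[i], name) == 0)
                        return -1;      /* -EEXIST: nothing was inserted */
        if (count == MAX)
                return -1;
        names[count++] = name;

        /* only account once the insert can no longer fail */
        if (is_dir)
                subdirs++;
        return 0;
}

int main(void)
{
        link_node("a", 1);
        link_node("a", 1);                /* duplicate: must not bump */
        printf("subdirs=%d\n", subdirs);  /* prints 1 */
        return 0;
}
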
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 8034706a7af8..e01ea4a14a01 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -484,6 +484,8 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
484 484
485 ops = kernfs_ops(of->kn); 485 ops = kernfs_ops(of->kn);
486 rc = ops->mmap(of, vma); 486 rc = ops->mmap(of, vma);
487 if (rc)
488 goto out_put;
487 489
488 /* 490 /*
489 * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup() 491 * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index abb0f1f53d93..985217626e66 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -48,14 +48,18 @@ void __init kernfs_inode_init(void)
48 48
49static struct kernfs_iattrs *kernfs_iattrs(struct kernfs_node *kn) 49static struct kernfs_iattrs *kernfs_iattrs(struct kernfs_node *kn)
50{ 50{
51 static DEFINE_MUTEX(iattr_mutex);
52 struct kernfs_iattrs *ret;
51 struct iattr *iattrs; 53 struct iattr *iattrs;
52 54
55 mutex_lock(&iattr_mutex);
56
53 if (kn->iattr) 57 if (kn->iattr)
54 return kn->iattr; 58 goto out_unlock;
55 59
56 kn->iattr = kzalloc(sizeof(struct kernfs_iattrs), GFP_KERNEL); 60 kn->iattr = kzalloc(sizeof(struct kernfs_iattrs), GFP_KERNEL);
57 if (!kn->iattr) 61 if (!kn->iattr)
58 return NULL; 62 goto out_unlock;
59 iattrs = &kn->iattr->ia_iattr; 63 iattrs = &kn->iattr->ia_iattr;
60 64
61 /* assign default attributes */ 65 /* assign default attributes */
@@ -65,8 +69,10 @@ static struct kernfs_iattrs *kernfs_iattrs(struct kernfs_node *kn)
65 iattrs->ia_atime = iattrs->ia_mtime = iattrs->ia_ctime = CURRENT_TIME; 69 iattrs->ia_atime = iattrs->ia_mtime = iattrs->ia_ctime = CURRENT_TIME;
66 70
67 simple_xattrs_init(&kn->iattr->xattrs); 71 simple_xattrs_init(&kn->iattr->xattrs);
68 72out_unlock:
69 return kn->iattr; 73 ret = kn->iattr;
74 mutex_unlock(&iattr_mutex);
75 return ret;
70} 76}
71 77
72static int __kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr) 78static int __kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
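
Note: kernfs_iattrs() is a lazy allocator reachable from concurrent paths, so the unlocked version raced: two callers could both observe kn->iattr == NULL and both allocate, leaking one copy. The mutex makes the check-then-allocate step atomic. A userspace analogue of the pattern with pthreads (all names illustrative; build with -pthread):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct iattrs_demo { int nlink; };

static pthread_mutex_t iattr_mutex = PTHREAD_MUTEX_INITIALIZER;
static struct iattrs_demo *iattr;

static struct iattrs_demo *get_iattrs(void)
{
        struct iattrs_demo *ret;

        pthread_mutex_lock(&iattr_mutex);
        if (!iattr) {
                iattr = calloc(1, sizeof(*iattr)); /* may stay NULL: ENOMEM */
                if (iattr)
                        iattr->nlink = 1;          /* default attributes */
        }
        ret = iattr;
        pthread_mutex_unlock(&iattr_mutex);
        return ret;
}

static void *worker(void *arg)
{
        (void)arg;
        return get_iattrs();    /* every thread must see the same copy */
}

int main(void)
{
        pthread_t a, b;
        void *ra, *rb;

        pthread_create(&a, NULL, worker, NULL);
        pthread_create(&b, NULL, worker, NULL);
        pthread_join(a, &ra);
        pthread_join(b, &rb);
        printf("same object: %s\n", ra == rb ? "yes" : "no");
        return 0;
}
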
diff --git a/fs/locks.c b/fs/locks.c
index 13fc7a6d380a..e663aeac579e 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -135,7 +135,7 @@
135#define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) 135#define IS_POSIX(fl) (fl->fl_flags & FL_POSIX)
136#define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK) 136#define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK)
137#define IS_LEASE(fl) (fl->fl_flags & (FL_LEASE|FL_DELEG)) 137#define IS_LEASE(fl) (fl->fl_flags & (FL_LEASE|FL_DELEG))
138#define IS_FILE_PVT(fl) (fl->fl_flags & FL_FILE_PVT) 138#define IS_OFDLCK(fl) (fl->fl_flags & FL_OFDLCK)
139 139
140static bool lease_breaking(struct file_lock *fl) 140static bool lease_breaking(struct file_lock *fl)
141{ 141{
@@ -564,7 +564,7 @@ static void __locks_insert_block(struct file_lock *blocker,
564 BUG_ON(!list_empty(&waiter->fl_block)); 564 BUG_ON(!list_empty(&waiter->fl_block));
565 waiter->fl_next = blocker; 565 waiter->fl_next = blocker;
566 list_add_tail(&waiter->fl_block, &blocker->fl_block); 566 list_add_tail(&waiter->fl_block, &blocker->fl_block);
567 if (IS_POSIX(blocker) && !IS_FILE_PVT(blocker)) 567 if (IS_POSIX(blocker) && !IS_OFDLCK(blocker))
568 locks_insert_global_blocked(waiter); 568 locks_insert_global_blocked(waiter);
569} 569}
570 570
@@ -759,12 +759,12 @@ EXPORT_SYMBOL(posix_test_lock);
759 * of tasks (such as posix threads) sharing the same open file table. 759 * of tasks (such as posix threads) sharing the same open file table.
760 * To handle those cases, we just bail out after a few iterations. 760 * To handle those cases, we just bail out after a few iterations.
761 * 761 *
762 * For FL_FILE_PVT locks, the owner is the filp, not the files_struct. 762 * For FL_OFDLCK locks, the owner is the filp, not the files_struct.
763 * Because the owner is not even nominally tied to a thread of 763 * Because the owner is not even nominally tied to a thread of
764 * execution, the deadlock detection below can't reasonably work well. Just 764 * execution, the deadlock detection below can't reasonably work well. Just
765 * skip it for those. 765 * skip it for those.
766 * 766 *
767 * In principle, we could do a more limited deadlock detection on FL_FILE_PVT 767 * In principle, we could do a more limited deadlock detection on FL_OFDLCK
768 * locks that just checks for the case where two tasks are attempting to 768 * locks that just checks for the case where two tasks are attempting to
769 * upgrade from read to write locks on the same inode. 769 * upgrade from read to write locks on the same inode.
770 */ 770 */
@@ -791,9 +791,9 @@ static int posix_locks_deadlock(struct file_lock *caller_fl,
791 791
792 /* 792 /*
793 * This deadlock detector can't reasonably detect deadlocks with 793 * This deadlock detector can't reasonably detect deadlocks with
794 * FL_FILE_PVT locks, since they aren't owned by a process, per-se. 794 * FL_OFDLCK locks, since they aren't owned by a process, per-se.
795 */ 795 */
796 if (IS_FILE_PVT(caller_fl)) 796 if (IS_OFDLCK(caller_fl))
797 return 0; 797 return 0;
798 798
799 while ((block_fl = what_owner_is_waiting_for(block_fl))) { 799 while ((block_fl = what_owner_is_waiting_for(block_fl))) {
@@ -1391,11 +1391,10 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
1391 1391
1392restart: 1392restart:
1393 break_time = flock->fl_break_time; 1393 break_time = flock->fl_break_time;
1394 if (break_time != 0) { 1394 if (break_time != 0)
1395 break_time -= jiffies; 1395 break_time -= jiffies;
1396 if (break_time == 0) 1396 if (break_time == 0)
1397 break_time++; 1397 break_time++;
1398 }
1399 locks_insert_block(flock, new_fl); 1398 locks_insert_block(flock, new_fl);
1400 spin_unlock(&inode->i_lock); 1399 spin_unlock(&inode->i_lock);
1401 error = wait_event_interruptible_timeout(new_fl->fl_wait, 1400 error = wait_event_interruptible_timeout(new_fl->fl_wait,
@@ -1891,7 +1890,7 @@ EXPORT_SYMBOL_GPL(vfs_test_lock);
1891 1890
1892static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl) 1891static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)
1893{ 1892{
1894 flock->l_pid = IS_FILE_PVT(fl) ? -1 : fl->fl_pid; 1893 flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid;
1895#if BITS_PER_LONG == 32 1894#if BITS_PER_LONG == 32
1896 /* 1895 /*
1897 * Make sure we can represent the posix lock via 1896 * Make sure we can represent the posix lock via
@@ -1913,7 +1912,7 @@ static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)
1913#if BITS_PER_LONG == 32 1912#if BITS_PER_LONG == 32
1914static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl) 1913static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl)
1915{ 1914{
1916 flock->l_pid = IS_FILE_PVT(fl) ? -1 : fl->fl_pid; 1915 flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid;
1917 flock->l_start = fl->fl_start; 1916 flock->l_start = fl->fl_start;
1918 flock->l_len = fl->fl_end == OFFSET_MAX ? 0 : 1917 flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :
1919 fl->fl_end - fl->fl_start + 1; 1918 fl->fl_end - fl->fl_start + 1;
@@ -1942,13 +1941,13 @@ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l)
1942 if (error) 1941 if (error)
1943 goto out; 1942 goto out;
1944 1943
1945 if (cmd == F_GETLKP) { 1944 if (cmd == F_OFD_GETLK) {
1946 error = -EINVAL; 1945 error = -EINVAL;
1947 if (flock.l_pid != 0) 1946 if (flock.l_pid != 0)
1948 goto out; 1947 goto out;
1949 1948
1950 cmd = F_GETLK; 1949 cmd = F_GETLK;
1951 file_lock.fl_flags |= FL_FILE_PVT; 1950 file_lock.fl_flags |= FL_OFDLCK;
1952 file_lock.fl_owner = (fl_owner_t)filp; 1951 file_lock.fl_owner = (fl_owner_t)filp;
1953 } 1952 }
1954 1953
@@ -2074,25 +2073,25 @@ again:
2074 2073
2075 /* 2074 /*
2076 * If the cmd is requesting file-private locks, then set the 2075 * If the cmd is requesting file-private locks, then set the
2077 * FL_FILE_PVT flag and override the owner. 2076 * FL_OFDLCK flag and override the owner.
2078 */ 2077 */
2079 switch (cmd) { 2078 switch (cmd) {
2080 case F_SETLKP: 2079 case F_OFD_SETLK:
2081 error = -EINVAL; 2080 error = -EINVAL;
2082 if (flock.l_pid != 0) 2081 if (flock.l_pid != 0)
2083 goto out; 2082 goto out;
2084 2083
2085 cmd = F_SETLK; 2084 cmd = F_SETLK;
2086 file_lock->fl_flags |= FL_FILE_PVT; 2085 file_lock->fl_flags |= FL_OFDLCK;
2087 file_lock->fl_owner = (fl_owner_t)filp; 2086 file_lock->fl_owner = (fl_owner_t)filp;
2088 break; 2087 break;
2089 case F_SETLKPW: 2088 case F_OFD_SETLKW:
2090 error = -EINVAL; 2089 error = -EINVAL;
2091 if (flock.l_pid != 0) 2090 if (flock.l_pid != 0)
2092 goto out; 2091 goto out;
2093 2092
2094 cmd = F_SETLKW; 2093 cmd = F_SETLKW;
2095 file_lock->fl_flags |= FL_FILE_PVT; 2094 file_lock->fl_flags |= FL_OFDLCK;
2096 file_lock->fl_owner = (fl_owner_t)filp; 2095 file_lock->fl_owner = (fl_owner_t)filp;
2097 /* Fallthrough */ 2096 /* Fallthrough */
2098 case F_SETLKW: 2097 case F_SETLKW:
@@ -2144,13 +2143,13 @@ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
2144 if (error) 2143 if (error)
2145 goto out; 2144 goto out;
2146 2145
2147 if (cmd == F_GETLKP) { 2146 if (cmd == F_OFD_GETLK) {
2148 error = -EINVAL; 2147 error = -EINVAL;
2149 if (flock.l_pid != 0) 2148 if (flock.l_pid != 0)
2150 goto out; 2149 goto out;
2151 2150
2152 cmd = F_GETLK64; 2151 cmd = F_GETLK64;
2153 file_lock.fl_flags |= FL_FILE_PVT; 2152 file_lock.fl_flags |= FL_OFDLCK;
2154 file_lock.fl_owner = (fl_owner_t)filp; 2153 file_lock.fl_owner = (fl_owner_t)filp;
2155 } 2154 }
2156 2155
@@ -2209,25 +2208,25 @@ again:
2209 2208
2210 /* 2209 /*
2211 * If the cmd is requesting file-private locks, then set the 2210 * If the cmd is requesting file-private locks, then set the
2212 * FL_FILE_PVT flag and override the owner. 2211 * FL_OFDLCK flag and override the owner.
2213 */ 2212 */
2214 switch (cmd) { 2213 switch (cmd) {
2215 case F_SETLKP: 2214 case F_OFD_SETLK:
2216 error = -EINVAL; 2215 error = -EINVAL;
2217 if (flock.l_pid != 0) 2216 if (flock.l_pid != 0)
2218 goto out; 2217 goto out;
2219 2218
2220 cmd = F_SETLK64; 2219 cmd = F_SETLK64;
2221 file_lock->fl_flags |= FL_FILE_PVT; 2220 file_lock->fl_flags |= FL_OFDLCK;
2222 file_lock->fl_owner = (fl_owner_t)filp; 2221 file_lock->fl_owner = (fl_owner_t)filp;
2223 break; 2222 break;
2224 case F_SETLKPW: 2223 case F_OFD_SETLKW:
2225 error = -EINVAL; 2224 error = -EINVAL;
2226 if (flock.l_pid != 0) 2225 if (flock.l_pid != 0)
2227 goto out; 2226 goto out;
2228 2227
2229 cmd = F_SETLKW64; 2228 cmd = F_SETLKW64;
2230 file_lock->fl_flags |= FL_FILE_PVT; 2229 file_lock->fl_flags |= FL_OFDLCK;
2231 file_lock->fl_owner = (fl_owner_t)filp; 2230 file_lock->fl_owner = (fl_owner_t)filp;
2232 /* Fallthrough */ 2231 /* Fallthrough */
2233 case F_SETLKW64: 2232 case F_SETLKW64:
@@ -2413,8 +2412,8 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
2413 if (IS_POSIX(fl)) { 2412 if (IS_POSIX(fl)) {
2414 if (fl->fl_flags & FL_ACCESS) 2413 if (fl->fl_flags & FL_ACCESS)
2415 seq_printf(f, "ACCESS"); 2414 seq_printf(f, "ACCESS");
2416 else if (IS_FILE_PVT(fl)) 2415 else if (IS_OFDLCK(fl))
2417 seq_printf(f, "FLPVT "); 2416 seq_printf(f, "OFDLCK");
2418 else 2417 else
2419 seq_printf(f, "POSIX "); 2418 seq_printf(f, "POSIX ");
2420 2419
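
Note: this renames the just-merged "file-private" lock API to its final name, "open file description" (OFD) locks, before 3.15 ships it; /proc/locks now tags them OFDLCK. A small sketch of the userspace side, assuming a glibc that already exposes the constants (the raw values from include/uapi/asm-generic/fcntl.h are used as a fallback):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#ifndef F_OFD_SETLK
#define F_OFD_GETLK     36
#define F_OFD_SETLK     37
#define F_OFD_SETLKW    38
#endif

int main(void)
{
        struct flock fl;
        int fd = open("/tmp/ofd-demo", O_RDWR | O_CREAT, 0644);

        if (fd < 0)
                return 1;
        memset(&fl, 0, sizeof(fl)); /* l_pid must be 0 or we get EINVAL */
        fl.l_type = F_WRLCK;
        fl.l_whence = SEEK_SET;     /* l_start = l_len = 0: whole file */
        if (fcntl(fd, F_OFD_SETLK, &fl) == -1) {
                perror("F_OFD_SETLK");
                return 1;
        }
        /* the lock belongs to this open file description: it is released
         * when the last fd for it is closed, not when the process closes
         * some other fd for the same file (the classic POSIX-lock trap) */
        puts("got OFD write lock; see /proc/locks for the OFDLCK entry");
        close(fd);
        return 0;
}
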
diff --git a/fs/namei.c b/fs/namei.c
index c6157c894fce..80168273396b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1542,7 +1542,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
1542 inode = path->dentry->d_inode; 1542 inode = path->dentry->d_inode;
1543 } 1543 }
1544 err = -ENOENT; 1544 err = -ENOENT;
1545 if (!inode) 1545 if (!inode || d_is_negative(path->dentry))
1546 goto out_path_put; 1546 goto out_path_put;
1547 1547
1548 if (should_follow_link(path->dentry, follow)) { 1548 if (should_follow_link(path->dentry, follow)) {
@@ -2249,7 +2249,7 @@ mountpoint_last(struct nameidata *nd, struct path *path)
2249 mutex_unlock(&dir->d_inode->i_mutex); 2249 mutex_unlock(&dir->d_inode->i_mutex);
2250 2250
2251done: 2251done:
2252 if (!dentry->d_inode) { 2252 if (!dentry->d_inode || d_is_negative(dentry)) {
2253 error = -ENOENT; 2253 error = -ENOENT;
2254 dput(dentry); 2254 dput(dentry);
2255 goto out; 2255 goto out;
@@ -2994,7 +2994,7 @@ retry_lookup:
2994finish_lookup: 2994finish_lookup:
2995 /* we _can_ be in RCU mode here */ 2995 /* we _can_ be in RCU mode here */
2996 error = -ENOENT; 2996 error = -ENOENT;
2997 if (d_is_negative(path->dentry)) { 2997 if (!inode || d_is_negative(path->dentry)) {
2998 path_to_nameidata(path, nd); 2998 path_to_nameidata(path, nd);
2999 goto out; 2999 goto out;
3000 } 3000 }
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 39c8ef875f91..2c73cae9899d 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -654,9 +654,11 @@ static struct rpc_clnt *create_backchannel_client(struct rpc_create_args *args)
654 654
655static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses) 655static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses)
656{ 656{
657 int maxtime = max_cb_time(clp->net);
657 struct rpc_timeout timeparms = { 658 struct rpc_timeout timeparms = {
658 .to_initval = max_cb_time(clp->net), 659 .to_initval = maxtime,
659 .to_retries = 0, 660 .to_retries = 0,
661 .to_maxval = maxtime,
660 }; 662 };
661 struct rpc_create_args args = { 663 struct rpc_create_args args = {
662 .net = clp->net, 664 .net = clp->net,
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 2723c1badd01..18881f34737a 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3627,14 +3627,6 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
3627 /* nfsd4_check_resp_size guarantees enough room for error status */ 3627 /* nfsd4_check_resp_size guarantees enough room for error status */
3628 if (!op->status) 3628 if (!op->status)
3629 op->status = nfsd4_check_resp_size(resp, 0); 3629 op->status = nfsd4_check_resp_size(resp, 0);
3630 if (op->status == nfserr_resource && nfsd4_has_session(&resp->cstate)) {
3631 struct nfsd4_slot *slot = resp->cstate.slot;
3632
3633 if (slot->sl_flags & NFSD4_SLOT_CACHETHIS)
3634 op->status = nfserr_rep_too_big_to_cache;
3635 else
3636 op->status = nfserr_rep_too_big;
3637 }
3638 if (so) { 3630 if (so) {
3639 so->so_replay.rp_status = op->status; 3631 so->so_replay.rp_status = op->status;
3640 so->so_replay.rp_buflen = (char *)resp->p - (char *)(statp+1); 3632 so->so_replay.rp_buflen = (char *)resp->p - (char *)(statp+1);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 4e565c814309..732648b270dc 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -698,6 +698,8 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
698 } 698 }
699 group->overflow_event = &oevent->fse; 699 group->overflow_event = &oevent->fse;
700 700
701 if (force_o_largefile())
702 event_f_flags |= O_LARGEFILE;
701 group->fanotify_data.f_flags = event_f_flags; 703 group->fanotify_data.f_flags = event_f_flags;
702#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS 704#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
703 spin_lock_init(&group->fanotify_data.access_lock); 705 spin_lock_init(&group->fanotify_data.access_lock);
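
Note: the fds a listener receives with each fanotify event are opened by the kernel using the flags given at group creation, so before this fix a 32-bit process could hit EOVERFLOW on events for files over 2 GiB unless it remembered to pass O_LARGEFILE itself. With the hunk above the flag is forced wherever open(2) would force it. Minimal group setup (needs CAP_SYS_ADMIN):

#include <fcntl.h>
#include <stdio.h>
#include <sys/fanotify.h>

int main(void)
{
        /* O_LARGEFILE no longer needs to be spelled out here */
        int fan = fanotify_init(FAN_CLOEXEC | FAN_CLASS_NOTIF, O_RDONLY);

        if (fan < 0) {
                perror("fanotify_init");
                return 1;
        }
        puts("fanotify group created");
        return 0;
}
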
diff --git a/fs/open.c b/fs/open.c
index 3d30eb1fc95e..9d64679cec73 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -254,17 +254,22 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
254 return -EBADF; 254 return -EBADF;
255 255
256 /* 256 /*
257 * It's not possible to punch hole or perform collapse range 257 * We can only allow pure fallocate on append only files
258 * on append only file
259 */ 258 */
260 if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE) 259 if ((mode & ~FALLOC_FL_KEEP_SIZE) && IS_APPEND(inode))
261 && IS_APPEND(inode))
262 return -EPERM; 260 return -EPERM;
263 261
264 if (IS_IMMUTABLE(inode)) 262 if (IS_IMMUTABLE(inode))
265 return -EPERM; 263 return -EPERM;
266 264
267 /* 265 /*
266 * We can not allow to do any fallocate operation on an active
267 * swapfile
268 */
269 if (IS_SWAPFILE(inode))
270 ret = -ETXTBSY;
271
272 /*
268 * Revalidate the write permissions, in case security policy has 273 * Revalidate the write permissions, in case security policy has
269 * changed since the files were opened. 274 * changed since the files were opened.
270 */ 275 */
@@ -286,14 +291,6 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
286 if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0)) 291 if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
287 return -EFBIG; 292 return -EFBIG;
288 293
289 /*
290 * There is no need to overlap collapse range with EOF, in which case
291 * it is effectively a truncate operation
292 */
293 if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
294 (offset + len >= i_size_read(inode)))
295 return -EINVAL;
296
297 if (!file->f_op->fallocate) 294 if (!file->f_op->fallocate)
298 return -EOPNOTSUPP; 295 return -EOPNOTSUPP;
299 296
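
Note: the reworked checks give do_fallocate() a simpler policy: on an append-only inode only a pure size-extending preallocation (mode 0, or just FALLOC_FL_KEEP_SIZE) is allowed and any other mode is EPERM; every mode is refused on an active swapfile; and the collapse-vs-EOF test moves down into the filesystems. A hedged sketch of the append-only case, assuming the file was marked with chattr +a beforehand:

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
        int fd = open("/tmp/append-only", O_WRONLY | O_APPEND);

        if (fd < 0)
                return 1;
        if (fallocate(fd, 0, 0, 1 << 20) == -1) /* plain prealloc: allowed */
                perror("fallocate(0)");
        if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                      0, 4096) == -1)
                printf("punch hole: %s\n", strerror(errno)); /* EPERM */
        return 0;
}
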
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 9e363e41dacc..0855f772cd41 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -246,6 +246,12 @@ posix_acl_equiv_mode(const struct posix_acl *acl, umode_t *mode_p)
246 umode_t mode = 0; 246 umode_t mode = 0;
247 int not_equiv = 0; 247 int not_equiv = 0;
248 248
249 /*
250 * A null ACL can always be presented as mode bits.
251 */
252 if (!acl)
253 return 0;
254
249 FOREACH_ACL_ENTRY(pa, acl, pe) { 255 FOREACH_ACL_ENTRY(pa, acl, pe) {
250 switch (pa->e_tag) { 256 switch (pa->e_tag) {
251 case ACL_USER_OBJ: 257 case ACL_USER_OBJ:
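
Note: posix_acl_equiv_mode() answers "can this ACL be expressed purely as mode bits?" (0 for yes, positive for no), and the hunk makes the degenerate answer explicit: no ACL at all is trivially equivalent. A standalone sketch of that contract with a toy three-entry ACL, not the kernel types:

#include <stdio.h>

enum { ACL_USER_OBJ, ACL_GROUP_OBJ, ACL_OTHER };
struct acl_entry { int tag; unsigned perm; };   /* perm: rwx as 4|2|1 */

/* 0 = equivalent to mode bits; >0 = needs a real ACL */
static int equiv_mode(const struct acl_entry *acl, int n, unsigned *mode)
{
        unsigned m = 0;
        int i;

        if (!acl)
                return 0;               /* a null ACL is always equivalent */
        for (i = 0; i < n; i++) {
                switch (acl[i].tag) {
                case ACL_USER_OBJ:  m |= acl[i].perm << 6; break;
                case ACL_GROUP_OBJ: m |= acl[i].perm << 3; break;
                case ACL_OTHER:     m |= acl[i].perm;      break;
                default:            return 1; /* named entries: not equiv */
                }
        }
        *mode = m;
        return 0;
}

int main(void)
{
        struct acl_entry acl[] = {
                { ACL_USER_OBJ, 6 }, { ACL_GROUP_OBJ, 4 }, { ACL_OTHER, 4 },
        };
        unsigned mode = 0;

        if (equiv_mode(acl, 3, &mode) == 0)
                printf("mode %o\n", mode);      /* prints 644 */
        return 0;
}
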
diff --git a/fs/super.c b/fs/super.c
index e9dc3c3fe159..48377f7463c0 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -800,7 +800,10 @@ void emergency_remount(void)
800 800
801static DEFINE_IDA(unnamed_dev_ida); 801static DEFINE_IDA(unnamed_dev_ida);
802static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */ 802static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */
803static int unnamed_dev_start = 0; /* don't bother trying below it */ 803/* Many userspace utilities consider an FSID of 0 invalid.
804 * Always return at least 1 from get_anon_bdev.
805 */
806static int unnamed_dev_start = 1;
804 807
805int get_anon_bdev(dev_t *p) 808int get_anon_bdev(dev_t *p)
806{ 809{
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 1b8b91b67fdb..28cc1acd5439 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -453,95 +453,3 @@ void sysfs_remove_bin_file(struct kobject *kobj,
453 kernfs_remove_by_name(kobj->sd, attr->attr.name); 453 kernfs_remove_by_name(kobj->sd, attr->attr.name);
454} 454}
455EXPORT_SYMBOL_GPL(sysfs_remove_bin_file); 455EXPORT_SYMBOL_GPL(sysfs_remove_bin_file);
456
457struct sysfs_schedule_callback_struct {
458 struct list_head workq_list;
459 struct kobject *kobj;
460 void (*func)(void *);
461 void *data;
462 struct module *owner;
463 struct work_struct work;
464};
465
466static struct workqueue_struct *sysfs_workqueue;
467static DEFINE_MUTEX(sysfs_workq_mutex);
468static LIST_HEAD(sysfs_workq);
469static void sysfs_schedule_callback_work(struct work_struct *work)
470{
471 struct sysfs_schedule_callback_struct *ss = container_of(work,
472 struct sysfs_schedule_callback_struct, work);
473
474 (ss->func)(ss->data);
475 kobject_put(ss->kobj);
476 module_put(ss->owner);
477 mutex_lock(&sysfs_workq_mutex);
478 list_del(&ss->workq_list);
479 mutex_unlock(&sysfs_workq_mutex);
480 kfree(ss);
481}
482
483/**
484 * sysfs_schedule_callback - helper to schedule a callback for a kobject
485 * @kobj: object we're acting for.
486 * @func: callback function to invoke later.
487 * @data: argument to pass to @func.
488 * @owner: module owning the callback code
489 *
490 * sysfs attribute methods must not unregister themselves or their parent
491 * kobject (which would amount to the same thing). Attempts to do so will
492 * deadlock, since unregistration is mutually exclusive with driver
493 * callbacks.
494 *
495 * Instead methods can call this routine, which will attempt to allocate
496 * and schedule a workqueue request to call back @func with @data as its
497 * argument in the workqueue's process context. @kobj will be pinned
498 * until @func returns.
499 *
500 * Returns 0 if the request was submitted, -ENOMEM if storage could not
501 * be allocated, -ENODEV if a reference to @owner isn't available,
502 * -EAGAIN if a callback has already been scheduled for @kobj.
503 */
504int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *),
505 void *data, struct module *owner)
506{
507 struct sysfs_schedule_callback_struct *ss, *tmp;
508
509 if (!try_module_get(owner))
510 return -ENODEV;
511
512 mutex_lock(&sysfs_workq_mutex);
513 list_for_each_entry_safe(ss, tmp, &sysfs_workq, workq_list)
514 if (ss->kobj == kobj) {
515 module_put(owner);
516 mutex_unlock(&sysfs_workq_mutex);
517 return -EAGAIN;
518 }
519 mutex_unlock(&sysfs_workq_mutex);
520
521 if (sysfs_workqueue == NULL) {
522 sysfs_workqueue = create_singlethread_workqueue("sysfsd");
523 if (sysfs_workqueue == NULL) {
524 module_put(owner);
525 return -ENOMEM;
526 }
527 }
528
529 ss = kmalloc(sizeof(*ss), GFP_KERNEL);
530 if (!ss) {
531 module_put(owner);
532 return -ENOMEM;
533 }
534 kobject_get(kobj);
535 ss->kobj = kobj;
536 ss->func = func;
537 ss->data = data;
538 ss->owner = owner;
539 INIT_WORK(&ss->work, sysfs_schedule_callback_work);
540 INIT_LIST_HEAD(&ss->workq_list);
541 mutex_lock(&sysfs_workq_mutex);
542 list_add_tail(&ss->workq_list, &sysfs_workq);
543 mutex_unlock(&sysfs_workq_mutex);
544 queue_work(sysfs_workqueue, &ss->work);
545 return 0;
546}
547EXPORT_SYMBOL_GPL(sysfs_schedule_callback);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index a1266089eca1..a81c7b556896 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1556,7 +1556,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1556 if (c->space_fixup) { 1556 if (c->space_fixup) {
1557 err = ubifs_fixup_free_space(c); 1557 err = ubifs_fixup_free_space(c);
1558 if (err) 1558 if (err)
1559 return err; 1559 goto out;
1560 } 1560 }
1561 1561
1562 err = check_free_space(c); 1562 err = check_free_space(c);
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 75df77d09f75..0479c32c5eb1 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1344,6 +1344,14 @@ __xfs_get_blocks(
1344 /* 1344 /*
1345 * If this is O_DIRECT or the mpage code calling tell them how large 1345 * If this is O_DIRECT or the mpage code calling tell them how large
1346 * the mapping is, so that we can avoid repeated get_blocks calls. 1346 * the mapping is, so that we can avoid repeated get_blocks calls.
1347 *
1348 * If the mapping spans EOF, then we have to break the mapping up as the
1349 * mapping for blocks beyond EOF must be marked new so that sub block
1350 * regions can be correctly zeroed. We can't do this for mappings within
1351 * EOF unless the mapping was just allocated or is unwritten, otherwise
1352 * the callers would overwrite existing data with zeros. Hence we have
1353 * to split the mapping into a range up to and including EOF, and a
1354 * second mapping for beyond EOF.
1347 */ 1355 */
1348 if (direct || size > (1 << inode->i_blkbits)) { 1356 if (direct || size > (1 << inode->i_blkbits)) {
1349 xfs_off_t mapping_size; 1357 xfs_off_t mapping_size;
@@ -1354,6 +1362,12 @@ __xfs_get_blocks(
1354 ASSERT(mapping_size > 0); 1362 ASSERT(mapping_size > 0);
1355 if (mapping_size > size) 1363 if (mapping_size > size)
1356 mapping_size = size; 1364 mapping_size = size;
1365 if (offset < i_size_read(inode) &&
1366 offset + mapping_size >= i_size_read(inode)) {
1367 /* limit mapping to block that spans EOF */
1368 mapping_size = roundup_64(i_size_read(inode) - offset,
1369 1 << inode->i_blkbits);
1370 }
1357 if (mapping_size > LONG_MAX) 1371 if (mapping_size > LONG_MAX)
1358 mapping_size = LONG_MAX; 1372 mapping_size = LONG_MAX;
1359 1373
@@ -1566,6 +1580,16 @@ xfs_vm_write_failed(
1566 1580
1567 xfs_vm_kill_delalloc_range(inode, block_offset, 1581 xfs_vm_kill_delalloc_range(inode, block_offset,
1568 block_offset + bh->b_size); 1582 block_offset + bh->b_size);
1583
1584 /*
1585 * This buffer does not contain data anymore. make sure anyone
1586 * who finds it knows that for certain.
1587 */
1588 clear_buffer_delay(bh);
1589 clear_buffer_uptodate(bh);
1590 clear_buffer_mapped(bh);
1591 clear_buffer_new(bh);
1592 clear_buffer_dirty(bh);
1569 } 1593 }
1570 1594
1571} 1595}
@@ -1599,12 +1623,21 @@ xfs_vm_write_begin(
1599 status = __block_write_begin(page, pos, len, xfs_get_blocks); 1623 status = __block_write_begin(page, pos, len, xfs_get_blocks);
1600 if (unlikely(status)) { 1624 if (unlikely(status)) {
1601 struct inode *inode = mapping->host; 1625 struct inode *inode = mapping->host;
1626 size_t isize = i_size_read(inode);
1602 1627
1603 xfs_vm_write_failed(inode, page, pos, len); 1628 xfs_vm_write_failed(inode, page, pos, len);
1604 unlock_page(page); 1629 unlock_page(page);
1605 1630
1606 if (pos + len > i_size_read(inode)) 1631 /*
1607 truncate_pagecache(inode, i_size_read(inode)); 1632 * If the write is beyond EOF, we only want to kill blocks
1633 * allocated in this write, not blocks that were previously
1634 * written successfully.
1635 */
1636 if (pos + len > isize) {
1637 ssize_t start = max_t(ssize_t, pos, isize);
1638
1639 truncate_pagecache_range(inode, start, pos + len);
1640 }
1608 1641
1609 page_cache_release(page); 1642 page_cache_release(page);
1610 page = NULL; 1643 page = NULL;
@@ -1615,9 +1648,12 @@ xfs_vm_write_begin(
1615} 1648}
1616 1649
1617/* 1650/*
1618 * On failure, we only need to kill delalloc blocks beyond EOF because they 1651 * On failure, we only need to kill delalloc blocks beyond EOF in the range of
1619 * will never be written. For blocks within EOF, generic_write_end() zeros them 1652 * this specific write because they will never be written. Previous writes
1620 * so they are safe to leave alone and be written with all the other valid data. 1653 * beyond EOF where block allocation succeeded do not need to be trashed, so
1654 * only new blocks from this write should be trashed. For blocks within
1655 * EOF, generic_write_end() zeros them so they are safe to leave alone and be
1656 * written with all the other valid data.
1621 */ 1657 */
1622STATIC int 1658STATIC int
1623xfs_vm_write_end( 1659xfs_vm_write_end(
@@ -1640,8 +1676,11 @@ xfs_vm_write_end(
1640 loff_t to = pos + len; 1676 loff_t to = pos + len;
1641 1677
1642 if (to > isize) { 1678 if (to > isize) {
1643 truncate_pagecache(inode, isize); 1679 /* only kill blocks in this write beyond EOF */
1680 if (pos > isize)
1681 isize = pos;
1644 xfs_vm_kill_delalloc_range(inode, isize, to); 1682 xfs_vm_kill_delalloc_range(inode, isize, to);
1683 truncate_pagecache_range(inode, isize, to);
1645 } 1684 }
1646 } 1685 }
1647 return ret; 1686 return ret;
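
Note: the __xfs_get_blocks() hunk trims any mapping that would straddle EOF so that it ends with the block containing EOF; blocks past that point come back in a second mapping marked new, which is what lets sub-block regions be zeroed without overwriting valid data inside EOF. The arithmetic, restated standalone with made-up numbers:

#include <stdio.h>

static long long roundup64(long long x, long long y)
{
        return ((x + y - 1) / y) * y;
}

int main(void)
{
        long long blksz = 4096;
        long long isize = 10000;        /* EOF lands inside the 3rd block */
        long long offset = 4096;        /* mapping starts at block 1 */
        long long mapping_size = 32768; /* would extend well past EOF */

        if (offset < isize && offset + mapping_size >= isize)
                mapping_size = roundup64(isize - offset, blksz);
        printf("trimmed mapping: %lld bytes\n", mapping_size); /* 8192 */
        return 0;
}
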
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 01b6a0102fbd..abda1124a70f 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -213,7 +213,7 @@ xfs_attr_calc_size(
213 * Out of line attribute, cannot double split, but 213 * Out of line attribute, cannot double split, but
214 * make room for the attribute value itself. 214 * make room for the attribute value itself.
215 */ 215 */
216 uint dblocks = XFS_B_TO_FSB(mp, valuelen); 216 uint dblocks = xfs_attr3_rmt_blocks(mp, valuelen);
217 nblks += dblocks; 217 nblks += dblocks;
218 nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK); 218 nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK);
219 } 219 }
@@ -698,11 +698,22 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
698 698
699 trace_xfs_attr_leaf_replace(args); 699 trace_xfs_attr_leaf_replace(args);
700 700
701 /* save the attribute state for later removal*/
701 args->op_flags |= XFS_DA_OP_RENAME; /* an atomic rename */ 702 args->op_flags |= XFS_DA_OP_RENAME; /* an atomic rename */
702 args->blkno2 = args->blkno; /* set 2nd entry info*/ 703 args->blkno2 = args->blkno; /* set 2nd entry info*/
703 args->index2 = args->index; 704 args->index2 = args->index;
704 args->rmtblkno2 = args->rmtblkno; 705 args->rmtblkno2 = args->rmtblkno;
705 args->rmtblkcnt2 = args->rmtblkcnt; 706 args->rmtblkcnt2 = args->rmtblkcnt;
707 args->rmtvaluelen2 = args->rmtvaluelen;
708
709 /*
710 * clear the remote attr state now that it is saved so that the
711 * values reflect the state of the attribute we are about to
712 * add, not the attribute we just found and will remove later.
713 */
714 args->rmtblkno = 0;
715 args->rmtblkcnt = 0;
716 args->rmtvaluelen = 0;
706 } 717 }
707 718
708 /* 719 /*
@@ -794,6 +805,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
794 args->blkno = args->blkno2; 805 args->blkno = args->blkno2;
795 args->rmtblkno = args->rmtblkno2; 806 args->rmtblkno = args->rmtblkno2;
796 args->rmtblkcnt = args->rmtblkcnt2; 807 args->rmtblkcnt = args->rmtblkcnt2;
808 args->rmtvaluelen = args->rmtvaluelen2;
797 if (args->rmtblkno) { 809 if (args->rmtblkno) {
798 error = xfs_attr_rmtval_remove(args); 810 error = xfs_attr_rmtval_remove(args);
799 if (error) 811 if (error)
@@ -999,13 +1011,22 @@ restart:
999 1011
1000 trace_xfs_attr_node_replace(args); 1012 trace_xfs_attr_node_replace(args);
1001 1013
1014 /* save the attribute state for later removal*/
1002 args->op_flags |= XFS_DA_OP_RENAME; /* atomic rename op */ 1015 args->op_flags |= XFS_DA_OP_RENAME; /* atomic rename op */
1003 args->blkno2 = args->blkno; /* set 2nd entry info*/ 1016 args->blkno2 = args->blkno; /* set 2nd entry info*/
1004 args->index2 = args->index; 1017 args->index2 = args->index;
1005 args->rmtblkno2 = args->rmtblkno; 1018 args->rmtblkno2 = args->rmtblkno;
1006 args->rmtblkcnt2 = args->rmtblkcnt; 1019 args->rmtblkcnt2 = args->rmtblkcnt;
1020 args->rmtvaluelen2 = args->rmtvaluelen;
1021
1022 /*
1023 * clear the remote attr state now that it is saved so that the
1024 * values reflect the state of the attribute we are about to
1025 * add, not the attribute we just found and will remove later.
1026 */
1007 args->rmtblkno = 0; 1027 args->rmtblkno = 0;
1008 args->rmtblkcnt = 0; 1028 args->rmtblkcnt = 0;
1029 args->rmtvaluelen = 0;
1009 } 1030 }
1010 1031
1011 retval = xfs_attr3_leaf_add(blk->bp, state->args); 1032 retval = xfs_attr3_leaf_add(blk->bp, state->args);
@@ -1133,6 +1154,7 @@ restart:
1133 args->blkno = args->blkno2; 1154 args->blkno = args->blkno2;
1134 args->rmtblkno = args->rmtblkno2; 1155 args->rmtblkno = args->rmtblkno2;
1135 args->rmtblkcnt = args->rmtblkcnt2; 1156 args->rmtblkcnt = args->rmtblkcnt2;
1157 args->rmtvaluelen = args->rmtvaluelen2;
1136 if (args->rmtblkno) { 1158 if (args->rmtblkno) {
1137 error = xfs_attr_rmtval_remove(args); 1159 error = xfs_attr_rmtval_remove(args);
1138 if (error) 1160 if (error)
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index fe9587fab17a..511c283459b1 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -1229,6 +1229,7 @@ xfs_attr3_leaf_add_work(
1229 name_rmt->valueblk = 0; 1229 name_rmt->valueblk = 0;
1230 args->rmtblkno = 1; 1230 args->rmtblkno = 1;
1231 args->rmtblkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen); 1231 args->rmtblkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen);
1232 args->rmtvaluelen = args->valuelen;
1232 } 1233 }
1233 xfs_trans_log_buf(args->trans, bp, 1234 xfs_trans_log_buf(args->trans, bp,
1234 XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index), 1235 XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index),
@@ -2167,11 +2168,11 @@ xfs_attr3_leaf_lookup_int(
2167 if (!xfs_attr_namesp_match(args->flags, entry->flags)) 2168 if (!xfs_attr_namesp_match(args->flags, entry->flags))
2168 continue; 2169 continue;
2169 args->index = probe; 2170 args->index = probe;
2170 args->valuelen = be32_to_cpu(name_rmt->valuelen); 2171 args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen);
2171 args->rmtblkno = be32_to_cpu(name_rmt->valueblk); 2172 args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
2172 args->rmtblkcnt = xfs_attr3_rmt_blocks( 2173 args->rmtblkcnt = xfs_attr3_rmt_blocks(
2173 args->dp->i_mount, 2174 args->dp->i_mount,
2174 args->valuelen); 2175 args->rmtvaluelen);
2175 return XFS_ERROR(EEXIST); 2176 return XFS_ERROR(EEXIST);
2176 } 2177 }
2177 } 2178 }
@@ -2220,19 +2221,19 @@ xfs_attr3_leaf_getvalue(
2220 name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index); 2221 name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);
2221 ASSERT(name_rmt->namelen == args->namelen); 2222 ASSERT(name_rmt->namelen == args->namelen);
2222 ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0); 2223 ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0);
2223 valuelen = be32_to_cpu(name_rmt->valuelen); 2224 args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen);
2224 args->rmtblkno = be32_to_cpu(name_rmt->valueblk); 2225 args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
2225 args->rmtblkcnt = xfs_attr3_rmt_blocks(args->dp->i_mount, 2226 args->rmtblkcnt = xfs_attr3_rmt_blocks(args->dp->i_mount,
2226 valuelen); 2227 args->rmtvaluelen);
2227 if (args->flags & ATTR_KERNOVAL) { 2228 if (args->flags & ATTR_KERNOVAL) {
2228 args->valuelen = valuelen; 2229 args->valuelen = args->rmtvaluelen;
2229 return 0; 2230 return 0;
2230 } 2231 }
2231 if (args->valuelen < valuelen) { 2232 if (args->valuelen < args->rmtvaluelen) {
2232 args->valuelen = valuelen; 2233 args->valuelen = args->rmtvaluelen;
2233 return XFS_ERROR(ERANGE); 2234 return XFS_ERROR(ERANGE);
2234 } 2235 }
2235 args->valuelen = valuelen; 2236 args->valuelen = args->rmtvaluelen;
2236 } 2237 }
2237 return 0; 2238 return 0;
2238} 2239}
@@ -2519,7 +2520,7 @@ xfs_attr3_leaf_clearflag(
2519 ASSERT((entry->flags & XFS_ATTR_LOCAL) == 0); 2520 ASSERT((entry->flags & XFS_ATTR_LOCAL) == 0);
2520 name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index); 2521 name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);
2521 name_rmt->valueblk = cpu_to_be32(args->rmtblkno); 2522 name_rmt->valueblk = cpu_to_be32(args->rmtblkno);
2522 name_rmt->valuelen = cpu_to_be32(args->valuelen); 2523 name_rmt->valuelen = cpu_to_be32(args->rmtvaluelen);
2523 xfs_trans_log_buf(args->trans, bp, 2524 xfs_trans_log_buf(args->trans, bp,
2524 XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt))); 2525 XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt)));
2525 } 2526 }
@@ -2677,7 +2678,7 @@ xfs_attr3_leaf_flipflags(
2677 ASSERT((entry1->flags & XFS_ATTR_LOCAL) == 0); 2678 ASSERT((entry1->flags & XFS_ATTR_LOCAL) == 0);
2678 name_rmt = xfs_attr3_leaf_name_remote(leaf1, args->index); 2679 name_rmt = xfs_attr3_leaf_name_remote(leaf1, args->index);
2679 name_rmt->valueblk = cpu_to_be32(args->rmtblkno); 2680 name_rmt->valueblk = cpu_to_be32(args->rmtblkno);
2680 name_rmt->valuelen = cpu_to_be32(args->valuelen); 2681 name_rmt->valuelen = cpu_to_be32(args->rmtvaluelen);
2681 xfs_trans_log_buf(args->trans, bp1, 2682 xfs_trans_log_buf(args->trans, bp1,
2682 XFS_DA_LOGRANGE(leaf1, name_rmt, sizeof(*name_rmt))); 2683 XFS_DA_LOGRANGE(leaf1, name_rmt, sizeof(*name_rmt)));
2683 } 2684 }
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 01db96f60cf0..833fe5d98d80 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -447,6 +447,7 @@ xfs_attr3_leaf_list_int(
447 args.dp = context->dp; 447 args.dp = context->dp;
448 args.whichfork = XFS_ATTR_FORK; 448 args.whichfork = XFS_ATTR_FORK;
449 args.valuelen = valuelen; 449 args.valuelen = valuelen;
450 args.rmtvaluelen = valuelen;
450 args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS); 451 args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS);
451 args.rmtblkno = be32_to_cpu(name_rmt->valueblk); 452 args.rmtblkno = be32_to_cpu(name_rmt->valueblk);
452 args.rmtblkcnt = xfs_attr3_rmt_blocks( 453 args.rmtblkcnt = xfs_attr3_rmt_blocks(
diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c
index 6e37823e2932..d2e6e948cec7 100644
--- a/fs/xfs/xfs_attr_remote.c
+++ b/fs/xfs/xfs_attr_remote.c
@@ -337,7 +337,7 @@ xfs_attr_rmtval_get(
337 struct xfs_buf *bp; 337 struct xfs_buf *bp;
338 xfs_dablk_t lblkno = args->rmtblkno; 338 xfs_dablk_t lblkno = args->rmtblkno;
339 __uint8_t *dst = args->value; 339 __uint8_t *dst = args->value;
340 int valuelen = args->valuelen; 340 int valuelen;
341 int nmap; 341 int nmap;
342 int error; 342 int error;
343 int blkcnt = args->rmtblkcnt; 343 int blkcnt = args->rmtblkcnt;
@@ -347,7 +347,9 @@ xfs_attr_rmtval_get(
347 trace_xfs_attr_rmtval_get(args); 347 trace_xfs_attr_rmtval_get(args);
348 348
349 ASSERT(!(args->flags & ATTR_KERNOVAL)); 349 ASSERT(!(args->flags & ATTR_KERNOVAL));
350 ASSERT(args->rmtvaluelen == args->valuelen);
350 351
352 valuelen = args->rmtvaluelen;
351 while (valuelen > 0) { 353 while (valuelen > 0) {
352 nmap = ATTR_RMTVALUE_MAPSIZE; 354 nmap = ATTR_RMTVALUE_MAPSIZE;
353 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, 355 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
@@ -415,7 +417,7 @@ xfs_attr_rmtval_set(
415 * attributes have headers, we can't just do a straight byte to FSB 417 * attributes have headers, we can't just do a straight byte to FSB
416 * conversion and have to take the header space into account. 418 * conversion and have to take the header space into account.
417 */ 419 */
418 blkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen); 420 blkcnt = xfs_attr3_rmt_blocks(mp, args->rmtvaluelen);
419 error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff, 421 error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,
420 XFS_ATTR_FORK); 422 XFS_ATTR_FORK);
421 if (error) 423 if (error)
@@ -480,7 +482,7 @@ xfs_attr_rmtval_set(
480 */ 482 */
481 lblkno = args->rmtblkno; 483 lblkno = args->rmtblkno;
482 blkcnt = args->rmtblkcnt; 484 blkcnt = args->rmtblkcnt;
483 valuelen = args->valuelen; 485 valuelen = args->rmtvaluelen;
484 while (valuelen > 0) { 486 while (valuelen > 0) {
485 struct xfs_buf *bp; 487 struct xfs_buf *bp;
486 xfs_daddr_t dblkno; 488 xfs_daddr_t dblkno;
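The xfs_attr changes above all implement one split: args->rmtvaluelen (and
rmtvaluelen2) now carry the length of the value as stored in remote blocks,
while args->valuelen remains the size of the caller's buffer, so a lookup
into a short buffer can no longer skew the remote-block accounting. A minimal
sketch of that pattern; the struct and helper are illustrative stand-ins, not
the XFS API:

	/* Sketch only: stored length vs. caller-buffer length. */
	#include <errno.h>
	#include <string.h>

	struct attr_args {
		void	*value;		/* caller's buffer */
		int	valuelen;	/* size of the caller's buffer */
		int	rmtvaluelen;	/* value length as stored on disk */
	};

	static int attr_copyout(struct attr_args *args, const void *stored,
				int stored_len)
	{
		args->rmtvaluelen = stored_len;	/* drives block accounting */
		if (args->valuelen < stored_len) {
			args->valuelen = stored_len;	/* report needed size */
			return ERANGE;			/* buffer too small */
		}
		args->valuelen = stored_len;
		memcpy(args->value, stored, stored_len);
		return 0;
	}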
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 5b6092ef51ef..f0efc7e970ef 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5413,6 +5413,7 @@ xfs_bmap_shift_extents(
5413 int whichfork = XFS_DATA_FORK; 5413 int whichfork = XFS_DATA_FORK;
5414 int logflags; 5414 int logflags;
5415 xfs_filblks_t blockcount = 0; 5415 xfs_filblks_t blockcount = 0;
5416 int total_extents;
5416 5417
5417 if (unlikely(XFS_TEST_ERROR( 5418 if (unlikely(XFS_TEST_ERROR(
5418 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && 5419 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
@@ -5429,7 +5430,6 @@ xfs_bmap_shift_extents(
5429 ASSERT(current_ext != NULL); 5430 ASSERT(current_ext != NULL);
5430 5431
5431 ifp = XFS_IFORK_PTR(ip, whichfork); 5432 ifp = XFS_IFORK_PTR(ip, whichfork);
5432
5433 if (!(ifp->if_flags & XFS_IFEXTENTS)) { 5433 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
5434 /* Read in all the extents */ 5434 /* Read in all the extents */
5435 error = xfs_iread_extents(tp, ip, whichfork); 5435 error = xfs_iread_extents(tp, ip, whichfork);
@@ -5456,7 +5456,6 @@ xfs_bmap_shift_extents(
5456 5456
5457 /* We are going to change core inode */ 5457 /* We are going to change core inode */
5458 logflags = XFS_ILOG_CORE; 5458 logflags = XFS_ILOG_CORE;
5459
5460 if (ifp->if_flags & XFS_IFBROOT) { 5459 if (ifp->if_flags & XFS_IFBROOT) {
5461 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); 5460 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5462 cur->bc_private.b.firstblock = *firstblock; 5461 cur->bc_private.b.firstblock = *firstblock;
@@ -5467,8 +5466,14 @@ xfs_bmap_shift_extents(
5467 logflags |= XFS_ILOG_DEXT; 5466 logflags |= XFS_ILOG_DEXT;
5468 } 5467 }
5469 5468
5470 while (nexts++ < num_exts && 5469 /*
5471 *current_ext < XFS_IFORK_NEXTENTS(ip, whichfork)) { 5470 * There may be delalloc extents in the data fork before the range we
 5471 * are collapsing out, so we cannot use the count of real extents
 5472 * here. Instead we have to calculate it
5473 * from the incore fork.
5474 */
5475 total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
5476 while (nexts++ < num_exts && *current_ext < total_extents) {
5472 5477
5473 gotp = xfs_iext_get_ext(ifp, *current_ext); 5478 gotp = xfs_iext_get_ext(ifp, *current_ext);
5474 xfs_bmbt_get_all(gotp, &got); 5479 xfs_bmbt_get_all(gotp, &got);
@@ -5556,10 +5561,11 @@ xfs_bmap_shift_extents(
5556 } 5561 }
5557 5562
5558 (*current_ext)++; 5563 (*current_ext)++;
5564 total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
5559 } 5565 }
5560 5566
5561 /* Check if we are done */ 5567 /* Check if we are done */
5562 if (*current_ext == XFS_IFORK_NEXTENTS(ip, whichfork)) 5568 if (*current_ext == total_extents)
5563 *done = 1; 5569 *done = 1;
5564 5570
5565del_cursor: 5571del_cursor:
@@ -5568,6 +5574,5 @@ del_cursor:
5568 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); 5574 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
5569 5575
5570 xfs_trans_log_inode(tp, ip, logflags); 5576 xfs_trans_log_inode(tp, ip, logflags);
5571
5572 return error; 5577 return error;
5573} 5578}
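xfs_bmap_shift_extents() now derives its loop bound from the incore fork
(ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) and re-derives it after every pass,
because a merge during the shift removes a record, and XFS_IFORK_NEXTENTS()
counts only real extents while delalloc extents also live in the incore list.
The general shape of the pattern, as a sketch with illustrative types:

	/* Sketch: re-derive the loop bound when each pass may shrink
	 * the backing array; types are illustrative only. */
	#include <stddef.h>

	struct rec { long start, len; };

	struct fork {
		struct rec	*recs;
		size_t		bytes;	/* like ifp->if_bytes */
	};

	static void shift_records(struct fork *ifp, size_t *cur,
				  size_t max_ops)
	{
		size_t ops = 0;
		size_t total = ifp->bytes / sizeof(struct rec);

		while (ops++ < max_ops && *cur < total) {
			/* shift recs[*cur]; a merge with its predecessor
			 * removes a record and shrinks ifp->bytes */
			(*cur)++;
			total = ifp->bytes / sizeof(struct rec);
		}
	}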
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 01f6a646caa1..296160b8e78c 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1418,6 +1418,8 @@ xfs_zero_file_space(
1418 xfs_off_t end_boundary; 1418 xfs_off_t end_boundary;
1419 int error; 1419 int error;
1420 1420
1421 trace_xfs_zero_file_space(ip);
1422
1421 granularity = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); 1423 granularity = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
1422 1424
1423 /* 1425 /*
@@ -1432,9 +1434,18 @@ xfs_zero_file_space(
1432 ASSERT(end_boundary <= offset + len); 1434 ASSERT(end_boundary <= offset + len);
1433 1435
1434 if (start_boundary < end_boundary - 1) { 1436 if (start_boundary < end_boundary - 1) {
1435 /* punch out the page cache over the conversion range */ 1437 /*
1438 * punch out delayed allocation blocks and the page cache over
1439 * the conversion range
1440 */
1441 xfs_ilock(ip, XFS_ILOCK_EXCL);
1442 error = xfs_bmap_punch_delalloc_range(ip,
1443 XFS_B_TO_FSBT(mp, start_boundary),
1444 XFS_B_TO_FSB(mp, end_boundary - start_boundary));
1445 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1436 truncate_pagecache_range(VFS_I(ip), start_boundary, 1446 truncate_pagecache_range(VFS_I(ip), start_boundary,
1437 end_boundary - 1); 1447 end_boundary - 1);
1448
1438 /* convert the blocks */ 1449 /* convert the blocks */
1439 error = xfs_alloc_file_space(ip, start_boundary, 1450 error = xfs_alloc_file_space(ip, start_boundary,
1440 end_boundary - start_boundary - 1, 1451 end_boundary - start_boundary - 1,
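The zeroing path above punches out delayed allocations under XFS_ILOCK_EXCL
before truncating the page cache, so writeback cannot land dirty delalloc
pages back inside the range. Note the conversion pair: XFS_B_TO_FSBT()
truncates a byte offset down to a block number while XFS_B_TO_FSB() rounds a
byte count up, so together they cover the whole range. A runnable sketch of
that rounding, assuming a power-of-two block size (illustrative macros, not
the XFS definitions):

	#include <assert.h>

	/* round down / round up, block size = 1 << blog */
	#define B_TO_FSBT(blog, b) ((unsigned long long)(b) >> (blog))
	#define B_TO_FSB(blog, b)  \
		(((unsigned long long)(b) + (1ULL << (blog)) - 1) >> (blog))

	int main(void)
	{
		/* 4k blocks: zeroing bytes [4096, 12288) punches fsb 1..2 */
		assert(B_TO_FSBT(12, 4096) == 1);
		assert(B_TO_FSB(12, 12288 - 4096) == 2);
		return 0;
	}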
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 107f2fdfe41f..cb10a0aaab3a 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1372,21 +1372,29 @@ xfs_buf_iorequest(
1372 xfs_buf_wait_unpin(bp); 1372 xfs_buf_wait_unpin(bp);
1373 xfs_buf_hold(bp); 1373 xfs_buf_hold(bp);
1374 1374
1375 /* Set the count to 1 initially, this will stop an I/O 1375 /*
 1376 * Set the count to 1 initially; this will stop an I/O
1376 * completion callout which happens before we have started 1377 * completion callout which happens before we have started
1377 * all the I/O from calling xfs_buf_ioend too early. 1378 * all the I/O from calling xfs_buf_ioend too early.
1378 */ 1379 */
1379 atomic_set(&bp->b_io_remaining, 1); 1380 atomic_set(&bp->b_io_remaining, 1);
1380 _xfs_buf_ioapply(bp); 1381 _xfs_buf_ioapply(bp);
1381 _xfs_buf_ioend(bp, 1); 1382 /*
1383 * If _xfs_buf_ioapply failed, we'll get back here with
1384 * only the reference we took above. _xfs_buf_ioend will
1385 * drop it to zero, so we'd better not queue it for later,
1386 * or we'll free it before it's done.
1387 */
1388 _xfs_buf_ioend(bp, bp->b_error ? 0 : 1);
1382 1389
1383 xfs_buf_rele(bp); 1390 xfs_buf_rele(bp);
1384} 1391}
1385 1392
1386/* 1393/*
1387 * Waits for I/O to complete on the buffer supplied. It returns immediately if 1394 * Waits for I/O to complete on the buffer supplied. It returns immediately if
1388 * no I/O is pending or there is already a pending error on the buffer. It 1395 * no I/O is pending or there is already a pending error on the buffer, in which
1389 * returns the I/O error code, if any, or 0 if there was no error. 1396 * case nothing will ever complete. It returns the I/O error code, if any, or
1397 * 0 if there was no error.
1390 */ 1398 */
1391int 1399int
1392xfs_buf_iowait( 1400xfs_buf_iowait(
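The b_io_remaining logic above is the classic biased-completion-count
pattern: the count starts at 1 so completions of sub-I/Os that finish while
submission is still running cannot drop it to zero, and the submitter
releases that bias last. The new code's twist is the error case: if
_xfs_buf_ioapply() failed, the final reference must complete synchronously
(schedule argument 0) rather than be queued, or the buffer could be freed
while still queued. The core pattern, as a sketch in C11 atomics (not the
XFS API):

	#include <stdatomic.h>

	struct io {
		atomic_int	remaining;
		int		error;
	};

	static void io_done(struct io *io)
	{
		(void)io;	/* complete: wake waiter, release, ... */
	}

	static void subio_end(struct io *io)	/* per-fragment completion */
	{
		if (atomic_fetch_sub(&io->remaining, 1) == 1)
			io_done(io);		/* we dropped the last ref */
	}

	static void submit(struct io *io)
	{
		atomic_store(&io->remaining, 1);	/* the bias */
		/* per fragment: atomic_fetch_add(&io->remaining, 1) and
		 * start async I/O whose completion calls subio_end(io);
		 * on failure set io->error and stop issuing fragments */
		subio_end(io);	/* drop the bias; may run io_done here */
	}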
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 6e95ea79f5d7..201c6091d26a 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -60,10 +60,12 @@ typedef struct xfs_da_args {
60 int index; /* index of attr of interest in blk */ 60 int index; /* index of attr of interest in blk */
61 xfs_dablk_t rmtblkno; /* remote attr value starting blkno */ 61 xfs_dablk_t rmtblkno; /* remote attr value starting blkno */
62 int rmtblkcnt; /* remote attr value block count */ 62 int rmtblkcnt; /* remote attr value block count */
63 int rmtvaluelen; /* remote attr value length in bytes */
63 xfs_dablk_t blkno2; /* blkno of 2nd attr leaf of interest */ 64 xfs_dablk_t blkno2; /* blkno of 2nd attr leaf of interest */
64 int index2; /* index of 2nd attr in blk */ 65 int index2; /* index of 2nd attr in blk */
65 xfs_dablk_t rmtblkno2; /* remote attr value starting blkno */ 66 xfs_dablk_t rmtblkno2; /* remote attr value starting blkno */
66 int rmtblkcnt2; /* remote attr value block count */ 67 int rmtblkcnt2; /* remote attr value block count */
68 int rmtvaluelen2; /* remote attr value length in bytes */
67 int op_flags; /* operation flags */ 69 int op_flags; /* operation flags */
68 enum xfs_dacmp cmpresult; /* name compare result for lookups */ 70 enum xfs_dacmp cmpresult; /* name compare result for lookups */
69} xfs_da_args_t; 71} xfs_da_args_t;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 79e96ce98733..951a2321ee01 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -679,7 +679,7 @@ xfs_file_dio_aio_write(
679 goto out; 679 goto out;
680 680
681 if (mapping->nrpages) { 681 if (mapping->nrpages) {
682 ret = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping, 682 ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
683 pos, -1); 683 pos, -1);
684 if (ret) 684 if (ret)
685 goto out; 685 goto out;
@@ -841,7 +841,15 @@ xfs_file_fallocate(
841 goto out_unlock; 841 goto out_unlock;
842 } 842 }
843 843
844 ASSERT(offset + len < i_size_read(inode)); 844 /*
 845 * The collapse range must not reach or cross EOF, since
 846 * that would effectively be a truncate operation
847 */
848 if (offset + len >= i_size_read(inode)) {
849 error = -EINVAL;
850 goto out_unlock;
851 }
852
845 new_size = i_size_read(inode) - len; 853 new_size = i_size_read(inode) - len;
846 854
847 error = xfs_collapse_file_space(ip, offset, len); 855 error = xfs_collapse_file_space(ip, offset, len);
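The fallocate hunk converts an ASSERT into a real -EINVAL return: offset and
len arrive straight from userspace, so a collapse request that reaches EOF
has to be rejected on production kernels too, not just trapped on DEBUG
builds. A minimal sketch of the check, with hypothetical names:

	#include <errno.h>

	/* sketch: validate user-controlled collapse parameters */
	static int collapse_range_checks(long long offset, long long len,
					 long long isize)
	{
		if (offset < 0 || len <= 0)
			return -EINVAL;
		/* collapsing up to or past EOF is effectively a truncate */
		if (offset + len >= isize)
			return -EINVAL;
		return 0;
	}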
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 5e7a38fa6ee6..768087bedbac 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1334,7 +1334,8 @@ int
1334xfs_create_tmpfile( 1334xfs_create_tmpfile(
1335 struct xfs_inode *dp, 1335 struct xfs_inode *dp,
1336 struct dentry *dentry, 1336 struct dentry *dentry,
1337 umode_t mode) 1337 umode_t mode,
1338 struct xfs_inode **ipp)
1338{ 1339{
1339 struct xfs_mount *mp = dp->i_mount; 1340 struct xfs_mount *mp = dp->i_mount;
1340 struct xfs_inode *ip = NULL; 1341 struct xfs_inode *ip = NULL;
@@ -1402,7 +1403,6 @@ xfs_create_tmpfile(
1402 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp); 1403 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
1403 1404
1404 ip->i_d.di_nlink--; 1405 ip->i_d.di_nlink--;
1405 d_tmpfile(dentry, VFS_I(ip));
1406 error = xfs_iunlink(tp, ip); 1406 error = xfs_iunlink(tp, ip);
1407 if (error) 1407 if (error)
1408 goto out_trans_abort; 1408 goto out_trans_abort;
@@ -1415,6 +1415,7 @@ xfs_create_tmpfile(
1415 xfs_qm_dqrele(gdqp); 1415 xfs_qm_dqrele(gdqp);
1416 xfs_qm_dqrele(pdqp); 1416 xfs_qm_dqrele(pdqp);
1417 1417
1418 *ipp = ip;
1418 return 0; 1419 return 0;
1419 1420
1420 out_trans_abort: 1421 out_trans_abort:
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 396cc1fafd0d..f2fcde52b66d 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -334,7 +334,7 @@ int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
334int xfs_create(struct xfs_inode *dp, struct xfs_name *name, 334int xfs_create(struct xfs_inode *dp, struct xfs_name *name,
335 umode_t mode, xfs_dev_t rdev, struct xfs_inode **ipp); 335 umode_t mode, xfs_dev_t rdev, struct xfs_inode **ipp);
336int xfs_create_tmpfile(struct xfs_inode *dp, struct dentry *dentry, 336int xfs_create_tmpfile(struct xfs_inode *dp, struct dentry *dentry,
337 umode_t mode); 337 umode_t mode, struct xfs_inode **ipp);
338int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, 338int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
339 struct xfs_inode *ip); 339 struct xfs_inode *ip);
340int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, 340int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 89b07e43ca28..301ecbfcc0be 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -124,15 +124,15 @@ xfs_cleanup_inode(
124 xfs_dentry_to_name(&teardown, dentry, 0); 124 xfs_dentry_to_name(&teardown, dentry, 0);
125 125
126 xfs_remove(XFS_I(dir), &teardown, XFS_I(inode)); 126 xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
127 iput(inode);
128} 127}
129 128
130STATIC int 129STATIC int
131xfs_vn_mknod( 130xfs_generic_create(
132 struct inode *dir, 131 struct inode *dir,
133 struct dentry *dentry, 132 struct dentry *dentry,
134 umode_t mode, 133 umode_t mode,
135 dev_t rdev) 134 dev_t rdev,
135 bool tmpfile) /* unnamed file */
136{ 136{
137 struct inode *inode; 137 struct inode *inode;
138 struct xfs_inode *ip = NULL; 138 struct xfs_inode *ip = NULL;
@@ -156,8 +156,12 @@ xfs_vn_mknod(
156 if (error) 156 if (error)
157 return error; 157 return error;
158 158
159 xfs_dentry_to_name(&name, dentry, mode); 159 if (!tmpfile) {
160 error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip); 160 xfs_dentry_to_name(&name, dentry, mode);
161 error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
162 } else {
163 error = xfs_create_tmpfile(XFS_I(dir), dentry, mode, &ip);
164 }
161 if (unlikely(error)) 165 if (unlikely(error))
162 goto out_free_acl; 166 goto out_free_acl;
163 167
@@ -180,7 +184,11 @@ xfs_vn_mknod(
180 } 184 }
181#endif 185#endif
182 186
183 d_instantiate(dentry, inode); 187 if (tmpfile)
188 d_tmpfile(dentry, inode);
189 else
190 d_instantiate(dentry, inode);
191
184 out_free_acl: 192 out_free_acl:
185 if (default_acl) 193 if (default_acl)
186 posix_acl_release(default_acl); 194 posix_acl_release(default_acl);
@@ -189,11 +197,23 @@ xfs_vn_mknod(
189 return -error; 197 return -error;
190 198
191 out_cleanup_inode: 199 out_cleanup_inode:
192 xfs_cleanup_inode(dir, inode, dentry); 200 if (!tmpfile)
201 xfs_cleanup_inode(dir, inode, dentry);
202 iput(inode);
193 goto out_free_acl; 203 goto out_free_acl;
194} 204}
195 205
196STATIC int 206STATIC int
207xfs_vn_mknod(
208 struct inode *dir,
209 struct dentry *dentry,
210 umode_t mode,
211 dev_t rdev)
212{
213 return xfs_generic_create(dir, dentry, mode, rdev, false);
214}
215
216STATIC int
197xfs_vn_create( 217xfs_vn_create(
198 struct inode *dir, 218 struct inode *dir,
199 struct dentry *dentry, 219 struct dentry *dentry,
@@ -353,6 +373,7 @@ xfs_vn_symlink(
353 373
354 out_cleanup_inode: 374 out_cleanup_inode:
355 xfs_cleanup_inode(dir, inode, dentry); 375 xfs_cleanup_inode(dir, inode, dentry);
376 iput(inode);
356 out: 377 out:
357 return -error; 378 return -error;
358} 379}
@@ -1053,11 +1074,7 @@ xfs_vn_tmpfile(
1053 struct dentry *dentry, 1074 struct dentry *dentry,
1054 umode_t mode) 1075 umode_t mode)
1055{ 1076{
1056 int error; 1077 return xfs_generic_create(dir, dentry, mode, 0, true);
1057
1058 error = xfs_create_tmpfile(XFS_I(dir), dentry, mode);
1059
1060 return -error;
1061} 1078}
1062 1079
1063static const struct inode_operations xfs_inode_operations = { 1080static const struct inode_operations xfs_inode_operations = {
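The tmpfile rework above has xfs_create_tmpfile() return the new inode
through *ipp instead of calling d_tmpfile() itself, and folds mknod, create
and tmpfile into xfs_generic_create(): ACLs and security attributes are
attached first, and only then is the dentry published via d_tmpfile() or
d_instantiate(). The error paths diverge deliberately: a tmpfile was never
linked into the directory, so only the named case needs xfs_cleanup_inode(),
while both now drop the inode reference with iput(). A compilable toy sketch
of the publish-last dispatch (stand-in types, not the kernel interfaces):

	#include <stdbool.h>

	struct toy_inode { int security_ready; int published; };

	static int setup_security(struct toy_inode *ip)
	{
		ip->security_ready = 1;	/* ACLs, security xattrs, ... */
		return 0;
	}

	static int generic_create(struct toy_inode *ip, bool tmpfile)
	{
		int error = setup_security(ip);	/* before publication */
		if (error)
			return error;
		/* d_tmpfile()/d_instantiate() analogue: point of no return */
		ip->published = tmpfile ? 2 : 1;
		return 0;
	}

	static int vn_mknod(struct toy_inode *ip)
	{
		return generic_create(ip, false);
	}

	static int vn_tmpfile(struct toy_inode *ip)
	{
		return generic_create(ip, true);
	}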
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 8497a00e399d..a5f8bd9899d3 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -616,11 +616,13 @@ xfs_log_mount(
616 int error = 0; 616 int error = 0;
617 int min_logfsbs; 617 int min_logfsbs;
618 618
619 if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) 619 if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) {
620 xfs_notice(mp, "Mounting Filesystem"); 620 xfs_notice(mp, "Mounting V%d Filesystem",
621 else { 621 XFS_SB_VERSION_NUM(&mp->m_sb));
622 } else {
622 xfs_notice(mp, 623 xfs_notice(mp,
623"Mounting filesystem in no-recovery mode. Filesystem will be inconsistent."); 624"Mounting V%d filesystem in no-recovery mode. Filesystem will be inconsistent.",
625 XFS_SB_VERSION_NUM(&mp->m_sb));
624 ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); 626 ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
625 } 627 }
626 628
@@ -1181,11 +1183,14 @@ xlog_iodone(xfs_buf_t *bp)
1181 /* log I/O is always issued ASYNC */ 1183 /* log I/O is always issued ASYNC */
1182 ASSERT(XFS_BUF_ISASYNC(bp)); 1184 ASSERT(XFS_BUF_ISASYNC(bp));
1183 xlog_state_done_syncing(iclog, aborted); 1185 xlog_state_done_syncing(iclog, aborted);
1186
1184 /* 1187 /*
1185 * do not reference the buffer (bp) here as we could race 1188 * drop the buffer lock now that we are done. Nothing references
1186 * with it being freed after writing the unmount record to the 1189 * the buffer after this, so an unmount waiting on this lock can now
1187 * log. 1190 * tear it down safely. As such, it is unsafe to reference the buffer
1191 * (bp) after the unlock as we could race with it being freed.
1188 */ 1192 */
1193 xfs_buf_unlock(bp);
1189} 1194}
1190 1195
1191/* 1196/*
@@ -1368,8 +1373,16 @@ xlog_alloc_log(
1368 bp = xfs_buf_alloc(mp->m_logdev_targp, 0, BTOBB(log->l_iclog_size), 0); 1373 bp = xfs_buf_alloc(mp->m_logdev_targp, 0, BTOBB(log->l_iclog_size), 0);
1369 if (!bp) 1374 if (!bp)
1370 goto out_free_log; 1375 goto out_free_log;
1371 bp->b_iodone = xlog_iodone; 1376
1377 /*
1378 * The iclogbuf buffer locks are held over IO but we are not going to do
1379 * IO yet. Hence unlock the buffer so that the log IO path can grab it
 1380 * when appropriate.
1381 */
1372 ASSERT(xfs_buf_islocked(bp)); 1382 ASSERT(xfs_buf_islocked(bp));
1383 xfs_buf_unlock(bp);
1384
1385 bp->b_iodone = xlog_iodone;
1373 log->l_xbuf = bp; 1386 log->l_xbuf = bp;
1374 1387
1375 spin_lock_init(&log->l_icloglock); 1388 spin_lock_init(&log->l_icloglock);
@@ -1398,6 +1411,9 @@ xlog_alloc_log(
1398 if (!bp) 1411 if (!bp)
1399 goto out_free_iclog; 1412 goto out_free_iclog;
1400 1413
1414 ASSERT(xfs_buf_islocked(bp));
1415 xfs_buf_unlock(bp);
1416
1401 bp->b_iodone = xlog_iodone; 1417 bp->b_iodone = xlog_iodone;
1402 iclog->ic_bp = bp; 1418 iclog->ic_bp = bp;
1403 iclog->ic_data = bp->b_addr; 1419 iclog->ic_data = bp->b_addr;
@@ -1422,7 +1438,6 @@ xlog_alloc_log(
1422 iclog->ic_callback_tail = &(iclog->ic_callback); 1438 iclog->ic_callback_tail = &(iclog->ic_callback);
1423 iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize; 1439 iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize;
1424 1440
1425 ASSERT(xfs_buf_islocked(iclog->ic_bp));
1426 init_waitqueue_head(&iclog->ic_force_wait); 1441 init_waitqueue_head(&iclog->ic_force_wait);
1427 init_waitqueue_head(&iclog->ic_write_wait); 1442 init_waitqueue_head(&iclog->ic_write_wait);
1428 1443
@@ -1631,6 +1646,12 @@ xlog_cksum(
1631 * we transition the iclogs to IOERROR state *after* flushing all existing 1646 * we transition the iclogs to IOERROR state *after* flushing all existing
 1632 * iclogs to disk. This is because we don't want any new transactions to be 1647
1633 * started or completed afterwards. 1648 * started or completed afterwards.
1649 *
1650 * We lock the iclogbufs here so that we can serialise against IO completion
1651 * during unmount. We might be processing a shutdown triggered during unmount,
1652 * and that can occur asynchronously to the unmount thread, and hence we need to
 1653 * ensure that it completes before tearing down the iclogbufs. Hence we need
 1654 * to hold the buffer lock across the log IO to achieve that.
1634 */ 1655 */
1635STATIC int 1656STATIC int
1636xlog_bdstrat( 1657xlog_bdstrat(
@@ -1638,6 +1659,7 @@ xlog_bdstrat(
1638{ 1659{
1639 struct xlog_in_core *iclog = bp->b_fspriv; 1660 struct xlog_in_core *iclog = bp->b_fspriv;
1640 1661
1662 xfs_buf_lock(bp);
1641 if (iclog->ic_state & XLOG_STATE_IOERROR) { 1663 if (iclog->ic_state & XLOG_STATE_IOERROR) {
1642 xfs_buf_ioerror(bp, EIO); 1664 xfs_buf_ioerror(bp, EIO);
1643 xfs_buf_stale(bp); 1665 xfs_buf_stale(bp);
@@ -1645,7 +1667,8 @@ xlog_bdstrat(
1645 /* 1667 /*
1646 * It would seem logical to return EIO here, but we rely on 1668 * It would seem logical to return EIO here, but we rely on
1647 * the log state machine to propagate I/O errors instead of 1669 * the log state machine to propagate I/O errors instead of
1648 * doing it here. 1670 * doing it here. Similarly, IO completion will unlock the
1671 * buffer, so we don't do it here.
1649 */ 1672 */
1650 return 0; 1673 return 0;
1651 } 1674 }
@@ -1847,14 +1870,28 @@ xlog_dealloc_log(
1847 xlog_cil_destroy(log); 1870 xlog_cil_destroy(log);
1848 1871
1849 /* 1872 /*
1850 * always need to ensure that the extra buffer does not point to memory 1873 * Cycle all the iclogbuf locks to make sure all log IO completion
1851 * owned by another log buffer before we free it. 1874 * is done before we tear down these buffers.
1852 */ 1875 */
1876 iclog = log->l_iclog;
1877 for (i = 0; i < log->l_iclog_bufs; i++) {
1878 xfs_buf_lock(iclog->ic_bp);
1879 xfs_buf_unlock(iclog->ic_bp);
1880 iclog = iclog->ic_next;
1881 }
1882
1883 /*
1884 * Always need to ensure that the extra buffer does not point to memory
1885 * owned by another log buffer before we free it. Also, cycle the lock
1886 * first to ensure we've completed IO on it.
1887 */
1888 xfs_buf_lock(log->l_xbuf);
1889 xfs_buf_unlock(log->l_xbuf);
1853 xfs_buf_set_empty(log->l_xbuf, BTOBB(log->l_iclog_size)); 1890 xfs_buf_set_empty(log->l_xbuf, BTOBB(log->l_iclog_size));
1854 xfs_buf_free(log->l_xbuf); 1891 xfs_buf_free(log->l_xbuf);
1855 1892
1856 iclog = log->l_iclog; 1893 iclog = log->l_iclog;
1857 for (i=0; i<log->l_iclog_bufs; i++) { 1894 for (i = 0; i < log->l_iclog_bufs; i++) {
1858 xfs_buf_free(iclog->ic_bp); 1895 xfs_buf_free(iclog->ic_bp);
1859 next_iclog = iclog->ic_next; 1896 next_iclog = iclog->ic_next;
1860 kmem_free(iclog); 1897 kmem_free(iclog);
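The xfs_log.c changes make the iclog buffer lock track I/O: xlog_bdstrat()
takes it before submission, xlog_iodone() releases it from completion
context, and teardown simply cycles each lock (lock, then unlock). The cycle
acts as a barrier: it cannot be acquired until any in-flight completion has
run, after which the buffer is provably idle and safe to free. The lock must
therefore be releasable from a thread other than the acquirer, which is why
xfs_buf uses a semaphore; the same idiom in POSIX, as a sketch:

	#include <semaphore.h>

	struct buf {
		sem_t	lock;	/* sem_init(&lock, 0, 1): binary semaphore */
	};

	static void submit(struct buf *bp)
	{
		sem_wait(&bp->lock);	/* held across the async I/O */
		/* ... issue async I/O; completion calls done(bp) ... */
	}

	static void done(struct buf *bp)	/* async completion context */
	{
		/* ... process completion ... */
		sem_post(&bp->lock);	/* released by a different thread */
	}

	static void teardown(struct buf *bp)
	{
		sem_wait(&bp->lock);	/* blocks until done() has run */
		sem_post(&bp->lock);
		/* bp is now idle: safe to free */
	}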
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 993cb19e7d39..944f3d9456a8 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -743,8 +743,6 @@ xfs_mountfs(
743 new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE; 743 new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE;
744 if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size)) 744 if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size))
745 mp->m_inode_cluster_size = new_size; 745 mp->m_inode_cluster_size = new_size;
746 xfs_info(mp, "Using inode cluster size of %d bytes",
747 mp->m_inode_cluster_size);
748 } 746 }
749 747
750 /* 748 /*
diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/xfs_sb.c
index 0c0e41bbe4e3..8baf61afae1d 100644
--- a/fs/xfs/xfs_sb.c
+++ b/fs/xfs/xfs_sb.c
@@ -201,10 +201,6 @@ xfs_mount_validate_sb(
201 * write validation, we don't need to check feature masks. 201 * write validation, we don't need to check feature masks.
202 */ 202 */
203 if (check_version && XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) { 203 if (check_version && XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) {
204 xfs_alert(mp,
205"Version 5 superblock detected. This kernel has EXPERIMENTAL support enabled!\n"
206"Use of these features in this kernel is at your own risk!");
207
208 if (xfs_sb_has_compat_feature(sbp, 204 if (xfs_sb_has_compat_feature(sbp,
209 XFS_SB_FEAT_COMPAT_UNKNOWN)) { 205 XFS_SB_FEAT_COMPAT_UNKNOWN)) {
210 xfs_warn(mp, 206 xfs_warn(mp,
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index a4ae41c179a8..65d8c793a25c 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -603,6 +603,7 @@ DEFINE_INODE_EVENT(xfs_readlink);
603DEFINE_INODE_EVENT(xfs_inactive_symlink); 603DEFINE_INODE_EVENT(xfs_inactive_symlink);
604DEFINE_INODE_EVENT(xfs_alloc_file_space); 604DEFINE_INODE_EVENT(xfs_alloc_file_space);
605DEFINE_INODE_EVENT(xfs_free_file_space); 605DEFINE_INODE_EVENT(xfs_free_file_space);
606DEFINE_INODE_EVENT(xfs_zero_file_space);
606DEFINE_INODE_EVENT(xfs_collapse_file_space); 607DEFINE_INODE_EVENT(xfs_collapse_file_space);
607DEFINE_INODE_EVENT(xfs_readdir); 608DEFINE_INODE_EVENT(xfs_readdir);
608#ifdef CONFIG_XFS_POSIX_ACL 609#ifdef CONFIG_XFS_POSIX_ACL
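DEFINE_INODE_EVENT(xfs_zero_file_space) pairs with the
trace_xfs_zero_file_space(ip) call added in xfs_bmap_util.c above: one line
per event, because the heavy lifting lives in a shared event class. Abridged
from the xfs_inode_class pattern in xfs_trace.h (fields trimmed for brevity):

	DECLARE_EVENT_CLASS(xfs_inode_class,
		TP_PROTO(struct xfs_inode *ip),
		TP_ARGS(ip),
		TP_STRUCT__entry(
			__field(dev_t, dev)
			__field(xfs_ino_t, ino)
		),
		TP_fast_assign(
			__entry->dev = VFS_I(ip)->i_sb->s_dev;
			__entry->ino = ip->i_ino;
		),
		TP_printk("dev %d:%d ino 0x%llx",
			  MAJOR(__entry->dev), MINOR(__entry->dev),
			  __entry->ino)
	)

	#define DEFINE_INODE_EVENT(name) \
	DEFINE_EVENT(xfs_inode_class, name, \
		TP_PROTO(struct xfs_inode *ip), \
		TP_ARGS(ip))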