71 files changed, 1003 insertions, 814 deletions
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 6d589f28bf9b..895ac7dc9dbf 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -340,8 +340,6 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
                                &blocksize,&sbi->s_prefix,
                                sbi->s_volume, &mount_flags)) {
                printk(KERN_ERR "AFFS: Error parsing options\n");
-                kfree(sbi->s_prefix);
-                kfree(sbi);
                return -EINVAL;
        }
        /* N.B. after this point s_prefix must be released */
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 1c8c6cc6de30..4b0eff6da674 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -130,6 +130,15 @@ static void afs_cm_destructor(struct afs_call *call)
 {
        _enter("");
+        /* Break the callbacks here so that we do it after the final ACK is
+         * received.  The step number here must match the final number in
+         * afs_deliver_cb_callback().
+         */
+        if (call->unmarshall == 6) {
+                ASSERT(call->server && call->count && call->request);
+                afs_break_callbacks(call->server, call->count, call->request);
+        }
        afs_put_server(call->server);
        call->server = NULL;
        kfree(call->buffer);
@@ -272,6 +281,16 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
                _debug("trailer");
                if (skb->len != 0)
                        return -EBADMSG;
+                /* Record that the message was unmarshalled successfully so
+                 * that the call destructor knows to do the callback breaking
+                 * work, even if the final ACK isn't received.
+                 *
+                 * If the step number changes, then afs_cm_destructor() must be
+                 * updated also.
+                 */
+                call->unmarshall++;
+        case 6:
                break;
        }
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index be75b500005d..590b55f46d61 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -75,7 +75,7 @@ struct afs_call {
        const struct afs_call_type *type;       /* type of call */
        const struct afs_wait_mode *wait_mode;  /* completion wait mode */
        wait_queue_head_t       waitq;          /* processes awaiting completion */
-        work_func_t             async_workfn;
+        void (*async_workfn)(struct afs_call *call); /* asynchronous work function */
        struct work_struct      async_work;     /* asynchronous work processor */
        struct work_struct      work;           /* actual work processor */
        struct sk_buff_head     rx_queue;       /* received packets */
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index ef943df73b8c..03a3beb17004 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -25,7 +25,7 @@ static void afs_wake_up_call_waiter(struct afs_call *);
 static int afs_wait_for_call_to_complete(struct afs_call *);
 static void afs_wake_up_async_call(struct afs_call *);
 static int afs_dont_wait_for_call_to_complete(struct afs_call *);
-static void afs_process_async_call(struct work_struct *);
+static void afs_process_async_call(struct afs_call *);
 static void afs_rx_interceptor(struct sock *, unsigned long, struct sk_buff *);
 static int afs_deliver_cm_op_id(struct afs_call *, struct sk_buff *, bool);
@@ -58,6 +58,13 @@ static void afs_collect_incoming_call(struct work_struct *);
 static struct sk_buff_head afs_incoming_calls;
 static DECLARE_WORK(afs_collect_incoming_call_work, afs_collect_incoming_call);
+/*
+ * work_struct entry point for a call's async_work item
+ * - maps the work item back to the afs_call that embeds it and invokes that
+ *   call's async_workfn on the call
+ */
+static void afs_async_workfn(struct work_struct *work)
+{
+        struct afs_call *call;
+
+        call = container_of(work, struct afs_call, async_work);
+        call->async_workfn(call);
+}
 /*
 * open an RxRPC socket and bind it to be a server for callback notifications
 * - the socket is left in blocking mode and non-blocking ops use MSG_DONTWAIT
@@ -184,6 +191,28 @@ static void afs_free_call(struct afs_call *call)
 }
 /*
+ * End a call but do not free it
+ * - if the call still has an rxcall, it is ended via rxrpc_kernel_end_call()
+ *   and the pointer is cleared
+ * - the call type's destructor, if one is set, is then run on the call
+ * - the afs_call itself is not passed to afs_free_call() here
+ */
+static void afs_end_call_nofree(struct afs_call *call)
+{
+        if (call->rxcall) {
+                rxrpc_kernel_end_call(call->rxcall);
+                call->rxcall = NULL;
+        }
+        if (call->type->destructor)
+                call->type->destructor(call);
+}
+/*
+ * End a call and free it
+ * - performs the afs_end_call_nofree() teardown and then hands the call to
+ *   afs_free_call()
+ */
+static void afs_end_call(struct afs_call *call)
+{
+        afs_end_call_nofree(call);
+        afs_free_call(call);
+}
+/*
 * allocate a call with flat request and reply buffers
 */
 struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type,
@@ -326,7 +355,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
               atomic_read(&afs_outstanding_calls));
        call->wait_mode = wait_mode;
-        INIT_WORK(&call->async_work, afs_process_async_call);
+        call->async_workfn = afs_process_async_call;
+        INIT_WORK(&call->async_work, afs_async_workfn);
        memset(&srx, 0, sizeof(srx));
        srx.srx_family = AF_RXRPC;
@@ -383,11 +413,8 @@ error_do_abort:
        rxrpc_kernel_abort_call(rxcall, RX_USER_ABORT);
        while ((skb = skb_dequeue(&call->rx_queue)))
                afs_free_skb(skb);
-        rxrpc_kernel_end_call(rxcall);
-        call->rxcall = NULL;
 error_kill_call:
-        call->type->destructor(call);
+        afs_end_call(call);
-        afs_free_call(call);
        _leave(" = %d", ret);
        return ret;
 }
@@ -509,12 +536,8 @@ static void afs_deliver_to_call(struct afs_call *call)
        if (call->state >= AFS_CALL_COMPLETE) {
                while ((skb = skb_dequeue(&call->rx_queue)))
                        afs_free_skb(skb);
-                if (call->incoming) {
+                if (call->incoming)
-                        rxrpc_kernel_end_call(call->rxcall);
+                        afs_end_call(call);
-                        call->rxcall = NULL;
-                        call->type->destructor(call);
-                        afs_free_call(call);
-                }
        }
        _leave("");
@@ -564,10 +587,7 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
        }
        _debug("call complete");
-        rxrpc_kernel_end_call(call->rxcall);
+        afs_end_call(call);
-        call->rxcall = NULL;
-        call->type->destructor(call);
-        afs_free_call(call);
        _leave(" = %d", ret);
        return ret;
 }
@@ -603,11 +623,8 @@ static int afs_dont_wait_for_call_to_complete(struct afs_call *call)
 /*
 * delete an asynchronous call
 */
-static void afs_delete_async_call(struct work_struct *work)
+static void afs_delete_async_call(struct afs_call *call)
 {
-        struct afs_call *call =
-                container_of(work, struct afs_call, async_work);
        _enter("");
        afs_free_call(call);
@@ -620,11 +637,8 @@ static void afs_delete_async_call(struct work_struct *work)
 * - on a multiple-thread workqueue this work item may try to run on several
 *   CPUs at the same time
 */
-static void afs_process_async_call(struct work_struct *work)
+static void afs_process_async_call(struct afs_call *call)
 {
-        struct afs_call *call =
-                container_of(work, struct afs_call, async_work);
        _enter("");
        if (!skb_queue_empty(&call->rx_queue))
@@ -637,10 +651,7 @@ static void afs_process_async_call(struct work_struct *work)
                call->reply = NULL;
                /* kill the call */
-                rxrpc_kernel_end_call(call->rxcall);
+                afs_end_call_nofree(call);
-                call->rxcall = NULL;
-                if (call->type->destructor)
-                        call->type->destructor(call);
                /* we can't just delete the call because the work item may be
                 * queued */
@@ -663,13 +674,6 @@ void afs_transfer_reply(struct afs_call *call, struct sk_buff *skb)
        call->reply_size += len;
 }
-static void afs_async_workfn(struct work_struct *work)
-{
-        struct afs_call *call = container_of(work, struct afs_call, async_work);
-        call->async_workfn(work);
-}
 /*
 * accept the backlog of incoming calls
 */
@@ -790,10 +794,7 @@ void afs_send_empty_reply(struct afs_call *call)
                _debug("oom");
                rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT);
        default:
-                rxrpc_kernel_end_call(call->rxcall);
+                afs_end_call(call);
-                call->rxcall = NULL;
-                call->type->destructor(call);
-                afs_free_call(call);
                _leave(" [error]");
                return;
        }
@@ -823,17 +824,16 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
        call->state = AFS_CALL_AWAIT_ACK;
        n = rxrpc_kernel_send_data(call->rxcall, &msg, len);
        if (n >= 0) {
+                /* Success */
                _leave(" [replied]");
                return;
        }
        if (n == -ENOMEM) {
                _debug("oom");
                rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT);
        }
-        rxrpc_kernel_end_call(call->rxcall);
+        afs_end_call(call);
-        call->rxcall = NULL;
-        call->type->destructor(call);
-        afs_free_call(call);
        _leave(" [error]");
 }
diff --git a/fs/aio.c b/fs/aio.c
index 12a3de0ee6da..a0ed6c7d2cd2 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -112,6 +112,11 @@ struct kioctx {
        struct work_struct      free_work;
+        /*
+         * signals when all in-flight requests are done
+         */
+        struct completion *requests_done;
        struct {
                /*
                 * This counts the number of available slots in the ringbuffer,
@@ -508,6 +513,10 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
 {
        struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
+        /* At this point we know that there are no in-flight requests left */
+        if (ctx->requests_done)
+                complete(ctx->requests_done);
        INIT_WORK(&ctx->free_work, free_ioctx);
        schedule_work(&ctx->free_work);
 }
@@ -718,7 +727,8 @@ err:
 *      when the processes owning a context have all exited to encourage
 *      the rapid destruction of the kioctx.
 */
-static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx)
+static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
+                struct completion *requests_done)
 {
        if (!atomic_xchg(&ctx->dead, 1)) {
                struct kioctx_table *table;
@@ -747,7 +757,11 @@ static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx)
                if (ctx->mmap_size)
                        vm_munmap(ctx->mmap_base, ctx->mmap_size);
+                ctx->requests_done = requests_done;
                percpu_ref_kill(&ctx->users);
+        } else {
+                if (requests_done)
+                        complete(requests_done);
        }
 }
@@ -809,7 +823,7 @@ void exit_aio(struct mm_struct *mm)
                 */
                ctx->mmap_size = 0;
-                kill_ioctx(mm, ctx);
+                kill_ioctx(mm, ctx, NULL);
        }
 }
@@ -1185,7 +1199,7 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
        if (!IS_ERR(ioctx)) {
                ret = put_user(ioctx->user_id, ctxp);
                if (ret)
-                        kill_ioctx(current->mm, ioctx);
+                        kill_ioctx(current->mm, ioctx, NULL);
                percpu_ref_put(&ioctx->users);
        }
@@ -1203,8 +1217,22 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
 {
        struct kioctx *ioctx = lookup_ioctx(ctx);
        if (likely(NULL != ioctx)) {
-                kill_ioctx(current->mm, ioctx);
+                struct completion requests_done =
+                        COMPLETION_INITIALIZER_ONSTACK(requests_done);
+                /* Pass requests_done to kill_ioctx() where it can be set
+                 * in a thread-safe way. If we try to set it here then we have
+                 * a race condition if two io_destroy() calls run simultaneously.
+                 */
+                kill_ioctx(current->mm, ioctx, &requests_done);
                percpu_ref_put(&ioctx->users);
+                /* Wait until all IO for the context is done. Otherwise the
+                 * kernel keeps using user-space buffers even after the user
+                 * thinks the context is destroyed.
+                 */
+                wait_for_completion(&requests_done);
                return 0;
        }
        pr_debug("EINVAL: io_destroy: invalid context id\n");
@@ -1299,10 +1327,8 @@ rw_common:
                                                &iovec, compat)
                        : aio_setup_single_vector(req, rw, buf, &nr_segs,
                                                  iovec);
-                if (ret)
+                if (!ret)
-                        return ret;
+                        ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes);
-                ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes);
                if (ret < 0) {
                        if (iovec != &inline_vec)
                                kfree(iovec);
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 2caf36ac3e93..cc87c1abac97 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -179,7 +179,7 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
                spin_lock(&active->d_lock);
                /* Already gone? */
-                if (!d_count(active))
+                if ((int) d_count(active) <= 0)
                        goto next;
                qstr = &active->d_name;
@@ -230,7 +230,7 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
                spin_lock(&expiring->d_lock);
-                /* Bad luck, we've already been dentry_iput */
+                /* We've already been dentry_iput or unlinked */
                if (!expiring->d_inode)
                        goto next;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4c48df572bd6..ba6b88528dc7 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2058,6 +2058,20 @@ struct btrfs_ioctl_defrag_range_args {
 #define btrfs_raw_test_opt(o, opt)      ((o) & BTRFS_MOUNT_##opt)
 #define btrfs_test_opt(root, opt)       ((root)->fs_info->mount_opt & \
                                         BTRFS_MOUNT_##opt)
+/*
+ * Call btrfs_set_opt() for @opt on @root's mount options, first emitting the
+ * btrfs_info() message @fmt if btrfs_test_opt() reports the option as not
+ * currently set.
+ *
+ * The body is wrapped in do { } while (0) so that the expansion is a single
+ * statement: with bare braces, "if (x) btrfs_set_and_info(...); else ..."
+ * would leave a stray ';' before the else and fail to parse.
+ */
+#define btrfs_set_and_info(root, opt, fmt, args...)                     \
+do {                                                                    \
+        if (!btrfs_test_opt(root, opt))                                 \
+                btrfs_info((root)->fs_info, fmt, ##args);               \
+        btrfs_set_opt((root)->fs_info->mount_opt, opt);                 \
+} while (0)
+/*
+ * Call btrfs_clear_opt() for @opt on @root's mount options, first emitting
+ * the btrfs_info() message @fmt if btrfs_test_opt() reports the option as
+ * currently set.
+ *
+ * The body is wrapped in do { } while (0) so that the expansion is a single
+ * statement and can be used safely as the body of an unbraced if/else.
+ */
+#define btrfs_clear_and_info(root, opt, fmt, args...)                   \
+do {                                                                    \
+        if (btrfs_test_opt(root, opt))                                  \
+                btrfs_info((root)->fs_info, fmt, ##args);               \
+        btrfs_clear_opt((root)->fs_info->mount_opt, opt);               \
+} while (0)
 /*
 * Inode flags
 */
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 029d46c2e170..983314932af3 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2861,7 +2861,7 @@ retry_root_backup:
                        printk(KERN_ERR "BTRFS: failed to read log tree\n");
                        free_extent_buffer(log_tree_root->node);
                        kfree(log_tree_root);
-                        goto fail_trans_kthread;
+                        goto fail_qgroup;
                }
                /* returns with log_tree_root freed on success */
                ret = btrfs_recover_log_trees(log_tree_root);
@@ -2870,24 +2870,24 @@ retry_root_backup:
                                    "Failed to recover log tree");
                        free_extent_buffer(log_tree_root->node);
                        kfree(log_tree_root);
-                        goto fail_trans_kthread;
+                        goto fail_qgroup;
                }
                if (sb->s_flags & MS_RDONLY) {
                        ret = btrfs_commit_super(tree_root);
                        if (ret)
-                                goto fail_trans_kthread;
+                                goto fail_qgroup;
                }
        }
        ret = btrfs_find_orphan_roots(tree_root);
        if (ret)
-                goto fail_trans_kthread;
+                goto fail_qgroup;
        if (!(sb->s_flags & MS_RDONLY)) {
                ret = btrfs_cleanup_fs_roots(fs_info);
                if (ret)
-                        goto fail_trans_kthread;
+                        goto fail_qgroup;
                ret = btrfs_recover_relocation(tree_root);
                if (ret < 0) {
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1306487c82cf..5590af92094b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1542,6 +1542,7 @@ again:
                                ret = 0;
                }
                if (ret) {
+                        key.objectid = bytenr;
                        key.type = BTRFS_EXTENT_ITEM_KEY;
                        key.offset = num_bytes;
                        btrfs_release_path(path);
@@ -3542,11 +3543,13 @@ static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
        return extended_to_chunk(flags | tmp);
 }
-static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
+static u64 get_alloc_profile(struct btrfs_root *root, u64 orig_flags)
 {
        unsigned seq;
+        u64 flags;
        do {
+                flags = orig_flags;
                seq = read_seqbegin(&root->fs_info->profiles_lock);
                if (flags & BTRFS_BLOCK_GROUP_DATA)
@@ -5719,6 +5722,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                        if (ret > 0 && skinny_metadata) {
                                skinny_metadata = false;
+                                key.objectid = bytenr;
                                key.type = BTRFS_EXTENT_ITEM_KEY;
                                key.offset = num_bytes;
                                btrfs_release_path(path);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index eb742c07e7a4..ae6af072b635 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -800,7 +800,7 @@ next_slot:
                if (start > key.offset && end < extent_end) {
                        BUG_ON(del_nr > 0);
                        if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
-                                ret = -EINVAL;
+                                ret = -EOPNOTSUPP;
                                break;
                        }
@@ -846,7 +846,7 @@ next_slot:
                 */
                if (start <= key.offset && end < extent_end) {
                        if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
-                                ret = -EINVAL;
+                                ret = -EOPNOTSUPP;
                                break;
                        }
@@ -872,7 +872,7 @@ next_slot:
                if (start > key.offset && end >= extent_end) {
                        BUG_ON(del_nr > 0);
                        if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
-                                ret = -EINVAL;
+                                ret = -EOPNOTSUPP;
                                break;
                        }
@@ -1777,7 +1777,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
        start_pos = round_down(pos, root->sectorsize);
        if (start_pos > i_size_read(inode)) {
                /* Expand hole size to cover write data, preventing empty gap */
-                end_pos = round_up(pos + iov->iov_len, root->sectorsize);
+                end_pos = round_up(pos + count, root->sectorsize);
                err = btrfs_cont_expand(inode, i_size_read(inode), end_pos);
                if (err) {
                        mutex_unlock(&inode->i_mutex);
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index cc8ca193d830..86935f5ae291 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -176,7 +176,11 @@ static void start_caching(struct btrfs_root *root)
        tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu\n",
                          root->root_key.objectid);
-        BUG_ON(IS_ERR(tsk)); /* -ENOMEM */
+        if (IS_ERR(tsk)) {
+                btrfs_warn(root->fs_info, "failed to start inode caching task");
+                btrfs_clear_and_info(root, CHANGE_INODE_CACHE,
+                                "disabling inode map caching");
+        }
 }
 int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid)
@@ -205,24 +209,14 @@ again:
 void btrfs_return_ino(struct btrfs_root *root, u64 objectid)
 {
-        struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
        struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;
        if (!btrfs_test_opt(root, INODE_MAP_CACHE))
                return;
 again:
        if (root->cached == BTRFS_CACHE_FINISHED) {
-                __btrfs_add_free_space(ctl, objectid, 1);
+                __btrfs_add_free_space(pinned, objectid, 1);
        } else {
-                /*
-                 * If we are in the process of caching free ino chunks,
-                 * to avoid adding the same inode number to the free_ino
-                 * tree twice due to cross transaction, we'll leave it
-                 * in the pinned tree until a transaction is committed
-                 * or the caching work is done.
-                 */
                down_write(&root->fs_info->commit_root_sem);
                spin_lock(&root->cache_lock);
                if (root->cached == BTRFS_CACHE_FINISHED) {
@@ -234,11 +228,7 @@ again:
                start_caching(root);
-                if (objectid <= root->cache_progress ||
+                __btrfs_add_free_space(pinned, objectid, 1);
-                    objectid >= root->highest_objectid)
-                        __btrfs_add_free_space(ctl, objectid, 1);
-                else
-                        __btrfs_add_free_space(pinned, objectid, 1);
                up_write(&root->fs_info->commit_root_sem);
        }
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index e79ff6b90cb7..2f6d7b13b5bd 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3066,7 +3066,7 @@ process_slot:
                                                         new_key.offset + datal,
                                                         1);
                                if (ret) {
-                                        if (ret != -EINVAL)
+                                        if (ret != -EOPNOTSUPP)
                                                btrfs_abort_transaction(trans,
                                                                root, ret);
                                        btrfs_end_transaction(trans, root);
@@ -3120,6 +3120,8 @@ process_slot:
                        } else if (type == BTRFS_FILE_EXTENT_INLINE) {
                                u64 skip = 0;
                                u64 trim = 0;
+                                u64 aligned_end = 0;
                                if (off > key.offset) {
                                        skip = off - key.offset;
                                        new_key.offset += skip;
@@ -3136,12 +3138,14 @@ process_slot:
                                size -= skip + trim;
                                datal -= skip + trim;
+                                aligned_end = ALIGN(new_key.offset + datal,
+                                                    root->sectorsize);
                                ret = btrfs_drop_extents(trans, root, inode,
                                                         new_key.offset,
-                                                         new_key.offset + datal,
+                                                         aligned_end,
                                                         1);
                                if (ret) {
-                                        if (ret != -EINVAL)
+                                        if (ret != -EOPNOTSUPP)
                                                btrfs_abort_transaction(trans,
                                                        root, ret);
                                        btrfs_end_transaction(trans, root);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 1ac3ca98c429..fd38b5053479 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -349,6 +349,11 @@ static int fs_path_ensure_buf(struct fs_path *p, int len)
        if (p->buf_len >= len)
                return 0;
+        if (len > PATH_MAX) {
+                WARN_ON(1);
+                return -ENOMEM;
+        }
        path_len = p->end - p->start;
        old_buf_len = p->buf_len;
@@ -1663,7 +1668,7 @@ static int get_first_ref(struct btrfs_root *root, u64 ino,
                goto out;
        }
-        if (key.type == BTRFS_INODE_REF_KEY) {
+        if (found_key.type == BTRFS_INODE_REF_KEY) {
                struct btrfs_inode_ref *iref;
                iref = btrfs_item_ptr(path->nodes[0], path->slots[0],
                                      struct btrfs_inode_ref);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 5011aadacab8..9601d25a4607 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -385,20 +385,6 @@ static match_table_t tokens = {
        {Opt_err, NULL},
 };
-#define btrfs_set_and_info(root, opt, fmt, args...)                     \
-{                                                                       \
-        if (!btrfs_test_opt(root, opt))                                 \
-                btrfs_info(root->fs_info, fmt, ##args);                 \
-        btrfs_set_opt(root->fs_info->mount_opt, opt);                   \
-}
-#define btrfs_clear_and_info(root, opt, fmt, args...)                   \
-{                                                                       \
-        if (btrfs_test_opt(root, opt))                                  \
-                btrfs_info(root->fs_info, fmt, ##args);                 \
-        btrfs_clear_opt(root->fs_info->mount_opt, opt);                 \
-}
 /*
 * Regular mount options parser.  Everything that is needed only when
 * reading in a new superblock is parsed here.
@@ -1186,7 +1172,6 @@ static struct dentry *mount_subvol(const char *subvol_name, int flags,
                return ERR_PTR(-ENOMEM);
        mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name,
                             newargs);
-        kfree(newargs);
        if (PTR_RET(mnt) == -EBUSY) {
                if (flags & MS_RDONLY) {
@@ -1196,17 +1181,22 @@ static struct dentry *mount_subvol(const char *subvol_name, int flags,
                        int r;
                        mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY, device_name,
                                             newargs);
-                        if (IS_ERR(mnt))
+                        if (IS_ERR(mnt)) {
+                                kfree(newargs);
                                return ERR_CAST(mnt);
+                        }
                        r = btrfs_remount(mnt->mnt_sb, &flags, NULL);
                        if (r < 0) {
                                /* FIXME: release vfsmount mnt ??*/
+                                kfree(newargs);
                                return ERR_PTR(r);
                        }
                }
        }
+        kfree(newargs);
        if (IS_ERR(mnt))
                return ERR_CAST(mnt);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 2e5e648eb5c3..c561b628ebce 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3261,7 +3261,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
                        rel->seq = cpu_to_le32(cap->seq);
                        rel->issue_seq = cpu_to_le32(cap->issue_seq),
                        rel->mseq = cpu_to_le32(cap->mseq);
-                        rel->caps = cpu_to_le32(cap->issued);
+                        rel->caps = cpu_to_le32(cap->implemented);
                        rel->wanted = cpu_to_le32(cap->mds_wanted);
                        rel->dname_len = 0;
                        rel->dname_seq = 0;
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 766410a12c2c..c29d6ae68874 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -141,7 +141,7 @@ static int __dcache_readdir(struct file *file,  struct dir_context *ctx,
        /* start at beginning? */
        if (ctx->pos == 2 || last == NULL ||
-            ctx->pos < ceph_dentry(last)->offset) {
+            fpos_cmp(ctx->pos, ceph_dentry(last)->offset) < 0) {
                if (list_empty(&parent->d_subdirs))
                        goto out_unlock;
                p = parent->d_subdirs.prev;
@@ -182,9 +182,16 @@ more:
        spin_unlock(&dentry->d_lock);
        spin_unlock(&parent->d_lock);
+        /* make sure a dentry wasn't dropped while we didn't have parent lock */
+        if (!ceph_dir_is_complete(dir)) {
+                dout(" lost dir complete on %p; falling back to mds\n", dir);
+                dput(dentry);
+                err = -EAGAIN;
+                goto out;
+        }
        dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, ctx->pos,
             dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
-        ctx->pos = di->offset;
        if (!dir_emit(ctx, dentry->d_name.name,
                      dentry->d_name.len,
                      ceph_translate_ino(dentry->d_sb, dentry->d_inode->i_ino),
@@ -198,19 +205,12 @@ more:
                return 0;
        }
+        ctx->pos = di->offset + 1;
        if (last)
                dput(last);
        last = dentry;
-        ctx->pos++;
-        /* make sure a dentry wasn't dropped while we didn't have parent lock */
-        if (!ceph_dir_is_complete(dir)) {
-                dout(" lost dir complete on %p; falling back to mds\n", dir);
-                err = -EAGAIN;
-                goto out;
-        }
        spin_lock(&parent->d_lock);
        p = p->prev;    /* advance to next dentry */
        goto more;
@@ -296,6 +296,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
                err = __dcache_readdir(file, ctx, shared_gen);
                if (err != -EAGAIN)
                        return err;
+                frag = fpos_frag(ctx->pos);
+                off = fpos_off(ctx->pos);
        } else {
                spin_unlock(&ci->i_ceph_lock);
        }
@@ -446,7 +448,6 @@ more:
        if (atomic_read(&ci->i_release_count) == fi->dir_release_count) {
                dout(" marking %p complete\n", inode);
                __ceph_dir_set_complete(ci, fi->dir_release_count);
-                ci->i_max_offset = ctx->pos;
        }
        spin_unlock(&ci->i_ceph_lock);
@@ -935,14 +936,16 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
                 * to do it here.
                 */
-                /* d_move screws up d_subdirs order */
-                ceph_dir_clear_complete(new_dir);
                d_move(old_dentry, new_dentry);
                /* ensure target dentry is invalidated, despite
                   rehashing bug in vfs_rename_dir */
                ceph_invalidate_dentry_lease(new_dentry);
+                /* d_move screws up sibling dentries' offsets */
+                ceph_dir_clear_complete(old_dir);
+                ceph_dir_clear_complete(new_dir);
        }
        ceph_mdsc_put_request(req);
        return err;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 39da1c2efa50..88a6df4cbe6d 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1221,9 +1221,6 @@ static long ceph_fallocate(struct file *file, int mode,
        if (!S_ISREG(inode->i_mode))
                return -EOPNOTSUPP;
-        if (IS_SWAPFILE(inode))
-                return -ETXTBSY;
        mutex_lock(&inode->i_mutex);
        if (ceph_snap(inode) != CEPH_NOSNAP) {
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 0b0728e5be2d..233c6f96910a 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -744,7 +744,6 @@ static int fill_inode(struct inode *inode,
            !__ceph_dir_is_complete(ci)) {
                dout(" marking %p complete (empty)\n", inode);
                __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count));
-                ci->i_max_offset = 2;
        }
 no_change:
        /* only update max_size on auth cap */
@@ -890,41 +889,6 @@ out_unlock:
 }
 /*
- * Set dentry's directory position based on the current dir's max, and
- * order it in d_subdirs, so that dcache_readdir behaves.
- *
- * Always called under directory's i_mutex.
- */
-static void ceph_set_dentry_offset(struct dentry *dn)
-{
-        struct dentry *dir = dn->d_parent;
-        struct inode *inode = dir->d_inode;
-        struct ceph_inode_info *ci;
-        struct ceph_dentry_info *di;
-        BUG_ON(!inode);
-        ci = ceph_inode(inode);
-        di = ceph_dentry(dn);
-        spin_lock(&ci->i_ceph_lock);
-        if (!__ceph_dir_is_complete(ci)) {
-                spin_unlock(&ci->i_ceph_lock);
-                return;
-        }
-        di->offset = ceph_inode(inode)->i_max_offset++;
-        spin_unlock(&ci->i_ceph_lock);
-        spin_lock(&dir->d_lock);
-        spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
-        list_move(&dn->d_u.d_child, &dir->d_subdirs);
-        dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset,
-             dn->d_u.d_child.prev, dn->d_u.d_child.next);
-        spin_unlock(&dn->d_lock);
-        spin_unlock(&dir->d_lock);
-}
-/*
 * splice a dentry to an inode.
 * caller must hold directory i_mutex for this to be safe.
 *
@@ -933,7 +897,7 @@ static void ceph_set_dentry_offset(struct dentry *dn)
 * the caller) if we fail.
 */
 static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
-                                    bool *prehash, bool set_offset)
+                                    bool *prehash)
 {
        struct dentry *realdn;
@@ -965,8 +929,6 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
        }
        if ((!prehash || *prehash) && d_unhashed(dn))
                d_rehash(dn);
-        if (set_offset)
-                ceph_set_dentry_offset(dn);
 out:
        return dn;
 }
@@ -987,7 +949,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 {
        struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
        struct inode *in = NULL;
-        struct ceph_mds_reply_inode *ininfo;
        struct ceph_vino vino;
        struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
        int err = 0;
@@ -1161,6 +1122,9 @@ retry_lookup:
                /* rename? */
                if (req->r_old_dentry && req->r_op == CEPH_MDS_OP_RENAME) {
+                        struct inode *olddir = req->r_old_dentry_dir;
+                        BUG_ON(!olddir);
                        dout(" src %p '%.*s' dst %p '%.*s'\n",
                             req->r_old_dentry,
                             req->r_old_dentry->d_name.len,
@@ -1180,13 +1144,10 @@ retry_lookup:
                           rehashing bug in vfs_rename_dir */
                        ceph_invalidate_dentry_lease(dn);
-                        /*
+                        /* d_move screws up sibling dentries' offsets */
-                         * d_move() puts the renamed dentry at the end of
+                        ceph_dir_clear_complete(dir);
-                         * d_subdirs.  We need to assign it an appropriate
+                        ceph_dir_clear_complete(olddir);
-                         * directory offset so we can behave when dir is
-                         * complete.
-                         */
-                        ceph_set_dentry_offset(req->r_old_dentry);
                        dout("dn %p gets new offset %lld\n", req->r_old_dentry,
                             ceph_dentry(req->r_old_dentry)->offset);
@@ -1213,8 +1174,9 @@ retry_lookup:
                /* attach proper inode */
                if (!dn->d_inode) {
+                        ceph_dir_clear_complete(dir);
                        ihold(in);
-                        dn = splice_dentry(dn, in, &have_lease, true);
+                        dn = splice_dentry(dn, in, &have_lease);
                        if (IS_ERR(dn)) {
                                err = PTR_ERR(dn);
                                goto done;
@@ -1235,17 +1197,16 @@ retry_lookup:
                   (req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
                    req->r_op == CEPH_MDS_OP_MKSNAP)) {
                struct dentry *dn = req->r_dentry;
+                struct inode *dir = req->r_locked_dir;
                /* fill out a snapdir LOOKUPSNAP dentry */
                BUG_ON(!dn);
-                BUG_ON(!req->r_locked_dir);
+                BUG_ON(!dir);
-                BUG_ON(ceph_snap(req->r_locked_dir) != CEPH_SNAPDIR);
+                BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR);
-                ininfo = rinfo->targeti.in;
-                vino.ino = le64_to_cpu(ininfo->ino);
-                vino.snap = le64_to_cpu(ininfo->snapid);
                dout(" linking snapped dir %p to dn %p\n", in, dn);
+                ceph_dir_clear_complete(dir);
                ihold(in);
-                dn = splice_dentry(dn, in, NULL, true);
+                dn = splice_dentry(dn, in, NULL);
                if (IS_ERR(dn)) {
                        err = PTR_ERR(dn);
                        goto done;
@@ -1407,7 +1368,7 @@ retry_lookup:
                }
                if (!dn->d_inode) {
-                        dn = splice_dentry(dn, in, NULL, false);
+                        dn = splice_dentry(dn, in, NULL);
                        if (IS_ERR(dn)) {
                                err = PTR_ERR(dn);
                                dn = NULL;
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index fdf941b44ff1..a822a6e58290 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -109,6 +109,8 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
                return PTR_ERR(req);
        req->r_inode = inode;
        ihold(inode);
+        req->r_num_caps = 1;
        req->r_inode_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL;
        req->r_args.setlayout.layout.fl_stripe_unit =
@@ -153,6 +155,7 @@ static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg)
                return PTR_ERR(req);
        req->r_inode = inode;
        ihold(inode);
+        req->r_num_caps = 1;
        req->r_args.setlayout.layout.fl_stripe_unit =
                        cpu_to_le32(l.stripe_unit);
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index d94ba0df9f4d..191398852a2e 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -45,6 +45,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
                return PTR_ERR(req);
        req->r_inode = inode;
        ihold(inode);
+        req->r_num_caps = 1;
        /* mds requires start and length rather than start and end */
        if (LLONG_MAX == fl->fl_end)
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 7866cd05a6bb..ead05cc1f447 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -266,7 +266,6 @@ struct ceph_inode_info {
        struct timespec i_rctime;
        u64 i_rbytes, i_rfiles, i_rsubdirs;
        u64 i_files, i_subdirs;
-        u64 i_max_offset;  /* largest readdir offset, set with complete dir */
        struct rb_root i_fragtree;
        struct mutex i_fragtree_mutex;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index aadc2b68678b..a22d667f1069 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1737,6 +1737,9 @@ cifs_inode_needs_reval(struct inode *inode)
        if (cifs_i->time == 0)
                return true;
+        if (!cifs_sb->actimeo)
+                return true;
        if (!time_in_range(jiffies, cifs_i->time,
                                cifs_i->time + cifs_sb->actimeo))
                return true;
diff --git a/fs/compat.c b/fs/compat.c
index ca926ad0430c..66d3d3c6b4b2 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -457,9 +457,9 @@ COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
        case F_GETLK64:
        case F_SETLK64:
        case F_SETLKW64:
-        case F_GETLKP:
+        case F_OFD_GETLK:
-        case F_SETLKP:
+        case F_OFD_SETLK:
-        case F_SETLKPW:
+        case F_OFD_SETLKW:
                ret = get_compat_flock64(&f, compat_ptr(arg));
                if (ret != 0)
                        break;
@@ -468,7 +468,7 @@ COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
                conv_cmd = convert_fcntl_cmd(cmd);
                ret = sys_fcntl(fd, conv_cmd, (unsigned long)&f);
                set_fs(old_fs);
-                if ((conv_cmd == F_GETLK || conv_cmd == F_GETLKP) && ret == 0) {
+                if ((conv_cmd == F_GETLK || conv_cmd == F_OFD_GETLK) && ret == 0) {
                        /* need to return lock information - see above for commentary */
                        if (f.l_start > COMPAT_LOFF_T_MAX)
                                ret = -EOVERFLOW;
@@ -493,9 +493,9 @@ COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
        case F_GETLK64:
        case F_SETLK64:
        case F_SETLKW64:
-        case F_GETLKP:
+        case F_OFD_GETLK:
-        case F_SETLKP:
+        case F_OFD_SETLK:
-        case F_SETLKPW:
+        case F_OFD_SETLKW:
                return -EINVAL;
        }
        return compat_sys_fcntl64(fd, cmd, arg);
diff --git a/fs/dcache.c b/fs/dcache.c
index 40707d88a945..be2bea834bf4 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -246,16 +246,8 @@ static void __d_free(struct rcu_head *head)
        kmem_cache_free(dentry_cache, dentry); 
 }
-/*
+static void dentry_free(struct dentry *dentry)
- * no locks, please.
- */
-static void d_free(struct dentry *dentry)
 {
-        BUG_ON((int)dentry->d_lockref.count > 0);
-        this_cpu_dec(nr_dentry);
-        if (dentry->d_op && dentry->d_op->d_release)
-                dentry->d_op->d_release(dentry);
        /* if dentry was never visible to RCU, immediate free is OK */
        if (!(dentry->d_flags & DCACHE_RCUACCESS))
                __d_free(&dentry->d_u.d_rcu);
@@ -403,56 +395,6 @@ static void dentry_lru_add(struct dentry *dentry)
                d_lru_add(dentry);
 }
-/*
- * Remove a dentry with references from the LRU.
- *
- * If we are on the shrink list, then we can get to try_prune_one_dentry() and
- * lose our last reference through the parent walk. In this case, we need to
- * remove ourselves from the shrink list, not the LRU.
- */
-static void dentry_lru_del(struct dentry *dentry)
-{
-        if (dentry->d_flags & DCACHE_LRU_LIST) {
-                if (dentry->d_flags & DCACHE_SHRINK_LIST)
-                        return d_shrink_del(dentry);
-                d_lru_del(dentry);
-        }
-}
-/**
- * d_kill - kill dentry and return parent
- * @dentry: dentry to kill
- * @parent: parent dentry
- *
- * The dentry must already be unhashed and removed from the LRU.
- *
- * If this is the root of the dentry tree, return NULL.
- *
- * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by
- * d_kill.
- */
-static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
-        __releases(dentry->d_lock)
-        __releases(parent->d_lock)
-        __releases(dentry->d_inode->i_lock)
-{
-        list_del(&dentry->d_u.d_child);
-        /*
-         * Inform d_walk() that we are no longer attached to the
-         * dentry tree
-         */
-        dentry->d_flags |= DCACHE_DENTRY_KILLED;
-        if (parent)
-                spin_unlock(&parent->d_lock);
-        dentry_iput(dentry);
-        /*
-         * dentry_iput drops the locks, at which point nobody (except
-         * transient RCU lookups) can reach this dentry.
-         */
-        d_free(dentry);
-        return parent;
-}
 /**
 * d_drop - drop a dentry
 * @dentry: dentry to drop
@@ -499,37 +441,12 @@ void d_drop(struct dentry *dentry)
 }
 EXPORT_SYMBOL(d_drop);
-/*
+static void __dentry_kill(struct dentry *dentry)
- * Finish off a dentry we've decided to kill.
- * dentry->d_lock must be held, returns with it unlocked.
- * If ref is non-zero, then decrement the refcount too.
- * Returns dentry requiring refcount drop, or NULL if we're done.
- */
-static struct dentry *
-dentry_kill(struct dentry *dentry, int unlock_on_failure)
-        __releases(dentry->d_lock)
 {
-        struct inode *inode;
+        struct dentry *parent = NULL;
-        struct dentry *parent;
+        bool can_free = true;
+        if (!IS_ROOT(dentry))
-        inode = dentry->d_inode;
-        if (inode && !spin_trylock(&inode->i_lock)) {
-relock:
-                if (unlock_on_failure) {
-                        spin_unlock(&dentry->d_lock);
-                        cpu_relax();
-                }
-                return dentry; /* try again with same dentry */
-        }
-        if (IS_ROOT(dentry))
-                parent = NULL;
-        else
                parent = dentry->d_parent;
-        if (parent && !spin_trylock(&parent->d_lock)) {
-                if (inode)
-                        spin_unlock(&inode->i_lock);
-                goto relock;
-        }
        /*
         * The dentry is now unrecoverably dead to the world.
@@ -543,10 +460,103 @@ relock:
        if ((dentry->d_flags & DCACHE_OP_PRUNE) && !d_unhashed(dentry))
                dentry->d_op->d_prune(dentry);
-        dentry_lru_del(dentry);
+        if (dentry->d_flags & DCACHE_LRU_LIST) {
+                if (!(dentry->d_flags & DCACHE_SHRINK_LIST))
+                        d_lru_del(dentry);
+        }
        /* if it was on the hash then remove it */
        __d_drop(dentry);
-        return d_kill(dentry, parent);
+        list_del(&dentry->d_u.d_child);
+        /*
+         * Inform d_walk() that we are no longer attached to the
+         * dentry tree
+         */
+        dentry->d_flags |= DCACHE_DENTRY_KILLED;
+        if (parent)
+                spin_unlock(&parent->d_lock);
+        dentry_iput(dentry);
+        /*
+         * dentry_iput drops the locks, at which point nobody (except
+         * transient RCU lookups) can reach this dentry.
+         */
+        BUG_ON((int)dentry->d_lockref.count > 0);
+        this_cpu_dec(nr_dentry);
+        if (dentry->d_op && dentry->d_op->d_release)
+                dentry->d_op->d_release(dentry);
+        spin_lock(&dentry->d_lock);
+        if (dentry->d_flags & DCACHE_SHRINK_LIST) {
+                dentry->d_flags |= DCACHE_MAY_FREE;
+                can_free = false;
+        }
+        spin_unlock(&dentry->d_lock);
+        if (likely(can_free))
+                dentry_free(dentry);
+}
+/*
+ * Finish off a dentry we've decided to kill.
+ * dentry->d_lock must be held, returns with it unlocked.
+ * If a needed trylock fails, returns the same dentry so the caller retries.
+ * Otherwise returns the parent (needing a refcount drop), or NULL if done.
+ */
+static struct dentry *dentry_kill(struct dentry *dentry)
+        __releases(dentry->d_lock)
+{
+        struct inode *inode = dentry->d_inode;
+        struct dentry *parent = NULL;
+        if (inode && unlikely(!spin_trylock(&inode->i_lock)))
+                goto failed;
+        if (!IS_ROOT(dentry)) {
+                parent = dentry->d_parent;
+                if (unlikely(!spin_trylock(&parent->d_lock))) {
+                        if (inode)
+                                spin_unlock(&inode->i_lock);
+                        goto failed;
+                }
+        }
+        __dentry_kill(dentry);
+        return parent;
+failed:
+        spin_unlock(&dentry->d_lock);
+        cpu_relax();
+        return dentry; /* try again with same dentry */
+}
+static inline struct dentry *lock_parent(struct dentry *dentry)
+{
+        struct dentry *parent = dentry->d_parent;
+        if (IS_ROOT(dentry))
+                return NULL;
+        if (likely(spin_trylock(&parent->d_lock)))
+                return parent;
+        spin_unlock(&dentry->d_lock);
+        rcu_read_lock();
+again:
+        parent = ACCESS_ONCE(dentry->d_parent);
+        spin_lock(&parent->d_lock);
+        /*
+         * We can't blindly lock dentry until we are sure
+         * that we won't violate the locking order.
+         * Any changes of dentry->d_parent must have
+         * been done with parent->d_lock held, so
+         * spin_lock() above is enough of a barrier
+         * for checking if it's still our child.
+         */
+        if (unlikely(parent != dentry->d_parent)) {
+                spin_unlock(&parent->d_lock);
+                goto again;
+        }
+        rcu_read_unlock();
+        if (parent != dentry)
+                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+        else
+                parent = NULL;
+        return parent;
 }
 /* 
@@ -602,7 +612,7 @@ repeat:
        return;
 kill_it:
-        dentry = dentry_kill(dentry, 1);
+        dentry = dentry_kill(dentry);
        if (dentry)
                goto repeat;
 }
@@ -815,64 +825,15 @@ restart:
 }
 EXPORT_SYMBOL(d_prune_aliases);
-/*
- * Try to throw away a dentry - free the inode, dput the parent.
- * Requires dentry->d_lock is held, and dentry->d_count == 0.
- * Releases dentry->d_lock.
- *
- * This may fail if locks cannot be acquired no problem, just try again.
- */
-static struct dentry * try_prune_one_dentry(struct dentry *dentry)
-        __releases(dentry->d_lock)
-{
-        struct dentry *parent;
-        parent = dentry_kill(dentry, 0);
-        /*
-         * If dentry_kill returns NULL, we have nothing more to do.
-         * if it returns the same dentry, trylocks failed. In either
-         * case, just loop again.
-         *
-         * Otherwise, we need to prune ancestors too. This is necessary
-         * to prevent quadratic behavior of shrink_dcache_parent(), but
-         * is also expected to be beneficial in reducing dentry cache
-         * fragmentation.
-         */
-        if (!parent)
-                return NULL;
-        if (parent == dentry)
-                return dentry;
-        /* Prune ancestors. */
-        dentry = parent;
-        while (dentry) {
-                if (lockref_put_or_lock(&dentry->d_lockref))
-                        return NULL;
-                dentry = dentry_kill(dentry, 1);
-        }
-        return NULL;
-}
 static void shrink_dentry_list(struct list_head *list)
 {
-        struct dentry *dentry;
+        struct dentry *dentry, *parent;
-        rcu_read_lock();
-        for (;;) {
-                dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
-                if (&dentry->d_lru == list)
-                        break; /* empty */
-                /*
+        while (!list_empty(list)) {
-                 * Get the dentry lock, and re-verify that the dentry is
+                struct inode *inode;
-                 * this on the shrinking list. If it is, we know that
+                dentry = list_entry(list->prev, struct dentry, d_lru);
-                 * DCACHE_SHRINK_LIST and DCACHE_LRU_LIST are set.
-                 */
                spin_lock(&dentry->d_lock);
-                if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
+                parent = lock_parent(dentry);
-                        spin_unlock(&dentry->d_lock);
-                        continue;
-                }
                /*
                 * The dispose list is isolated and dentries are not accounted
@@ -885,30 +846,63 @@ static void shrink_dentry_list(struct list_head *list)
                 * We found an inuse dentry which was not removed from
                 * the LRU because of laziness during lookup. Do not free it.
                 */
-                if (dentry->d_lockref.count) {
+                if ((int)dentry->d_lockref.count > 0) {
                        spin_unlock(&dentry->d_lock);
+                        if (parent)
+                                spin_unlock(&parent->d_lock);
                        continue;
                }
-                rcu_read_unlock();
-                /*
-                 * If 'try_to_prune()' returns a dentry, it will
-                 * be the same one we passed in, and d_lock will
-                 * have been held the whole time, so it will not
-                 * have been added to any other lists. We failed
-                 * to get the inode lock.
-                 *
-                 * We just add it back to the shrink list.
-                 */
-                dentry = try_prune_one_dentry(dentry);
-                rcu_read_lock();
+                if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) {
-                if (dentry) {
+                        bool can_free = dentry->d_flags & DCACHE_MAY_FREE;
+                        spin_unlock(&dentry->d_lock);
+                        if (parent)
+                                spin_unlock(&parent->d_lock);
+                        if (can_free)
+                                dentry_free(dentry);
+                        continue;
+                }
+                inode = dentry->d_inode;
+                if (inode && unlikely(!spin_trylock(&inode->i_lock))) {
                        d_shrink_add(dentry, list);
                        spin_unlock(&dentry->d_lock);
+                        if (parent)
+                                spin_unlock(&parent->d_lock);
+                        continue;
+                }
+                __dentry_kill(dentry);
+                /*
+                 * We need to prune ancestors too. This is necessary to prevent
+                 * quadratic behavior of shrink_dcache_parent(), but is also
+                 * expected to be beneficial in reducing dentry cache
+                 * fragmentation.
+                 */
+                dentry = parent;
+                while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) {
+                        parent = lock_parent(dentry);
+                        if (dentry->d_lockref.count != 1) {
+                                dentry->d_lockref.count--;
+                                spin_unlock(&dentry->d_lock);
+                                if (parent)
+                                        spin_unlock(&parent->d_lock);
+                                break;
+                        }
+                        inode = dentry->d_inode;        /* can't be NULL */
+                        if (unlikely(!spin_trylock(&inode->i_lock))) {
+                                spin_unlock(&dentry->d_lock);
+                                if (parent)
+                                        spin_unlock(&parent->d_lock);
+                                cpu_relax();
+                                continue;
+                        }
+                        __dentry_kill(dentry);
+                        dentry = parent;
                }
        }
-        rcu_read_unlock();
 }
 static enum lru_status
@@ -1261,34 +1255,23 @@ static enum d_walk_ret select_collect(void *_data, struct dentry *dentry)
        if (data->start == dentry)
                goto out;
-        /*
+        if (dentry->d_flags & DCACHE_SHRINK_LIST) {
-         * move only zero ref count dentries to the dispose list.
-         *
-         * Those which are presently on the shrink list, being processed
-         * by shrink_dentry_list(), shouldn't be moved.  Otherwise the
-         * loop in shrink_dcache_parent() might not make any progress
-         * and loop forever.
-         */
-        if (dentry->d_lockref.count) {
-                dentry_lru_del(dentry);
-        } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
-                /*
-                 * We can't use d_lru_shrink_move() because we
-                 * need to get the global LRU lock and do the
-                 * LRU accounting.
-                 */
-                d_lru_del(dentry);
-                d_shrink_add(dentry, &data->dispose);
                data->found++;
-                ret = D_WALK_NORETRY;
+        } else {
+                if (dentry->d_flags & DCACHE_LRU_LIST)
+                        d_lru_del(dentry);
+                if (!dentry->d_lockref.count) {
+                        d_shrink_add(dentry, &data->dispose);
+                        data->found++;
+                }
        }
        /*
         * We can return to the caller if we have found some (this
         * ensures forward progress). We'll be coming back to find
         * the rest.
         */
-        if (data->found && need_resched())
+        if (!list_empty(&data->dispose))
-                ret = D_WALK_QUIT;
+                ret = need_resched() ? D_WALK_QUIT : D_WALK_NORETRY;
 out:
        return ret;
 }
@@ -1318,45 +1301,35 @@ void shrink_dcache_parent(struct dentry *parent)
 }
 EXPORT_SYMBOL(shrink_dcache_parent);
-static enum d_walk_ret umount_collect(void *_data, struct dentry *dentry)
+static enum d_walk_ret umount_check(void *_data, struct dentry *dentry)
 {
-        struct select_data *data = _data;
+        /* it has busy descendents; complain about those instead */
-        enum d_walk_ret ret = D_WALK_CONTINUE;
+        if (!list_empty(&dentry->d_subdirs))
+                return D_WALK_CONTINUE;
-        if (dentry->d_lockref.count) {
+        /* root with refcount 1 is fine */
-                dentry_lru_del(dentry);
+        if (dentry == _data && dentry->d_lockref.count == 1)
-                if (likely(!list_empty(&dentry->d_subdirs)))
+                return D_WALK_CONTINUE;
-                        goto out;
-                if (dentry == data->start && dentry->d_lockref.count == 1)
+        printk(KERN_ERR "BUG: Dentry %p{i=%lx,n=%pd} "
-                        goto out;
+                        " still in use (%d) [unmount of %s %s]\n",
-                printk(KERN_ERR
-                       "BUG: Dentry %p{i=%lx,n=%s}"
-                       " still in use (%d)"
-                       " [unmount of %s %s]\n",
                       dentry,
                       dentry->d_inode ?
                       dentry->d_inode->i_ino : 0UL,
-                       dentry->d_name.name,
+                       dentry,
                       dentry->d_lockref.count,
                       dentry->d_sb->s_type->name,
                       dentry->d_sb->s_id);
-                BUG();
+        WARN_ON(1);
-        } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
+        return D_WALK_CONTINUE;
-                /*
+}
-                 * We can't use d_lru_shrink_move() because we
-                 * need to get the global LRU lock and do the
+static void do_one_tree(struct dentry *dentry)
-                 * LRU accounting.
+{
-                 */
+        shrink_dcache_parent(dentry);
-                if (dentry->d_flags & DCACHE_LRU_LIST)
+        d_walk(dentry, dentry, umount_check, NULL);
-                        d_lru_del(dentry);
+        d_drop(dentry);
-                d_shrink_add(dentry, &data->dispose);
+        dput(dentry);
-                data->found++;
-                ret = D_WALK_NORETRY;
-        }
-out:
-        if (data->found && need_resched())
-                ret = D_WALK_QUIT;
-        return ret;
 }
 /*
@@ -1366,40 +1339,15 @@ void shrink_dcache_for_umount(struct super_block *sb)
 {
        struct dentry *dentry;
-        if (down_read_trylock(&sb->s_umount))
+        WARN(down_read_trylock(&sb->s_umount), "s_umount should've been locked");
-                BUG();
        dentry = sb->s_root;
        sb->s_root = NULL;
-        for (;;) {
+        do_one_tree(dentry);
-                struct select_data data;
-                INIT_LIST_HEAD(&data.dispose);
-                data.start = dentry;
-                data.found = 0;
-                d_walk(dentry, &data, umount_collect, NULL);
-                if (!data.found)
-                        break;
-                shrink_dentry_list(&data.dispose);
-                cond_resched();
-        }
-        d_drop(dentry);
-        dput(dentry);
        while (!hlist_bl_empty(&sb->s_anon)) {
-                struct select_data data;
+                dentry = dget(hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash));
-                dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
+                do_one_tree(dentry);
-                INIT_LIST_HEAD(&data.dispose);
-                data.start = NULL;
-                data.found = 0;
-                d_walk(dentry, &data, umount_collect, NULL);
-                if (data.found)
-                        shrink_dentry_list(&data.dispose);
-                cond_resched();
        }
 }
@@ -1647,8 +1595,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
        unsigned add_flags = d_flags_for_inode(inode);
        spin_lock(&dentry->d_lock);
-        dentry->d_flags &= ~DCACHE_ENTRY_TYPE;
+        __d_set_type(dentry, add_flags);
-        dentry->d_flags |= add_flags;
        if (inode)
                hlist_add_head(&dentry->d_alias, &inode->i_dentry);
        dentry->d_inode = inode;
diff --git a/fs/exec.c b/fs/exec.c
index 476f3ebf437e..238b7aa26f68 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -657,10 +657,10 @@ int setup_arg_pages(struct linux_binprm *bprm,
        unsigned long rlim_stack;
 #ifdef CONFIG_STACK_GROWSUP
-        /* Limit stack size to 1GB */
+        /* Limit stack size */
        stack_base = rlimit_max(RLIMIT_STACK);
-        if (stack_base > (1 << 30))
+        if (stack_base > STACK_SIZE_MAX)
-                stack_base = 1 << 30;
+                stack_base = STACK_SIZE_MAX;
        /* Make sure we didn't let the argument array grow too large. */
        if (vma->vm_end - vma->vm_start > stack_base)
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 6ea7b1436bbc..5c56785007e0 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -667,7 +667,7 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
                        continue;
                x = ext4_count_free(bitmap_bh->b_data,
-                                    EXT4_BLOCKS_PER_GROUP(sb) / 8);
+                                    EXT4_CLUSTERS_PER_GROUP(sb) / 8);
                printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n",
                        i, ext4_free_group_clusters(sb, gdp), x);
                bitmap_count += x;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index f1c65dc7cc0a..66946aa62127 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2466,23 +2466,6 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
        up_write(&EXT4_I(inode)->i_data_sem);
 }
-/*
- * Update i_disksize after writeback has been started. Races with truncate
- * are avoided by checking i_size under i_data_sem.
- */
-static inline void ext4_wb_update_i_disksize(struct inode *inode, loff_t newsize)
-{
-        loff_t i_size;
-        down_write(&EXT4_I(inode)->i_data_sem);
-        i_size = i_size_read(inode);
-        if (newsize > i_size)
-                newsize = i_size;
-        if (newsize > EXT4_I(inode)->i_disksize)
-                EXT4_I(inode)->i_disksize = newsize;
-        up_write(&EXT4_I(inode)->i_data_sem);
-}
 struct ext4_group_info {
        unsigned long   bb_state;
        struct rb_root  bb_free_root;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 82df3ce9874a..01b0c208f625 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3313,6 +3313,11 @@ static int ext4_split_extent(handle_t *handle,
                return PTR_ERR(path);
        depth = ext_depth(inode);
        ex = path[depth].p_ext;
+        if (!ex) {
+                EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
+                                 (unsigned long) map->m_lblk);
+                return -EIO;
+        }
        uninitialized = ext4_ext_is_uninitialized(ex);
        split_flag1 = 0;
@@ -3694,6 +3699,12 @@ static int ext4_convert_initialized_extents(handle_t *handle,
                }
                depth = ext_depth(inode);
                ex = path[depth].p_ext;
+                if (!ex) {
+                        EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
+                                         (unsigned long) map->m_lblk);
+                        err = -EIO;
+                        goto out;
+                }
        }
        err = ext4_ext_get_access(handle, inode, path + depth);
@@ -4730,6 +4741,9 @@ static long ext4_zero_range(struct file *file, loff_t offset,
        trace_ext4_zero_range(inode, offset, len, mode);
+        if (!S_ISREG(inode->i_mode))
+                return -EINVAL;
        /*
         * Write out all dirty pages to avoid race conditions
         * Then release them.
@@ -4878,9 +4892,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        if (mode & FALLOC_FL_PUNCH_HOLE)
                return ext4_punch_hole(inode, offset, len);
-        if (mode & FALLOC_FL_COLLAPSE_RANGE)
-                return ext4_collapse_range(inode, offset, len);
        ret = ext4_convert_inline_data(inode);
        if (ret)
                return ret;
@@ -4892,6 +4903,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                return -EOPNOTSUPP;
+        if (mode & FALLOC_FL_COLLAPSE_RANGE)
+                return ext4_collapse_range(inode, offset, len);
        if (mode & FALLOC_FL_ZERO_RANGE)
                return ext4_zero_range(file, offset, len, mode);
@@ -5229,18 +5243,19 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
                        if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr))
                                update = 1;
-                        *start = ex_last->ee_block +
+                        *start = le32_to_cpu(ex_last->ee_block) +
                                ext4_ext_get_actual_len(ex_last);
                        while (ex_start <= ex_last) {
-                                ex_start->ee_block -= shift;
+                                le32_add_cpu(&ex_start->ee_block, -shift);
-                                if (ex_start >
+                                /* Try to merge to the left. */
-                                        EXT_FIRST_EXTENT(path[depth].p_hdr)) {
+                                if ((ex_start >
-                                        if (ext4_ext_try_to_merge_right(inode,
+                                     EXT_FIRST_EXTENT(path[depth].p_hdr)) &&
-                                                path, ex_start - 1))
+                                    ext4_ext_try_to_merge_right(inode,
-                                                ex_last--;
+                                                        path, ex_start - 1))
-                                }
+                                        ex_last--;
-                                ex_start++;
+                                else
+                                        ex_start++;
                        }
                        err = ext4_ext_dirty(handle, inode, path + depth);
                        if (err)
@@ -5255,7 +5270,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
                if (err)
                        goto out;
-                path[depth].p_idx->ei_block -= shift;
+                le32_add_cpu(&path[depth].p_idx->ei_block, -shift);
                err = ext4_ext_dirty(handle, inode, path + depth);
                if (err)
                        goto out;
@@ -5300,7 +5315,8 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
                return ret;
        }
-        stop_block = extent->ee_block + ext4_ext_get_actual_len(extent);
+        stop_block = le32_to_cpu(extent->ee_block) +
+                        ext4_ext_get_actual_len(extent);
        ext4_ext_drop_refs(path);
        kfree(path);
@@ -5313,10 +5329,18 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
         * enough to accomodate the shift.
         */
        path = ext4_ext_find_extent(inode, start - 1, NULL, 0);
+        if (IS_ERR(path))
+                return PTR_ERR(path);
        depth = path->p_depth;
        extent =  path[depth].p_ext;
-        ex_start = extent->ee_block;
+        if (extent) {
-        ex_end = extent->ee_block + ext4_ext_get_actual_len(extent);
+                ex_start = le32_to_cpu(extent->ee_block);
+                ex_end = le32_to_cpu(extent->ee_block) +
+                        ext4_ext_get_actual_len(extent);
+        } else {
+                ex_start = 0;
+                ex_end = 0;
+        }
        ext4_ext_drop_refs(path);
        kfree(path);
@@ -5331,7 +5355,13 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
                        return PTR_ERR(path);
                depth = path->p_depth;
                extent = path[depth].p_ext;
-                current_block = extent->ee_block;
+                if (!extent) {
+                        EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
+                                         (unsigned long) start);
+                        return -EIO;
+                }
+                current_block = le32_to_cpu(extent->ee_block);
                if (start > current_block) {
                        /* Hole, move to the next extent */
                        ret = mext_next_extent(inode, path, &extent);
@@ -5365,17 +5395,18 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
        ext4_lblk_t punch_start, punch_stop;
        handle_t *handle;
        unsigned int credits;
-        loff_t new_size;
+        loff_t new_size, ioffset;
        int ret;
-        BUG_ON(offset + len > i_size_read(inode));
        /* Collapse range works only on fs block size aligned offsets. */
        if (offset & (EXT4_BLOCK_SIZE(sb) - 1) ||
            len & (EXT4_BLOCK_SIZE(sb) - 1))
                return -EINVAL;
        if (!S_ISREG(inode->i_mode))
+                return -EINVAL;
+        if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1)
                return -EOPNOTSUPP;
        trace_ext4_collapse_range(inode, offset, len);
@@ -5383,22 +5414,34 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
        punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
        punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb);
+        /* Call ext4_force_commit to flush all data in case of data=journal. */
+        if (ext4_should_journal_data(inode)) {
+                ret = ext4_force_commit(inode->i_sb);
+                if (ret)
+                        return ret;
+        }
+        /*
+         * Need to round down offset to be aligned with page size boundary
+         * for page size > block size.
+         */
+        ioffset = round_down(offset, PAGE_SIZE);
        /* Write out all dirty pages */
-        ret = filemap_write_and_wait_range(inode->i_mapping, offset, -1);
+        ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
+                                           LLONG_MAX);
        if (ret)
                return ret;
        /* Take mutex lock */
        mutex_lock(&inode->i_mutex);
-        /* It's not possible punch hole on append only file */
+        /*
-        if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
+         * There is no need to overlap collapse range with EOF, in which case
-                ret = -EPERM;
+         * it is effectively a truncate operation
-                goto out_mutex;
+         */
-        }
+        if (offset + len >= i_size_read(inode)) {
+                ret = -EINVAL;
-        if (IS_SWAPFILE(inode)) {
-                ret = -ETXTBSY;
                goto out_mutex;
        }
@@ -5408,7 +5451,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
                goto out_mutex;
        }
-        truncate_pagecache_range(inode, offset, -1);
+        truncate_pagecache(inode, ioffset);
        /* Wait for existing dio to complete */
        ext4_inode_block_unlocked_dio(inode);
@@ -5425,7 +5468,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
        ext4_discard_preallocations(inode);
        ret = ext4_es_remove_extent(inode, punch_start,
-                                    EXT_MAX_BLOCKS - punch_start - 1);
+                                    EXT_MAX_BLOCKS - punch_start);
        if (ret) {
                up_write(&EXT4_I(inode)->i_data_sem);
                goto out_stop;
@@ -5436,6 +5479,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
                up_write(&EXT4_I(inode)->i_data_sem);
                goto out_stop;
        }
+        ext4_discard_preallocations(inode);
        ret = ext4_ext_shift_extents(inode, handle, punch_stop,
                                     punch_stop - punch_start);
@@ -5445,10 +5489,9 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
        }
        new_size = i_size_read(inode) - len;
-        truncate_setsize(inode, new_size);
+        i_size_write(inode, new_size);
        EXT4_I(inode)->i_disksize = new_size;
-        ext4_discard_preallocations(inode);
        up_write(&EXT4_I(inode)->i_data_sem);
        if (IS_SYNC(inode))
                ext4_handle_sync(handle);
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 0a014a7194b2..0ebc21204b51 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -810,7 +810,7 @@ retry:
                        newes.es_lblk = end + 1;
                        newes.es_len = len2;
-                        block = 0x7FDEADBEEF;
+                        block = 0x7FDEADBEEFULL;
                        if (ext4_es_is_written(&orig_es) ||
                            ext4_es_is_unwritten(&orig_es))
                                block = ext4_es_pblock(&orig_es) +
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index ca7502d89fde..063fc1538355 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -82,7 +82,7 @@ ext4_unaligned_aio(struct inode *inode, const struct iovec *iov,
        size_t count = iov_length(iov, nr_segs);
        loff_t final_size = pos + count;
-        if (pos >= inode->i_size)
+        if (pos >= i_size_read(inode))
                return 0;
        if ((pos & blockmask) || (final_size & blockmask))
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5b0d2c7d5408..d7b7462a0e13 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -522,6 +522,10 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
        if (unlikely(map->m_len > INT_MAX))
                map->m_len = INT_MAX;
+        /* We can only handle block numbers less than EXT_MAX_BLOCKS */
+        if (unlikely(map->m_lblk >= EXT_MAX_BLOCKS))
+                return -EIO;
        /* Lookup extent status tree firstly */
        if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
                ext4_es_lru_add(inode);
@@ -2243,13 +2247,23 @@ static int mpage_map_and_submit_extent(handle_t *handle,
                        return err;
        } while (map->m_len);
-        /* Update on-disk size after IO is submitted */
+        /*
+         * Update on-disk size after IO is submitted.  Races with
+         * truncate are avoided by checking i_size under i_data_sem.
+         */
        disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT;
        if (disksize > EXT4_I(inode)->i_disksize) {
                int err2;
+                loff_t i_size;
-                ext4_wb_update_i_disksize(inode, disksize);
+                down_write(&EXT4_I(inode)->i_data_sem);
+                i_size = i_size_read(inode);
+                if (disksize > i_size)
+                        disksize = i_size;
+                if (disksize > EXT4_I(inode)->i_disksize)
+                        EXT4_I(inode)->i_disksize = disksize;
                err2 = ext4_mark_inode_dirty(handle, inode);
+                up_write(&EXT4_I(inode)->i_data_sem);
                if (err2)
                        ext4_error(inode->i_sb,
                                   "Failed to mark inode %lu dirty",
@@ -3527,15 +3541,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
        }
        mutex_lock(&inode->i_mutex);
-        /* It's not possible punch hole on append only file */
-        if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
-                ret = -EPERM;
-                goto out_mutex;
-        }
-        if (IS_SWAPFILE(inode)) {
-                ret = -ETXTBSY;
-                goto out_mutex;
-        }
        /* No need to punch hole beyond i_size */
        if (offset >= inode->i_size)
@@ -3616,7 +3621,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
                ret = ext4_free_hole_blocks(handle, inode, first_block,
                                            stop_block);
-        ext4_discard_preallocations(inode);
        up_write(&EXT4_I(inode)->i_data_sem);
        if (IS_SYNC(inode))
                ext4_handle_sync(handle);
@@ -4423,21 +4427,20 @@ out_brelse:
 *
 * We are called from a few places:
 *
- * - Within generic_file_write() for O_SYNC files.
+ * - Within generic_file_aio_write() -> generic_write_sync() for O_SYNC files.
 *   Here, there will be no transaction running. We wait for any running
 *   transaction to commit.
 *
- * - Within sys_sync(), kupdate and such.
+ * - Within flush work (sys_sync(), kupdate and such).
- *   We wait on commit, if tol to.
+ *   We wait on commit, if told to.
 *
- * - Within prune_icache() (PF_MEMALLOC == true)
+ * - Within iput_final() -> write_inode_now()
- *   Here we simply return.  We can't afford to block kswapd on the
+ *   We wait on commit, if told to.
- *   journal commit.
 *
 * In all cases it is actually safe for us to return without doing anything,
 * because the inode has been copied into a raw inode buffer in
- * ext4_mark_inode_dirty().  This is a correctness thing for O_SYNC and for
+ * ext4_mark_inode_dirty().  This is a correctness thing for WB_SYNC_ALL
- * knfsd.
+ * writeback.
 *
 * Note that we are absolutely dependent upon all inode dirtiers doing the
 * right thing: they *must* call mark_inode_dirty() after dirtying info in
@@ -4449,15 +4452,15 @@ out_brelse:
 *      stuff();
 *      inode->i_size = expr;
 *
- * is in error because a kswapd-driven write_inode() could occur while
+ * is in error because write_inode() could occur while `stuff()' is running,
- * `stuff()' is running, and the new i_size will be lost.  Plus the inode
+ * and the new i_size will be lost.  Plus the inode will no longer be on the
- * will no longer be on the superblock's dirty inode list.
+ * superblock's dirty inode list.
 */
 int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
        int err;
-        if (current->flags & PF_MEMALLOC)
+        if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
                return 0;
        if (EXT4_SB(inode->i_sb)->s_journal) {
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index a888cac76e9c..c8238a26818c 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -989,7 +989,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
        poff = block % blocks_per_page;
        page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
        if (!page)
-                return -EIO;
+                return -ENOMEM;
        BUG_ON(page->mapping != inode->i_mapping);
        e4b->bd_bitmap_page = page;
        e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
@@ -1003,7 +1003,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
        pnum = block / blocks_per_page;
        page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
        if (!page)
-                return -EIO;
+                return -ENOMEM;
        BUG_ON(page->mapping != inode->i_mapping);
        e4b->bd_buddy_page = page;
        return 0;
@@ -1168,7 +1168,11 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
                        unlock_page(page);
                }
        }
-        if (page == NULL || !PageUptodate(page)) {
+        if (page == NULL) {
+                ret = -ENOMEM;
+                goto err;
+        }
+        if (!PageUptodate(page)) {
                ret = -EIO;
                goto err;
        }
@@ -1197,7 +1201,11 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
                        unlock_page(page);
                }
        }
-        if (page == NULL || !PageUptodate(page)) {
+        if (page == NULL) {
+                ret = -ENOMEM;
+                goto err;
+        }
+        if (!PageUptodate(page)) {
                ret = -EIO;
                goto err;
        }
@@ -5008,6 +5016,8 @@ error_return:
 */
 static int ext4_trim_extent(struct super_block *sb, int start, int count,
                             ext4_group_t group, struct ext4_buddy *e4b)
+__releases(bitlock)
+__acquires(bitlock)
 {
        struct ext4_free_extent ex;
        int ret = 0;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index ab95508e3d40..c18d95b50540 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -308,13 +308,14 @@ static void ext4_end_bio(struct bio *bio, int error)
        if (error) {
                struct inode *inode = io_end->inode;
-                ext4_warning(inode->i_sb, "I/O error writing to inode %lu "
+                ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
                             "(offset %llu size %ld starting block %llu)",
-                             inode->i_ino,
+                             error, inode->i_ino,
                             (unsigned long long) io_end->offset,
                             (long) io_end->size,
                             (unsigned long long)
                             bi_sector >> (inode->i_blkbits - 9));
+                mapping_set_error(inode->i_mapping, error);
        }
        if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f3c667091618..6f9e6fadac04 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3869,19 +3869,38 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                        goto failed_mount2;
                }
        }
+        /*
+         * Set up enough state so that we can read an inode
+         * and create the new inode for the buddy allocator.
+         */
+        sbi->s_gdb_count = db_count;
+        if (!test_opt(sb, NOLOAD) &&
+            EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
+                sb->s_op = &ext4_sops;
+        else
+                sb->s_op = &ext4_nojournal_sops;
+        ext4_ext_init(sb);
+        err = ext4_mb_init(sb);
+        if (err) {
+                ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
+                         err);
+                goto failed_mount2;
+        }
        if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
                ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
-                goto failed_mount2;
+                goto failed_mount2a;
        }
        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
                if (!ext4_fill_flex_info(sb)) {
                        ext4_msg(sb, KERN_ERR,
                               "unable to initialize "
                               "flex_bg meta info!");
-                        goto failed_mount2;
+                        goto failed_mount2a;
                }
-        sbi->s_gdb_count = db_count;
        get_random_bytes(&sbi->s_next_generation, sizeof(u32));
        spin_lock_init(&sbi->s_next_gen_lock);
@@ -3916,14 +3935,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        sbi->s_stripe = ext4_get_stripe_size(sbi);
        sbi->s_extent_max_zeroout_kb = 32;
-        /*
-         * set up enough so that it can read an inode
-         */
-        if (!test_opt(sb, NOLOAD) &&
-            EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
-                sb->s_op = &ext4_sops;
-        else
-                sb->s_op = &ext4_nojournal_sops;
        sb->s_export_op = &ext4_export_ops;
        sb->s_xattr = ext4_xattr_handlers;
 #ifdef CONFIG_QUOTA
@@ -4113,21 +4124,13 @@ no_journal:
        if (err) {
                ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for "
                         "reserved pool", ext4_calculate_resv_clusters(sb));
-                goto failed_mount4a;
+                goto failed_mount5;
        }
        err = ext4_setup_system_zone(sb);
        if (err) {
                ext4_msg(sb, KERN_ERR, "failed to initialize system "
                         "zone (%d)", err);
-                goto failed_mount4a;
-        }
-        ext4_ext_init(sb);
-        err = ext4_mb_init(sb);
-        if (err) {
-                ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
-                         err);
                goto failed_mount5;
        }
@@ -4204,11 +4207,8 @@ failed_mount8:
 failed_mount7:
        ext4_unregister_li_request(sb);
 failed_mount6:
-        ext4_mb_release(sb);
-failed_mount5:
-        ext4_ext_release(sb);
        ext4_release_system_zone(sb);
-failed_mount4a:
+failed_mount5:
        dput(sb->s_root);
        sb->s_root = NULL;
 failed_mount4:
@@ -4232,11 +4232,14 @@ failed_mount3:
        percpu_counter_destroy(&sbi->s_extent_cache_cnt);
        if (sbi->s_mmp_tsk)
                kthread_stop(sbi->s_mmp_tsk);
+failed_mount2a:
+        ext4_mb_release(sb);
 failed_mount2:
        for (i = 0; i < db_count; i++)
                brelse(sbi->s_group_desc[i]);
        ext4_kvfree(sbi->s_group_desc);
 failed_mount:
+        ext4_ext_release(sb);
        if (sbi->s_chksum_driver)
                crypto_free_shash(sbi->s_chksum_driver);
        if (sbi->s_proc) {
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 1f5cf5880718..4eec399ec807 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -520,8 +520,8 @@ static void ext4_xattr_update_super_block(handle_t *handle,
 }
 /*
- * Release the xattr block BH: If the reference count is > 1, decrement
+ * Release the xattr block BH: If the reference count is > 1, decrement it;
- * it; otherwise free the block.
+ * otherwise free the block.
 */
 static void
 ext4_xattr_release_block(handle_t *handle, struct inode *inode,
@@ -542,16 +542,31 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
                if (ce)
                        mb_cache_entry_free(ce);
                get_bh(bh);
+                unlock_buffer(bh);
                ext4_free_blocks(handle, inode, bh, 0, 1,
                                 EXT4_FREE_BLOCKS_METADATA |
                                 EXT4_FREE_BLOCKS_FORGET);
-                unlock_buffer(bh);
        } else {
                le32_add_cpu(&BHDR(bh)->h_refcount, -1);
                if (ce)
                        mb_cache_entry_release(ce);
+                /*
+                 * Beware of this ugliness: Releasing of xattr block references
+                 * from different inodes can race and so we have to protect
+                 * from a race where someone else frees the block (and releases
+                 * its journal_head) before we are done dirtying the buffer. In
+                 * nojournal mode this race is harmless and we actually cannot
+                 * call ext4_handle_dirty_xattr_block() with locked buffer as
+                 * that function can call sync_dirty_buffer() so for that case
+                 * we handle the dirtying after unlocking the buffer.
+                 */
+                if (ext4_handle_valid(handle))
+                        error = ext4_handle_dirty_xattr_block(handle, inode,
+                                                              bh);
                unlock_buffer(bh);
-                error = ext4_handle_dirty_xattr_block(handle, inode, bh);
+                if (!ext4_handle_valid(handle))
+                        error = ext4_handle_dirty_xattr_block(handle, inode,
+                                                              bh);
                if (IS_SYNC(inode))
                        ext4_handle_sync(handle);
                dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1));
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 9ead1596399a..72c82f69b01b 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -274,15 +274,15 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
                break;
 #if BITS_PER_LONG != 32
        /* 32-bit arches must use fcntl64() */
-        case F_GETLKP:
+        case F_OFD_GETLK:
 #endif
        case F_GETLK:
                err = fcntl_getlk(filp, cmd, (struct flock __user *) arg);
                break;
 #if BITS_PER_LONG != 32
        /* 32-bit arches must use fcntl64() */
-        case F_SETLKP:
+        case F_OFD_SETLK:
-        case F_SETLKPW:
+        case F_OFD_SETLKW:
 #endif
                /* Fallthrough */
        case F_SETLK:
@@ -399,13 +399,13 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
        
        switch (cmd) {
        case F_GETLK64:
-        case F_GETLKP:
+        case F_OFD_GETLK:
                err = fcntl_getlk64(f.file, cmd, (struct flock64 __user *) arg);
                break;
        case F_SETLK64:
        case F_SETLKW64:
-        case F_SETLKP:
+        case F_OFD_SETLK:
-        case F_SETLKPW:
+        case F_OFD_SETLKW:
                err = fcntl_setlk64(fd, f.file, cmd,
                                (struct flock64 __user *) arg);
                break;
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index a0b0855d00a9..205e0d5d5307 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -348,7 +348,7 @@ int __init fuse_ctl_init(void)
        return register_filesystem(&fuse_ctl_fs_type);
 }
-void fuse_ctl_cleanup(void)
+void __exit fuse_ctl_cleanup(void)
 {
        unregister_filesystem(&fuse_ctl_fs_type);
 }
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 5b4e035b364c..42198359fa1b 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -679,6 +679,14 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry,
        return create_new_entry(fc, req, dir, entry, S_IFLNK);
 }
+static inline void fuse_update_ctime(struct inode *inode)
+{
+        if (!IS_NOCMTIME(inode)) {
+                inode->i_ctime = current_fs_time(inode->i_sb);
+                mark_inode_dirty_sync(inode);
+        }
+}
 static int fuse_unlink(struct inode *dir, struct dentry *entry)
 {
        int err;
@@ -713,6 +721,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
                fuse_invalidate_attr(inode);
                fuse_invalidate_attr(dir);
                fuse_invalidate_entry_cache(entry);
+                fuse_update_ctime(inode);
        } else if (err == -EINTR)
                fuse_invalidate_entry(entry);
        return err;
@@ -743,23 +752,26 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
        return err;
 }
-static int fuse_rename(struct inode *olddir, struct dentry *oldent,
+static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
-                       struct inode *newdir, struct dentry *newent)
+                              struct inode *newdir, struct dentry *newent,
+                              unsigned int flags, int opcode, size_t argsize)
 {
        int err;
-        struct fuse_rename_in inarg;
+        struct fuse_rename2_in inarg;
        struct fuse_conn *fc = get_fuse_conn(olddir);
-        struct fuse_req *req = fuse_get_req_nopages(fc);
+        struct fuse_req *req;
+        req = fuse_get_req_nopages(fc);
        if (IS_ERR(req))
                return PTR_ERR(req);
-        memset(&inarg, 0, sizeof(inarg));
+        memset(&inarg, 0, argsize);
        inarg.newdir = get_node_id(newdir);
-        req->in.h.opcode = FUSE_RENAME;
+        inarg.flags = flags;
+        req->in.h.opcode = opcode;
        req->in.h.nodeid = get_node_id(olddir);
        req->in.numargs = 3;
-        req->in.args[0].size = sizeof(inarg);
+        req->in.args[0].size = argsize;
        req->in.args[0].value = &inarg;
        req->in.args[1].size = oldent->d_name.len + 1;
        req->in.args[1].value = oldent->d_name.name;
@@ -771,15 +783,22 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent,
        if (!err) {
                /* ctime changes */
                fuse_invalidate_attr(oldent->d_inode);
+                fuse_update_ctime(oldent->d_inode);
+                if (flags & RENAME_EXCHANGE) {
+                        fuse_invalidate_attr(newent->d_inode);
+                        fuse_update_ctime(newent->d_inode);
+                }
                fuse_invalidate_attr(olddir);
                if (olddir != newdir)
                        fuse_invalidate_attr(newdir);
                /* newent will end up negative */
-                if (newent->d_inode) {
+                if (!(flags & RENAME_EXCHANGE) && newent->d_inode) {
                        fuse_invalidate_attr(newent->d_inode);
                        fuse_invalidate_entry_cache(newent);
+                        fuse_update_ctime(newent->d_inode);
                }
        } else if (err == -EINTR) {
                /* If request was interrupted, DEITY only knows if the
@@ -795,6 +814,36 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent,
        return err;
 }
+static int fuse_rename(struct inode *olddir, struct dentry *oldent,
+                       struct inode *newdir, struct dentry *newent)
+{
+        return fuse_rename_common(olddir, oldent, newdir, newent, 0,
+                                  FUSE_RENAME, sizeof(struct fuse_rename_in));
+}
+static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
+                        struct inode *newdir, struct dentry *newent,
+                        unsigned int flags)
+{
+        struct fuse_conn *fc = get_fuse_conn(olddir);
+        int err;
+        if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
+                return -EINVAL;
+        if (fc->no_rename2 || fc->minor < 23)
+                return -EINVAL;
+        err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
+                                 FUSE_RENAME2, sizeof(struct fuse_rename2_in));
+        if (err == -ENOSYS) {
+                fc->no_rename2 = 1;
+                err = -EINVAL;
+        }
+        return err;
+}
 static int fuse_link(struct dentry *entry, struct inode *newdir,
                     struct dentry *newent)
 {
@@ -829,6 +878,7 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
                inc_nlink(inode);
                spin_unlock(&fc->lock);
                fuse_invalidate_attr(inode);
+                fuse_update_ctime(inode);
        } else if (err == -EINTR) {
                fuse_invalidate_attr(inode);
        }
@@ -846,6 +896,8 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
                attr->size = i_size_read(inode);
                attr->mtime = inode->i_mtime.tv_sec;
                attr->mtimensec = inode->i_mtime.tv_nsec;
+                attr->ctime = inode->i_ctime.tv_sec;
+                attr->ctimensec = inode->i_ctime.tv_nsec;
        }
        stat->dev = inode->i_sb->s_dev;
@@ -1504,7 +1556,7 @@ static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
 }
 static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg,
-                           bool trust_local_mtime)
+                           bool trust_local_cmtime)
 {
        unsigned ivalid = iattr->ia_valid;
@@ -1523,13 +1575,18 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg,
                if (!(ivalid & ATTR_ATIME_SET))
                        arg->valid |= FATTR_ATIME_NOW;
        }
-        if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_mtime)) {
+        if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
                arg->valid |= FATTR_MTIME;
                arg->mtime = iattr->ia_mtime.tv_sec;
                arg->mtimensec = iattr->ia_mtime.tv_nsec;
-                if (!(ivalid & ATTR_MTIME_SET) && !trust_local_mtime)
+                if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
                        arg->valid |= FATTR_MTIME_NOW;
        }
+        if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
+                arg->valid |= FATTR_CTIME;
+                arg->ctime = iattr->ia_ctime.tv_sec;
+                arg->ctimensec = iattr->ia_ctime.tv_nsec;
+        }
 }
 /*
@@ -1597,39 +1654,38 @@ static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_req *req,
 /*
 * Flush inode->i_mtime to the server
 */
-int fuse_flush_mtime(struct file *file, bool nofail)
+int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
 {
-        struct inode *inode = file->f_mapping->host;
-        struct fuse_inode *fi = get_fuse_inode(inode);
        struct fuse_conn *fc = get_fuse_conn(inode);
-        struct fuse_req *req = NULL;
+        struct fuse_req *req;
        struct fuse_setattr_in inarg;
        struct fuse_attr_out outarg;
        int err;
-        if (nofail) {
+        req = fuse_get_req_nopages(fc);
-                req = fuse_get_req_nofail_nopages(fc, file);
+        if (IS_ERR(req))
-        } else {
+                return PTR_ERR(req);
-                req = fuse_get_req_nopages(fc);
-                if (IS_ERR(req))
-                        return PTR_ERR(req);
-        }
        memset(&inarg, 0, sizeof(inarg));
        memset(&outarg, 0, sizeof(outarg));
-        inarg.valid |= FATTR_MTIME;
+        inarg.valid = FATTR_MTIME;
        inarg.mtime = inode->i_mtime.tv_sec;
        inarg.mtimensec = inode->i_mtime.tv_nsec;
+        if (fc->minor >= 23) {
+                inarg.valid |= FATTR_CTIME;
+                inarg.ctime = inode->i_ctime.tv_sec;
+                inarg.ctimensec = inode->i_ctime.tv_nsec;
+        }
+        if (ff) {
+                inarg.valid |= FATTR_FH;
+                inarg.fh = ff->fh;
+        }
        fuse_setattr_fill(fc, req, inode, &inarg, &outarg);
        fuse_request_send(fc, req);
        err = req->out.h.error;
        fuse_put_request(fc, req);
-        if (!err)
-                clear_bit(FUSE_I_MTIME_DIRTY, &fi->state);
        return err;
 }
@@ -1653,7 +1709,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
        bool is_wb = fc->writeback_cache;
        loff_t oldsize;
        int err;
-        bool trust_local_mtime = is_wb && S_ISREG(inode->i_mode);
+        bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode);
        if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
                attr->ia_valid |= ATTR_FORCE;
@@ -1678,11 +1734,13 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
        if (is_truncate) {
                fuse_set_nowrite(inode);
                set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
+                if (trust_local_cmtime && attr->ia_size != inode->i_size)
+                        attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
        }
        memset(&inarg, 0, sizeof(inarg));
        memset(&outarg, 0, sizeof(outarg));
-        iattr_to_fattr(attr, &inarg, trust_local_mtime);
+        iattr_to_fattr(attr, &inarg, trust_local_cmtime);
        if (file) {
                struct fuse_file *ff = file->private_data;
                inarg.valid |= FATTR_FH;
@@ -1711,9 +1769,12 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
        spin_lock(&fc->lock);
        /* the kernel maintains i_mtime locally */
-        if (trust_local_mtime && (attr->ia_valid & ATTR_MTIME)) {
+        if (trust_local_cmtime) {
-                inode->i_mtime = attr->ia_mtime;
+                if (attr->ia_valid & ATTR_MTIME)
-                clear_bit(FUSE_I_MTIME_DIRTY, &fi->state);
+                        inode->i_mtime = attr->ia_mtime;
+                if (attr->ia_valid & ATTR_CTIME)
+                        inode->i_ctime = attr->ia_ctime;
+                /* FIXME: clear I_DIRTY_SYNC? */
        }
        fuse_change_attributes_common(inode, &outarg.attr,
@@ -1810,8 +1871,10 @@ static int fuse_setxattr(struct dentry *entry, const char *name,
                fc->no_setxattr = 1;
                err = -EOPNOTSUPP;
        }
-        if (!err)
+        if (!err) {
                fuse_invalidate_attr(inode);
+                fuse_update_ctime(inode);
+        }
        return err;
 }
@@ -1941,20 +2004,11 @@ static int fuse_removexattr(struct dentry *entry, const char *name)
                fc->no_removexattr = 1;
                err = -EOPNOTSUPP;
        }
-        if (!err)
+        if (!err) {
                fuse_invalidate_attr(inode);
-        return err;
+                fuse_update_ctime(inode);
-}
-static int fuse_update_time(struct inode *inode, struct timespec *now,
-                            int flags)
-{
-        if (flags & S_MTIME) {
-                inode->i_mtime = *now;
-                set_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state);
-                BUG_ON(!S_ISREG(inode->i_mode));
        }
-        return 0;
+        return err;
 }
 static const struct inode_operations fuse_dir_inode_operations = {
@@ -1964,6 +2018,7 @@ static const struct inode_operations fuse_dir_inode_operations = {
        .unlink         = fuse_unlink,
        .rmdir          = fuse_rmdir,
        .rename         = fuse_rename,
+        .rename2        = fuse_rename2,
        .link           = fuse_link,
        .setattr        = fuse_setattr,
        .create         = fuse_create,
@@ -1996,7 +2051,6 @@ static const struct inode_operations fuse_common_inode_operations = {
        .getxattr       = fuse_getxattr,
        .listxattr      = fuse_listxattr,
        .removexattr    = fuse_removexattr,
-        .update_time    = fuse_update_time,
 };
 static const struct inode_operations fuse_symlink_inode_operations = {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 13f8bdec5110..96d513e01a5d 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -223,6 +223,8 @@ void fuse_finish_open(struct inode *inode, struct file *file)
                i_size_write(inode, 0);
                spin_unlock(&fc->lock);
                fuse_invalidate_attr(inode);
+                if (fc->writeback_cache)
+                        file_update_time(file);
        }
        if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
                fuse_link_write_file(file);
@@ -232,18 +234,26 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
 {
        struct fuse_conn *fc = get_fuse_conn(inode);
        int err;
+        bool lock_inode = (file->f_flags & O_TRUNC) &&
+                          fc->atomic_o_trunc &&
+                          fc->writeback_cache;
        err = generic_file_open(inode, file);
        if (err)
                return err;
+        if (lock_inode)
+                mutex_lock(&inode->i_mutex);
        err = fuse_do_open(fc, get_node_id(inode), file, isdir);
-        if (err)
-                return err;
-        fuse_finish_open(inode, file);
+        if (!err)
+                fuse_finish_open(inode, file);
-        return 0;
+        if (lock_inode)
+                mutex_unlock(&inode->i_mutex);
+        return err;
 }
 static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode)
@@ -314,10 +324,7 @@ static int fuse_release(struct inode *inode, struct file *file)
        /* see fuse_vma_close() for !writeback_cache case */
        if (fc->writeback_cache)
-                filemap_write_and_wait(file->f_mapping);
+                write_inode_now(inode, 1);
-        if (test_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state))
-                fuse_flush_mtime(file, true);
        fuse_release_common(file, FUSE_RELEASE);
@@ -439,7 +446,7 @@ static int fuse_flush(struct file *file, fl_owner_t id)
        if (fc->no_flush)
                return 0;
-        err = filemap_write_and_wait(file->f_mapping);
+        err = write_inode_now(inode, 1);
        if (err)
                return err;
@@ -480,13 +487,6 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
        if (is_bad_inode(inode))
                return -EIO;
-        err = filemap_write_and_wait_range(inode->i_mapping, start, end);
-        if (err)
-                return err;
-        if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
-                return 0;
        mutex_lock(&inode->i_mutex);
        /*
@@ -494,17 +494,17 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
         * wait for all outstanding writes, before sending the FSYNC
         * request.
         */
-        err = write_inode_now(inode, 0);
+        err = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (err)
                goto out;
        fuse_sync_writes(inode);
+        err = sync_inode_metadata(inode, 1);
+        if (err)
+                goto out;
-        if (test_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state)) {
+        if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
-                int err = fuse_flush_mtime(file, false);
+                goto out;
-                if (err)
-                        goto out;
-        }
        req = fuse_get_req_nopages(fc);
        if (IS_ERR(req)) {
@@ -1659,13 +1659,13 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
        fuse_writepage_free(fc, req);
 }
-static struct fuse_file *fuse_write_file_get(struct fuse_conn *fc,
+static struct fuse_file *__fuse_write_file_get(struct fuse_conn *fc,
-                                             struct fuse_inode *fi)
+                                               struct fuse_inode *fi)
 {
        struct fuse_file *ff = NULL;
        spin_lock(&fc->lock);
-        if (!WARN_ON(list_empty(&fi->write_files))) {
+        if (!list_empty(&fi->write_files)) {
                ff = list_entry(fi->write_files.next, struct fuse_file,
                                write_entry);
                fuse_file_get(ff);
@@ -1675,6 +1675,29 @@ static struct fuse_file *fuse_write_file_get(struct fuse_conn *fc,
        return ff;
 }
+static struct fuse_file *fuse_write_file_get(struct fuse_conn *fc,
+                                             struct fuse_inode *fi)
+{
+        struct fuse_file *ff = __fuse_write_file_get(fc, fi);
+        WARN_ON(!ff);
+        return ff;
+}
+int fuse_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+        struct fuse_conn *fc = get_fuse_conn(inode);
+        struct fuse_inode *fi = get_fuse_inode(inode);
+        struct fuse_file *ff;
+        int err;
+        ff = __fuse_write_file_get(fc, fi);
+        err = fuse_flush_times(inode, ff);
+        if (ff)
+                fuse_file_put(ff, 0);
+        return err;
+}
 static int fuse_writepage_locked(struct page *page)
 {
        struct address_space *mapping = page->mapping;
@@ -2972,6 +2995,9 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
        bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) ||
                           (mode & FALLOC_FL_PUNCH_HOLE);
+        if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+                return -EOPNOTSUPP;
        if (fc->no_fallocate)
                return -EOPNOTSUPP;
@@ -3017,12 +3043,8 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
        if (!(mode & FALLOC_FL_KEEP_SIZE)) {
                bool changed = fuse_write_update_size(inode, offset + length);
-                if (changed && fc->writeback_cache) {
+                if (changed && fc->writeback_cache)
-                        struct fuse_inode *fi = get_fuse_inode(inode);
+                        file_update_time(file);
-                        inode->i_mtime = current_fs_time(inode->i_sb);
-                        set_bit(FUSE_I_MTIME_DIRTY, &fi->state);
-                }
        }
        if (mode & FALLOC_FL_PUNCH_HOLE)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index a257ed8ebee6..7aa5c75e0de1 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -119,8 +119,6 @@ enum {
        FUSE_I_INIT_RDPLUS,
        /** An operation changing file size is in progress  */
        FUSE_I_SIZE_UNSTABLE,
-        /** i_mtime has been updated locally; a flush to userspace needed */
-        FUSE_I_MTIME_DIRTY,
 };
 struct fuse_conn;
@@ -544,6 +542,9 @@ struct fuse_conn {
        /** Is fallocate not implemented by fs? */
        unsigned no_fallocate:1;
+        /** Is rename with flags not implemented by fs? */
+        unsigned no_rename2:1;
        /** Use enhanced/automatic page cache invalidation. */
        unsigned auto_inval_data:1;
@@ -725,7 +726,7 @@ int fuse_dev_init(void);
 void fuse_dev_cleanup(void);
 int fuse_ctl_init(void);
-void fuse_ctl_cleanup(void);
+void __exit fuse_ctl_cleanup(void);
 /**
 * Allocate a request
@@ -891,7 +892,8 @@ int fuse_dev_release(struct inode *inode, struct file *file);
 bool fuse_write_update_size(struct inode *inode, loff_t pos);
-int fuse_flush_mtime(struct file *file, bool nofail);
+int fuse_flush_times(struct inode *inode, struct fuse_file *ff);
+int fuse_write_inode(struct inode *inode, struct writeback_control *wbc);
 int fuse_do_setattr(struct inode *inode, struct iattr *attr,
                    struct file *file);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 8d611696fcad..754dcf23de8a 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -175,9 +175,9 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
        if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) {
                inode->i_mtime.tv_sec   = attr->mtime;
                inode->i_mtime.tv_nsec  = attr->mtimensec;
+                inode->i_ctime.tv_sec   = attr->ctime;
+                inode->i_ctime.tv_nsec  = attr->ctimensec;
        }
-        inode->i_ctime.tv_sec   = attr->ctime;
-        inode->i_ctime.tv_nsec  = attr->ctimensec;
        if (attr->blksize != 0)
                inode->i_blkbits = ilog2(attr->blksize);
@@ -256,6 +256,8 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
        inode->i_size = attr->size;
        inode->i_mtime.tv_sec  = attr->mtime;
        inode->i_mtime.tv_nsec = attr->mtimensec;
+        inode->i_ctime.tv_sec  = attr->ctime;
+        inode->i_ctime.tv_nsec = attr->ctimensec;
        if (S_ISREG(inode->i_mode)) {
                fuse_init_common(inode);
                fuse_init_file_inode(inode);
@@ -303,7 +305,7 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
        if ((inode->i_state & I_NEW)) {
                inode->i_flags |= S_NOATIME;
-                if (!fc->writeback_cache || !S_ISREG(inode->i_mode))
+                if (!fc->writeback_cache || !S_ISREG(attr->mode))
                        inode->i_flags |= S_NOCMTIME;
                inode->i_generation = generation;
                inode->i_data.backing_dev_info = &fc->bdi;
@@ -788,6 +790,7 @@ static const struct super_operations fuse_super_operations = {
        .alloc_inode    = fuse_alloc_inode,
        .destroy_inode  = fuse_destroy_inode,
        .evict_inode    = fuse_evict_inode,
+        .write_inode    = fuse_write_inode,
        .drop_inode     = generic_delete_inode,
        .remount_fs     = fuse_remount_fs,
        .put_super      = fuse_put_super,
@@ -890,6 +893,11 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
                                fc->async_dio = 1;
                        if (arg->flags & FUSE_WRITEBACK_CACHE)
                                fc->writeback_cache = 1;
+                        if (arg->time_gran && arg->time_gran <= 1000000000)
+                                fc->sb->s_time_gran = arg->time_gran;
+                        else
+                                fc->sb->s_time_gran = 1000000000;
                } else {
                        ra_pages = fc->max_read / PAGE_CACHE_SIZE;
                        fc->no_lock = 1;
@@ -996,7 +1004,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
        if (sb->s_flags & MS_MANDLOCK)
                goto err;
-        sb->s_flags &= ~MS_NOSEC;
+        sb->s_flags &= ~(MS_NOSEC | MS_I_VERSION);
        if (!parse_fuse_opt((char *) data, &d, is_bdev))
                goto err;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 204027520937..e19d4c0cacae 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -1030,6 +1030,11 @@ static int __init init_hugetlbfs_fs(void)
        int error;
        int i;
+        if (!hugepages_supported()) {
+                pr_info("hugetlbfs: disabling because there are no supported hugepage sizes\n");
+                return -ENOTSUPP;
+        }
        error = bdi_init(&hugetlbfs_backing_dev_info);
        if (error)
                return error;
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 78f3403300af..ac127cd008bf 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -232,9 +232,6 @@ static int kernfs_link_sibling(struct kernfs_node *kn)
        struct rb_node **node = &kn->parent->dir.children.rb_node;
        struct rb_node *parent = NULL;
-        if (kernfs_type(kn) == KERNFS_DIR)
-                kn->parent->dir.subdirs++;
        while (*node) {
                struct kernfs_node *pos;
                int result;
@@ -249,9 +246,15 @@ static int kernfs_link_sibling(struct kernfs_node *kn)
                else
                        return -EEXIST;
        }
        /* add new node and rebalance the tree */
        rb_link_node(&kn->rb, parent, node);
        rb_insert_color(&kn->rb, &kn->parent->dir.children);
+        /* successfully added, account subdir number */
+        if (kernfs_type(kn) == KERNFS_DIR)
+                kn->parent->dir.subdirs++;
        return 0;
 }
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 8034706a7af8..5e9a80cfc3d8 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -484,6 +484,8 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
        ops = kernfs_ops(of->kn);
        rc = ops->mmap(of, vma);
+        if (rc)
+                goto out_put;
        /*
         * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
@@ -608,6 +610,7 @@ static void kernfs_put_open_node(struct kernfs_node *kn,
 static int kernfs_fop_open(struct inode *inode, struct file *file)
 {
        struct kernfs_node *kn = file->f_path.dentry->d_fsdata;
+        struct kernfs_root *root = kernfs_root(kn);
        const struct kernfs_ops *ops;
        struct kernfs_open_file *of;
        bool has_read, has_write, has_mmap;
@@ -622,14 +625,16 @@ static int kernfs_fop_open(struct inode *inode, struct file *file)
        has_write = ops->write || ops->mmap;
        has_mmap = ops->mmap;
-        /* check perms and supported operations */
+        /* see the flag definition for details */
-        if ((file->f_mode & FMODE_WRITE) &&
+        if (root->flags & KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK) {
-            (!(inode->i_mode & S_IWUGO) || !has_write))
+                if ((file->f_mode & FMODE_WRITE) &&
-                goto err_out;
+                    (!(inode->i_mode & S_IWUGO) || !has_write))
+                        goto err_out;
-        if ((file->f_mode & FMODE_READ) &&
+                if ((file->f_mode & FMODE_READ) &&
-            (!(inode->i_mode & S_IRUGO) || !has_read))
+                    (!(inode->i_mode & S_IRUGO) || !has_read))
-                goto err_out;
+                        goto err_out;
+        }
        /* allocate a kernfs_open_file for the file */
        error = -ENOMEM;
diff --git a/fs/locks.c b/fs/locks.c
index 13fc7a6d380a..e390bd9ae068 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -135,7 +135,7 @@
 #define IS_POSIX(fl)    (fl->fl_flags & FL_POSIX)
 #define IS_FLOCK(fl)    (fl->fl_flags & FL_FLOCK)
 #define IS_LEASE(fl)    (fl->fl_flags & (FL_LEASE|FL_DELEG))
-#define IS_FILE_PVT(fl) (fl->fl_flags & FL_FILE_PVT)
+#define IS_OFDLCK(fl)   (fl->fl_flags & FL_OFDLCK)
 static bool lease_breaking(struct file_lock *fl)
 {
@@ -389,18 +389,6 @@ static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl,
        fl->fl_ops = NULL;
        fl->fl_lmops = NULL;
-        /* Ensure that fl->fl_filp has compatible f_mode */
-        switch (l->l_type) {
-        case F_RDLCK:
-                if (!(filp->f_mode & FMODE_READ))
-                        return -EBADF;
-                break;
-        case F_WRLCK:
-                if (!(filp->f_mode & FMODE_WRITE))
-                        return -EBADF;
-                break;
-        }
        return assign_type(fl, l->l_type);
 }
@@ -564,7 +552,7 @@ static void __locks_insert_block(struct file_lock *blocker,
        BUG_ON(!list_empty(&waiter->fl_block));
        waiter->fl_next = blocker;
        list_add_tail(&waiter->fl_block, &blocker->fl_block);
-        if (IS_POSIX(blocker) && !IS_FILE_PVT(blocker))
+        if (IS_POSIX(blocker) && !IS_OFDLCK(blocker))
                locks_insert_global_blocked(waiter);
 }
@@ -759,12 +747,12 @@ EXPORT_SYMBOL(posix_test_lock);
 * of tasks (such as posix threads) sharing the same open file table.
 * To handle those cases, we just bail out after a few iterations.
 *
- * For FL_FILE_PVT locks, the owner is the filp, not the files_struct.
+ * For FL_OFDLCK locks, the owner is the filp, not the files_struct.
 * Because the owner is not even nominally tied to a thread of
 * execution, the deadlock detection below can't reasonably work well. Just
 * skip it for those.
 *
- * In principle, we could do a more limited deadlock detection on FL_FILE_PVT
+ * In principle, we could do a more limited deadlock detection on FL_OFDLCK
 * locks that just checks for the case where two tasks are attempting to
 * upgrade from read to write locks on the same inode.
 */
@@ -791,9 +779,9 @@ static int posix_locks_deadlock(struct file_lock *caller_fl,
        /*
         * This deadlock detector can't reasonably detect deadlocks with
-         * FL_FILE_PVT locks, since they aren't owned by a process, per-se.
+         * FL_OFDLCK locks, since they aren't owned by a process, per-se.
         */
-        if (IS_FILE_PVT(caller_fl))
+        if (IS_OFDLCK(caller_fl))
                return 0;
        while ((block_fl = what_owner_is_waiting_for(block_fl))) {
@@ -1391,11 +1379,10 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
 restart:
        break_time = flock->fl_break_time;
-        if (break_time != 0) {
+        if (break_time != 0)
                break_time -= jiffies;
-                if (break_time == 0)
+        if (break_time == 0)
-                        break_time++;
+                break_time++;
-        }
        locks_insert_block(flock, new_fl);
        spin_unlock(&inode->i_lock);
        error = wait_event_interruptible_timeout(new_fl->fl_wait,
@@ -1891,7 +1878,7 @@ EXPORT_SYMBOL_GPL(vfs_test_lock);
 static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)
 {
-        flock->l_pid = IS_FILE_PVT(fl) ? -1 : fl->fl_pid;
+        flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid;
 #if BITS_PER_LONG == 32
        /*
         * Make sure we can represent the posix lock via
@@ -1913,7 +1900,7 @@ static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)
 #if BITS_PER_LONG == 32
 static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl)
 {
-        flock->l_pid = IS_FILE_PVT(fl) ? -1 : fl->fl_pid;
+        flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid;
        flock->l_start = fl->fl_start;
        flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :
                fl->fl_end - fl->fl_start + 1;
@@ -1942,13 +1929,13 @@ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l)
        if (error)
                goto out;
-        if (cmd == F_GETLKP) {
+        if (cmd == F_OFD_GETLK) {
                error = -EINVAL;
                if (flock.l_pid != 0)
                        goto out;
                cmd = F_GETLK;
-                file_lock.fl_flags |= FL_FILE_PVT;
+                file_lock.fl_flags |= FL_OFDLCK;
                file_lock.fl_owner = (fl_owner_t)filp;
        }
@@ -2035,6 +2022,22 @@ static int do_lock_file_wait(struct file *filp, unsigned int cmd,
        return error;
 }
+/* Ensure that fl->fl_file has compatible f_mode for F_SETLK calls */
+static int
+check_fmode_for_setlk(struct file_lock *fl)
+{
+        switch (fl->fl_type) {
+        case F_RDLCK:
+                if (!(fl->fl_file->f_mode & FMODE_READ))
+                        return -EBADF;
+                break;
+        case F_WRLCK:
+                if (!(fl->fl_file->f_mode & FMODE_WRITE))
+                        return -EBADF;
+        }
+        return 0;
+}
 /* Apply the lock described by l to an open file descriptor.
 * This implements both the F_SETLK and F_SETLKW commands of fcntl().
 */
@@ -2072,27 +2075,31 @@ again:
        if (error)
                goto out;
+        error = check_fmode_for_setlk(file_lock);
+        if (error)
+                goto out;
        /*
         * If the cmd is requesting file-private locks, then set the
-         * FL_FILE_PVT flag and override the owner.
+         * FL_OFDLCK flag and override the owner.
         */
        switch (cmd) {
-        case F_SETLKP:
+        case F_OFD_SETLK:
                error = -EINVAL;
                if (flock.l_pid != 0)
                        goto out;
                cmd = F_SETLK;
-                file_lock->fl_flags |= FL_FILE_PVT;
+                file_lock->fl_flags |= FL_OFDLCK;
                file_lock->fl_owner = (fl_owner_t)filp;
                break;
-        case F_SETLKPW:
+        case F_OFD_SETLKW:
                error = -EINVAL;
                if (flock.l_pid != 0)
                        goto out;
                cmd = F_SETLKW;
-                file_lock->fl_flags |= FL_FILE_PVT;
+                file_lock->fl_flags |= FL_OFDLCK;
                file_lock->fl_owner = (fl_owner_t)filp;
                /* Fallthrough */
        case F_SETLKW:
@@ -2144,13 +2151,13 @@ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
        if (error)
                goto out;
-        if (cmd == F_GETLKP) {
+        if (cmd == F_OFD_GETLK) {
                error = -EINVAL;
                if (flock.l_pid != 0)
                        goto out;
                cmd = F_GETLK64;
-                file_lock.fl_flags |= FL_FILE_PVT;
+                file_lock.fl_flags |= FL_OFDLCK;
                file_lock.fl_owner = (fl_owner_t)filp;
        }
@@ -2207,27 +2214,31 @@ again:
        if (error)
                goto out;
+        error = check_fmode_for_setlk(file_lock);
+        if (error)
+                goto out;
        /*
         * If the cmd is requesting file-private locks, then set the
-         * FL_FILE_PVT flag and override the owner.
+         * FL_OFDLCK flag and override the owner.
         */
        switch (cmd) {
-        case F_SETLKP:
+        case F_OFD_SETLK:
                error = -EINVAL;
                if (flock.l_pid != 0)
                        goto out;
                cmd = F_SETLK64;
-                file_lock->fl_flags |= FL_FILE_PVT;
+                file_lock->fl_flags |= FL_OFDLCK;
                file_lock->fl_owner = (fl_owner_t)filp;
                break;
-        case F_SETLKPW:
+        case F_OFD_SETLKW:
                error = -EINVAL;
                if (flock.l_pid != 0)
                        goto out;
                cmd = F_SETLKW64;
-                file_lock->fl_flags |= FL_FILE_PVT;
+                file_lock->fl_flags |= FL_OFDLCK;
                file_lock->fl_owner = (fl_owner_t)filp;
                /* Fallthrough */
        case F_SETLKW64:
@@ -2413,8 +2424,8 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
        if (IS_POSIX(fl)) {
                if (fl->fl_flags & FL_ACCESS)
                        seq_printf(f, "ACCESS");
-                else if (IS_FILE_PVT(fl))
+                else if (IS_OFDLCK(fl))
-                        seq_printf(f, "FLPVT ");
+                        seq_printf(f, "OFDLCK");
                else
                        seq_printf(f, "POSIX ");
diff --git a/fs/namei.c b/fs/namei.c
index c6157c894fce..80168273396b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1542,7 +1542,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
                inode = path->dentry->d_inode;
        }
        err = -ENOENT;
-        if (!inode)
+        if (!inode || d_is_negative(path->dentry))
                goto out_path_put;
        if (should_follow_link(path->dentry, follow)) {
@@ -2249,7 +2249,7 @@ mountpoint_last(struct nameidata *nd, struct path *path)
        mutex_unlock(&dir->d_inode->i_mutex);
 done:
-        if (!dentry->d_inode) {
+        if (!dentry->d_inode || d_is_negative(dentry)) {
                error = -ENOENT;
                dput(dentry);
                goto out;
@@ -2994,7 +2994,7 @@ retry_lookup:
 finish_lookup:
        /* we _can_ be in RCU mode here */
        error = -ENOENT;
-        if (d_is_negative(path->dentry)) {
+        if (!inode || d_is_negative(path->dentry)) {
                path_to_nameidata(path, nd);
                goto out;
        }
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 6f3f392d48af..f66c66b9f182 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -402,8 +402,10 @@ sort_pacl(struct posix_acl *pacl)
         * by uid/gid. */
        int i, j;
-        if (pacl->a_count <= 4)
+        /* no users or groups */
-                return; /* no users or groups */
+        if (!pacl || pacl->a_count <= 4)
+                return;
        i = 1;
        while (pacl->a_entries[i].e_tag == ACL_USER)
                i++;
@@ -530,13 +532,12 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
        /*
         * ACLs with no ACEs are treated differently in the inheritable
-         * and effective cases: when there are no inheritable ACEs, we
+         * and effective cases: when there are no inheritable ACEs,
-         * set a zero-length default posix acl:
+         * ->set_acl is called with a NULL ACL structure.
         */
-        if (state->empty && (flags & NFS4_ACL_TYPE_DEFAULT)) {
+        if (state->empty && (flags & NFS4_ACL_TYPE_DEFAULT))
-                pacl = posix_acl_alloc(0, GFP_KERNEL);
+                return NULL;
-                return pacl ? pacl : ERR_PTR(-ENOMEM);
-        }
        /*
         * When there are no effective ACEs, the following will end
         * up setting a 3-element effective posix ACL with all
@@ -589,7 +590,7 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
                add_to_mask(state, &state->groups->aces[i].perms);
        }
-        if (!state->users->n && !state->groups->n) {
+        if (state->users->n || state->groups->n) {
                pace++;
                pace->e_tag = ACL_MASK;
                low_mode_from_nfs4(state->mask.allow, &pace->e_perm, flags);
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 39c8ef875f91..2c73cae9899d 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -654,9 +654,11 @@ static struct rpc_clnt *create_backchannel_client(struct rpc_create_args *args)
 static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses)
 {
+        int maxtime = max_cb_time(clp->net);
        struct rpc_timeout      timeparms = {
-                .to_initval     = max_cb_time(clp->net),
+                .to_initval     = maxtime,
                .to_retries     = 0,
+                .to_maxval      = maxtime,
        };
        struct rpc_create_args args = {
                .net            = clp->net,
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3ba65979a3cd..9a77a5a21557 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1078,6 +1078,18 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
                return NULL;
        }
        clp->cl_name.len = name.len;
+        INIT_LIST_HEAD(&clp->cl_sessions);
+        idr_init(&clp->cl_stateids);
+        atomic_set(&clp->cl_refcount, 0);
+        clp->cl_cb_state = NFSD4_CB_UNKNOWN;
+        INIT_LIST_HEAD(&clp->cl_idhash);
+        INIT_LIST_HEAD(&clp->cl_openowners);
+        INIT_LIST_HEAD(&clp->cl_delegations);
+        INIT_LIST_HEAD(&clp->cl_lru);
+        INIT_LIST_HEAD(&clp->cl_callbacks);
+        INIT_LIST_HEAD(&clp->cl_revoked);
+        spin_lock_init(&clp->cl_lock);
+        rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
        return clp;
 }
@@ -1095,6 +1107,7 @@ free_client(struct nfs4_client *clp)
                WARN_ON_ONCE(atomic_read(&ses->se_ref));
                free_session(ses);
        }
+        rpc_destroy_wait_queue(&clp->cl_cb_waitq);
        free_svc_cred(&clp->cl_cred);
        kfree(clp->cl_name.data);
        idr_destroy(&clp->cl_stateids);
@@ -1347,7 +1360,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
        if (clp == NULL)
                return NULL;
-        INIT_LIST_HEAD(&clp->cl_sessions);
        ret = copy_cred(&clp->cl_cred, &rqstp->rq_cred);
        if (ret) {
                spin_lock(&nn->client_lock);
@@ -1355,20 +1367,9 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
                spin_unlock(&nn->client_lock);
                return NULL;
        }
-        idr_init(&clp->cl_stateids);
-        atomic_set(&clp->cl_refcount, 0);
-        clp->cl_cb_state = NFSD4_CB_UNKNOWN;
-        INIT_LIST_HEAD(&clp->cl_idhash);
-        INIT_LIST_HEAD(&clp->cl_openowners);
-        INIT_LIST_HEAD(&clp->cl_delegations);
-        INIT_LIST_HEAD(&clp->cl_lru);
-        INIT_LIST_HEAD(&clp->cl_callbacks);
-        INIT_LIST_HEAD(&clp->cl_revoked);
-        spin_lock_init(&clp->cl_lock);
        nfsd4_init_callback(&clp->cl_cb_null);
        clp->cl_time = get_seconds();
        clear_bit(0, &clp->cl_cb_slot_busy);
-        rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
        copy_verf(clp, verf);
        rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa);
        gen_confirm(clp);
@@ -3716,9 +3717,16 @@ out:
 static __be32
 nfsd4_free_lock_stateid(struct nfs4_ol_stateid *stp)
 {
-        if (check_for_locks(stp->st_file, lockowner(stp->st_stateowner)))
+        struct nfs4_lockowner *lo = lockowner(stp->st_stateowner);
+        if (check_for_locks(stp->st_file, lo))
                return nfserr_locks_held;
-        release_lock_stateid(stp);
+        /*
+         * Currently there's a 1-1 lock stateid<->lockowner
+         * correspondence, and we have to delete the lockowner when we
+         * delete the lock stateid:
+         */
+        unhash_lockowner(lo);
        return nfs_ok;
 }
@@ -4158,6 +4166,10 @@ static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, c
        if (!same_owner_str(&lo->lo_owner, owner, clid))
                return false;
+        if (list_empty(&lo->lo_owner.so_stateids)) {
+                WARN_ON_ONCE(1);
+                return false;
+        }
        lst = list_first_entry(&lo->lo_owner.so_stateids,
                               struct nfs4_ol_stateid, st_perstateowner);
        return lst->st_file->fi_inode == inode;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 2723c1badd01..18881f34737a 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3627,14 +3627,6 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
        /* nfsd4_check_resp_size guarantees enough room for error status */
        if (!op->status)
                op->status = nfsd4_check_resp_size(resp, 0);
-        if (op->status == nfserr_resource && nfsd4_has_session(&resp->cstate)) {
-                struct nfsd4_slot *slot = resp->cstate.slot;
-                if (slot->sl_flags & NFSD4_SLOT_CACHETHIS)
-                        op->status = nfserr_rep_too_big_to_cache;
-                else
-                        op->status = nfserr_rep_too_big;
-        }
        if (so) {
                so->so_replay.rp_status = op->status;
                so->so_replay.rp_buflen = (char *)resp->p - (char *)(statp+1);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 4e565c814309..732648b270dc 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -698,6 +698,8 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
        }
        group->overflow_event = &oevent->fse;
+        if (force_o_largefile())
+                event_f_flags |= O_LARGEFILE;
        group->fanotify_data.f_flags = event_f_flags;
 #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
        spin_lock_init(&group->fanotify_data.access_lock);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index af3f7aa73e13..ee1f88419cb0 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -472,11 +472,15 @@ bail:
 void dlm_destroy_master_caches(void)
 {
-        if (dlm_lockname_cache)
+        if (dlm_lockname_cache) {
                kmem_cache_destroy(dlm_lockname_cache);
+                dlm_lockname_cache = NULL;
+        }
-        if (dlm_lockres_cache)
+        if (dlm_lockres_cache) {
                kmem_cache_destroy(dlm_lockres_cache);
+                dlm_lockres_cache = NULL;
+        }
 }
 static void dlm_lockres_release(struct kref *kref)
diff --git a/fs/open.c b/fs/open.c
index 3d30eb1fc95e..9d64679cec73 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -254,17 +254,22 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
                return -EBADF;
        /*
-         * It's not possible to punch hole or perform collapse range
+         * We can only allow pure fallocate on append only files
-         * on append only file
         */
-        if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE)
+        if ((mode & ~FALLOC_FL_KEEP_SIZE) && IS_APPEND(inode))
-            && IS_APPEND(inode))
                return -EPERM;
        if (IS_IMMUTABLE(inode))
                return -EPERM;
        /*
+         * No fallocate operation of any kind is allowed on an active
+         * swapfile, so reject the request immediately.
+         */
+        if (IS_SWAPFILE(inode))
+                return -ETXTBSY;
+        /*
         * Revalidate the write permissions, in case security policy has
         * changed since the files were opened.
         */
@@ -286,14 +291,6 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
                return -EFBIG;
-        /*
-         * There is no need to overlap collapse range with EOF, in which case
-         * it is effectively a truncate operation
-         */
-        if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
-            (offset + len >= i_size_read(inode)))
-                return -EINVAL;
        if (!file->f_op->fallocate)
                return -EOPNOTSUPP;
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 9e363e41dacc..0855f772cd41 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -246,6 +246,12 @@ posix_acl_equiv_mode(const struct posix_acl *acl, umode_t *mode_p)
        umode_t mode = 0;
        int not_equiv = 0;
+        /*
+         * A null ACL can always be presented as mode bits.
+         */
+        if (!acl)
+                return 0;
        FOREACH_ACL_ENTRY(pa, acl, pe) {
                switch (pa->e_tag) {
                        case ACL_USER_OBJ:
diff --git a/fs/splice.c b/fs/splice.c
index 9bc07d2b53cf..e246954ea48c 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1537,7 +1537,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov,
        struct iovec iovstack[UIO_FASTIOV];
        struct iovec *iov = iovstack;
        struct iov_iter iter;
-        ssize_t count = 0;
+        ssize_t count;
        pipe = get_pipe_info(file);
        if (!pipe)
@@ -1546,8 +1546,9 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov,
        ret = rw_copy_check_uvector(READ, uiov, nr_segs,
                                    ARRAY_SIZE(iovstack), iovstack, &iov);
        if (ret <= 0)
-                return ret;
+                goto out;
+        count = ret;
        iov_iter_init(&iter, iov, nr_segs, count, 0);
        sd.len = 0;
@@ -1560,6 +1561,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov,
        ret = __splice_from_pipe(pipe, &sd, pipe_to_user);
        pipe_unlock(pipe);
+out:
        if (iov != iovstack)
                kfree(iov);
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 28cc1acd5439..e9ef59b3abb1 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -47,12 +47,13 @@ static int sysfs_kf_seq_show(struct seq_file *sf, void *v)
        ssize_t count;
        char *buf;
-        /* acquire buffer and ensure that it's >= PAGE_SIZE */
+        /* acquire buffer and ensure that it's >= PAGE_SIZE and clear */
        count = seq_get_buf(sf, &buf);
        if (count < PAGE_SIZE) {
                seq_commit(sf, -1);
                return 0;
        }
+        memset(buf, 0, PAGE_SIZE);
        /*
         * Invoke show().  Control may reach here via seq file lseek even
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index a66ad6196f59..8794423f7efb 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -63,7 +63,8 @@ int __init sysfs_init(void)
 {
        int err;
-        sysfs_root = kernfs_create_root(NULL, 0, NULL);
+        sysfs_root = kernfs_create_root(NULL, KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK,
+                                        NULL);
        if (IS_ERR(sysfs_root))
                return PTR_ERR(sysfs_root);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index a1266089eca1..a81c7b556896 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1556,7 +1556,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
        if (c->space_fixup) {
                err = ubifs_fixup_free_space(c);
                if (err)
-                        return err;
+                        goto out;
        }
        err = check_free_space(c);
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 01b6a0102fbd..abda1124a70f 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -213,7 +213,7 @@ xfs_attr_calc_size(
                 * Out of line attribute, cannot double split, but
                 * make room for the attribute value itself.
                 */
-                uint    dblocks = XFS_B_TO_FSB(mp, valuelen);
+                uint    dblocks = xfs_attr3_rmt_blocks(mp, valuelen);
                nblks += dblocks;
                nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK);
        }
@@ -698,11 +698,22 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
                trace_xfs_attr_leaf_replace(args);
+                /* save the attribute state for later removal */
                args->op_flags |= XFS_DA_OP_RENAME;     /* an atomic rename */
                args->blkno2 = args->blkno;             /* set 2nd entry info*/
                args->index2 = args->index;
                args->rmtblkno2 = args->rmtblkno;
                args->rmtblkcnt2 = args->rmtblkcnt;
+                args->rmtvaluelen2 = args->rmtvaluelen;
+                /*
+                 * clear the remote attr state now that it is saved so that the
+                 * values reflect the state of the attribute we are about to
+                 * add, not the attribute we just found and will remove later.
+                 */
+                args->rmtblkno = 0;
+                args->rmtblkcnt = 0;
+                args->rmtvaluelen = 0;
        }
        /*
@@ -794,6 +805,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
                args->blkno = args->blkno2;
                args->rmtblkno = args->rmtblkno2;
                args->rmtblkcnt = args->rmtblkcnt2;
+                args->rmtvaluelen = args->rmtvaluelen2;
                if (args->rmtblkno) {
                        error = xfs_attr_rmtval_remove(args);
                        if (error)
@@ -999,13 +1011,22 @@ restart:
                trace_xfs_attr_node_replace(args);
+                /* save the attribute state for later removal */
                args->op_flags |= XFS_DA_OP_RENAME;     /* atomic rename op */
                args->blkno2 = args->blkno;             /* set 2nd entry info*/
                args->index2 = args->index;
                args->rmtblkno2 = args->rmtblkno;
                args->rmtblkcnt2 = args->rmtblkcnt;
+                args->rmtvaluelen2 = args->rmtvaluelen;
+                /*
+                 * clear the remote attr state now that it is saved so that the
+                 * values reflect the state of the attribute we are about to
+                 * add, not the attribute we just found and will remove later.
+                 */
                args->rmtblkno = 0;
                args->rmtblkcnt = 0;
+                args->rmtvaluelen = 0;
        }
        retval = xfs_attr3_leaf_add(blk->bp, state->args);
@@ -1133,6 +1154,7 @@ restart:
                args->blkno = args->blkno2;
                args->rmtblkno = args->rmtblkno2;
                args->rmtblkcnt = args->rmtblkcnt2;
+                args->rmtvaluelen = args->rmtvaluelen2;
                if (args->rmtblkno) {
                        error = xfs_attr_rmtval_remove(args);
                        if (error)
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index fe9587fab17a..511c283459b1 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -1229,6 +1229,7 @@ xfs_attr3_leaf_add_work(
                name_rmt->valueblk = 0;
                args->rmtblkno = 1;
                args->rmtblkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen);
+                args->rmtvaluelen = args->valuelen;
        }
        xfs_trans_log_buf(args->trans, bp,
             XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index),
@@ -2167,11 +2168,11 @@ xfs_attr3_leaf_lookup_int(
                        if (!xfs_attr_namesp_match(args->flags, entry->flags))
                                continue;
                        args->index = probe;
-                        args->valuelen = be32_to_cpu(name_rmt->valuelen);
+                        args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen);
                        args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
                        args->rmtblkcnt = xfs_attr3_rmt_blocks(
                                                        args->dp->i_mount,
-                                                        args->valuelen);
+                                                        args->rmtvaluelen);
                        return XFS_ERROR(EEXIST);
                }
        }
@@ -2220,19 +2221,19 @@ xfs_attr3_leaf_getvalue(
                name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);
                ASSERT(name_rmt->namelen == args->namelen);
                ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0);
-                valuelen = be32_to_cpu(name_rmt->valuelen);
+                args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen);
                args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
                args->rmtblkcnt = xfs_attr3_rmt_blocks(args->dp->i_mount,
-                                                       valuelen);
+                                                       args->rmtvaluelen);
                if (args->flags & ATTR_KERNOVAL) {
-                        args->valuelen = valuelen;
+                        args->valuelen = args->rmtvaluelen;
                        return 0;
                }
-                if (args->valuelen < valuelen) {
+                if (args->valuelen < args->rmtvaluelen) {
-                        args->valuelen = valuelen;
+                        args->valuelen = args->rmtvaluelen;
                        return XFS_ERROR(ERANGE);
                }
-                args->valuelen = valuelen;
+                args->valuelen = args->rmtvaluelen;
        }
        return 0;
 }
@@ -2519,7 +2520,7 @@ xfs_attr3_leaf_clearflag(
                ASSERT((entry->flags & XFS_ATTR_LOCAL) == 0);
                name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);
                name_rmt->valueblk = cpu_to_be32(args->rmtblkno);
-                name_rmt->valuelen = cpu_to_be32(args->valuelen);
+                name_rmt->valuelen = cpu_to_be32(args->rmtvaluelen);
                xfs_trans_log_buf(args->trans, bp,
                         XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt)));
        }
@@ -2677,7 +2678,7 @@ xfs_attr3_leaf_flipflags(
                ASSERT((entry1->flags & XFS_ATTR_LOCAL) == 0);
                name_rmt = xfs_attr3_leaf_name_remote(leaf1, args->index);
                name_rmt->valueblk = cpu_to_be32(args->rmtblkno);
-                name_rmt->valuelen = cpu_to_be32(args->valuelen);
+                name_rmt->valuelen = cpu_to_be32(args->rmtvaluelen);
                xfs_trans_log_buf(args->trans, bp1,
                         XFS_DA_LOGRANGE(leaf1, name_rmt, sizeof(*name_rmt)));
        }
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 01db96f60cf0..833fe5d98d80 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -447,6 +447,7 @@ xfs_attr3_leaf_list_int(
                                args.dp = context->dp;
                                args.whichfork = XFS_ATTR_FORK;
                                args.valuelen = valuelen;
+                                args.rmtvaluelen = valuelen;
                                args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS);
                                args.rmtblkno = be32_to_cpu(name_rmt->valueblk);
                                args.rmtblkcnt = xfs_attr3_rmt_blocks(
diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c
index 6e37823e2932..d2e6e948cec7 100644
--- a/fs/xfs/xfs_attr_remote.c
+++ b/fs/xfs/xfs_attr_remote.c
@@ -337,7 +337,7 @@ xfs_attr_rmtval_get(
        struct xfs_buf          *bp;
        xfs_dablk_t             lblkno = args->rmtblkno;
        __uint8_t               *dst = args->value;
-        int                     valuelen = args->valuelen;
+        int                     valuelen;
        int                     nmap;
        int                     error;
        int                     blkcnt = args->rmtblkcnt;
@@ -347,7 +347,9 @@ xfs_attr_rmtval_get(
        trace_xfs_attr_rmtval_get(args);
        ASSERT(!(args->flags & ATTR_KERNOVAL));
+        ASSERT(args->rmtvaluelen == args->valuelen);
+        valuelen = args->rmtvaluelen;
        while (valuelen > 0) {
                nmap = ATTR_RMTVALUE_MAPSIZE;
                error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
@@ -415,7 +417,7 @@ xfs_attr_rmtval_set(
         * attributes have headers, we can't just do a straight byte to FSB
         * conversion and have to take the header space into account.
         */
-        blkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen);
+        blkcnt = xfs_attr3_rmt_blocks(mp, args->rmtvaluelen);
        error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,
                                                   XFS_ATTR_FORK);
        if (error)
@@ -480,7 +482,7 @@ xfs_attr_rmtval_set(
         */
        lblkno = args->rmtblkno;
        blkcnt = args->rmtblkcnt;
-        valuelen = args->valuelen;
+        valuelen = args->rmtvaluelen;
        while (valuelen > 0) {
                struct xfs_buf  *bp;
                xfs_daddr_t     dblkno;
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 6e95ea79f5d7..201c6091d26a 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -60,10 +60,12 @@ typedef struct xfs_da_args {
        int             index;          /* index of attr of interest in blk */
        xfs_dablk_t     rmtblkno;       /* remote attr value starting blkno */
        int             rmtblkcnt;      /* remote attr value block count */
+        int             rmtvaluelen;    /* remote attr value length in bytes */
        xfs_dablk_t     blkno2;         /* blkno of 2nd attr leaf of interest */
        int             index2;         /* index of 2nd attr in blk */
        xfs_dablk_t     rmtblkno2;      /* remote attr value starting blkno */
        int             rmtblkcnt2;     /* remote attr value block count */
+        int             rmtvaluelen2;   /* remote attr value length in bytes */
        int             op_flags;       /* operation flags */
        enum xfs_dacmp  cmpresult;      /* name compare result for lookups */
 } xfs_da_args_t;
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
index 1399e187d425..753e467aa1a5 100644
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -237,7 +237,7 @@ xfs_fs_nfs_commit_metadata(
        if (!lsn)
                return 0;
-        return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
+        return -_xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
 }
 const struct export_operations xfs_export_operations = {
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 82afdcb33183..830c1c937b88 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -155,7 +155,7 @@ xfs_dir_fsync(
        if (!lsn)
                return 0;
-        return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
+        return -_xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
 }
 STATIC int
@@ -295,7 +295,7 @@ xfs_file_aio_read(
                xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
                if (inode->i_mapping->nrpages) {
-                        ret = -filemap_write_and_wait_range(
+                        ret = filemap_write_and_wait_range(
                                                        VFS_I(ip)->i_mapping,
                                                        pos, -1);
                        if (ret) {
@@ -837,11 +837,19 @@ xfs_file_fallocate(
                unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
                if (offset & blksize_mask || len & blksize_mask) {
-                        error = -EINVAL;
+                        error = EINVAL;
+                        goto out_unlock;
+                }
+                /*
+                 * There is no need to overlap collapse range with EOF,
+                 * in which case it is effectively a truncate operation
+                 */
+                if (offset + len >= i_size_read(inode)) {
+                        error = EINVAL;
                        goto out_unlock;
                }
-                ASSERT(offset + len < i_size_read(inode));
                new_size = i_size_read(inode) - len;
                error = xfs_collapse_file_space(ip, offset, len);
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index ef1ca010f417..36d630319a27 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -72,8 +72,8 @@ xfs_initxattrs(
        int                     error = 0;
        for (xattr = xattr_array; xattr->name != NULL; xattr++) {
-                error = xfs_attr_set(ip, xattr->name, xattr->value,
+                error = -xfs_attr_set(ip, xattr->name, xattr->value,
-                                     xattr->value_len, ATTR_SECURE);
+                                      xattr->value_len, ATTR_SECURE);
                if (error < 0)
                        break;
        }
@@ -93,8 +93,8 @@ xfs_init_security(
        struct inode    *dir,
        const struct qstr *qstr)
 {
-        return security_inode_init_security(inode, dir, qstr,
+        return -security_inode_init_security(inode, dir, qstr,
-                                            &xfs_initxattrs, NULL);
+                                             &xfs_initxattrs, NULL);
 }
 static void
@@ -124,15 +124,15 @@ xfs_cleanup_inode(
        xfs_dentry_to_name(&teardown, dentry, 0);
        xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
-        iput(inode);
 }
 STATIC int
-xfs_vn_mknod(
+xfs_generic_create(
        struct inode    *dir,
        struct dentry   *dentry,
        umode_t         mode,
-        dev_t           rdev)
+        dev_t           rdev,
+        bool            tmpfile)        /* unnamed file */
 {
        struct inode    *inode;
        struct xfs_inode *ip = NULL;
@@ -156,8 +156,12 @@ xfs_vn_mknod(
        if (error)
                return error;
-        xfs_dentry_to_name(&name, dentry, mode);
+        if (!tmpfile) {
-        error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
+                xfs_dentry_to_name(&name, dentry, mode);
+                error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
+        } else {
+                error = xfs_create_tmpfile(XFS_I(dir), dentry, mode, &ip);
+        }
        if (unlikely(error))
                goto out_free_acl;
@@ -169,18 +173,22 @@ xfs_vn_mknod(
 #ifdef CONFIG_XFS_POSIX_ACL
        if (default_acl) {
-                error = xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
+                error = -xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
                if (error)
                        goto out_cleanup_inode;
        }
        if (acl) {
-                error = xfs_set_acl(inode, acl, ACL_TYPE_ACCESS);
+                error = -xfs_set_acl(inode, acl, ACL_TYPE_ACCESS);
                if (error)
                        goto out_cleanup_inode;
        }
 #endif
-        d_instantiate(dentry, inode);
+        if (tmpfile)
+                d_tmpfile(dentry, inode);
+        else
+                d_instantiate(dentry, inode);
 out_free_acl:
        if (default_acl)
                posix_acl_release(default_acl);
@@ -189,11 +197,23 @@ xfs_vn_mknod(
        return -error;
 out_cleanup_inode:
-        xfs_cleanup_inode(dir, inode, dentry);
+        if (!tmpfile)
+                xfs_cleanup_inode(dir, inode, dentry);
+        iput(inode);
        goto out_free_acl;
 }
 STATIC int
+xfs_vn_mknod(
+        struct inode    *dir,
+        struct dentry   *dentry,
+        umode_t         mode,
+        dev_t           rdev)
+{
+        return xfs_generic_create(dir, dentry, mode, rdev, false);
+}
+STATIC int
 xfs_vn_create(
        struct inode    *dir,
        struct dentry   *dentry,
@@ -353,6 +373,7 @@ xfs_vn_symlink(
 out_cleanup_inode:
        xfs_cleanup_inode(dir, inode, dentry);
+        iput(inode);
 out:
        return -error;
 }
@@ -1053,25 +1074,7 @@ xfs_vn_tmpfile(
        struct dentry   *dentry,
        umode_t         mode)
 {
-        int                     error;
+        return xfs_generic_create(dir, dentry, mode, 0, true);
-        struct xfs_inode        *ip;
-        struct inode            *inode;
-        error = xfs_create_tmpfile(XFS_I(dir), dentry, mode, &ip);
-        if (unlikely(error))
-                return -error;
-        inode = VFS_I(ip);
-        error = xfs_init_security(inode, dir, &dentry->d_name);
-        if (unlikely(error)) {
-                iput(inode);
-                return -error;
-        }
-        d_tmpfile(dentry, inode);
-        return 0;
 }
 static const struct inode_operations xfs_inode_operations = {
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 08624dc67317..a5f8bd9899d3 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -616,11 +616,13 @@ xfs_log_mount(
        int             error = 0;
        int             min_logfsbs;
-        if (!(mp->m_flags & XFS_MOUNT_NORECOVERY))
+        if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) {
-                xfs_notice(mp, "Mounting Filesystem");
+                xfs_notice(mp, "Mounting V%d Filesystem",
-        else {
+                           XFS_SB_VERSION_NUM(&mp->m_sb));
+        } else {
                xfs_notice(mp,
-"Mounting filesystem in no-recovery mode.  Filesystem will be inconsistent.");
+"Mounting V%d filesystem in no-recovery mode. Filesystem will be inconsistent.",
+                           XFS_SB_VERSION_NUM(&mp->m_sb));
                ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
        }
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 993cb19e7d39..944f3d9456a8 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -743,8 +743,6 @@ xfs_mountfs(
                new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE;
                if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size))
                        mp->m_inode_cluster_size = new_size;
-                xfs_info(mp, "Using inode cluster size of %d bytes",
-                         mp->m_inode_cluster_size);
        }
        /*
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 348e4d2ed6e6..dc977b6e6a36 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -843,22 +843,17 @@ xfs_qm_init_quotainfo(
        qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
-        if ((error = list_lru_init(&qinf->qi_lru))) {
+        error = -list_lru_init(&qinf->qi_lru);
-                kmem_free(qinf);
+        if (error)
-                mp->m_quotainfo = NULL;
+                goto out_free_qinf;
-                return error;
-        }
        /*
         * See if quotainodes are setup, and if not, allocate them,
         * and change the superblock accordingly.
         */
-        if ((error = xfs_qm_init_quotainos(mp))) {
+        error = xfs_qm_init_quotainos(mp);
-                list_lru_destroy(&qinf->qi_lru);
+        if (error)
-                kmem_free(qinf);
+                goto out_free_lru;
-                mp->m_quotainfo = NULL;
-                return error;
-        }
        INIT_RADIX_TREE(&qinf->qi_uquota_tree, GFP_NOFS);
        INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_NOFS);
@@ -918,7 +913,7 @@ xfs_qm_init_quotainfo(
                qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
                qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
                qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
- 
                xfs_qm_dqdestroy(dqp);
        } else {
                qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
@@ -935,6 +930,13 @@ xfs_qm_init_quotainfo(
        qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE;
        register_shrinker(&qinf->qi_shrinker);
        return 0;
+out_free_lru:
+        list_lru_destroy(&qinf->qi_lru);
+out_free_qinf:
+        kmem_free(qinf);
+        mp->m_quotainfo = NULL;
+        return error;
 }
diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/xfs_sb.c
index 0c0e41bbe4e3..8baf61afae1d 100644
--- a/fs/xfs/xfs_sb.c
+++ b/fs/xfs/xfs_sb.c
@@ -201,10 +201,6 @@ xfs_mount_validate_sb(
         * write validation, we don't need to check feature masks.
         */
        if (check_version && XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) {
-                xfs_alert(mp,
-"Version 5 superblock detected. This kernel has EXPERIMENTAL support enabled!\n"
-"Use of these features in this kernel is at your own risk!");
                if (xfs_sb_has_compat_feature(sbp,
                                        XFS_SB_FEAT_COMPAT_UNKNOWN)) {
                        xfs_warn(mp,
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 205376776377..3494eff8e4eb 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1433,11 +1433,11 @@ xfs_fs_fill_super(
        if (error)
                goto out_free_fsname;
-        error = xfs_init_mount_workqueues(mp);
+        error = -xfs_init_mount_workqueues(mp);
        if (error)
                goto out_close_devices;
-        error = xfs_icsb_init_counters(mp);
+        error = -xfs_icsb_init_counters(mp);
        if (error)
                goto out_destroy_workqueues;