Diffstat (limited to 'fs')
-rw-r--r--  fs/afs/flock.c                   |   4
-rw-r--r--  fs/afs/inode.c                   |   3
-rw-r--r--  fs/afs/protocol_yfs.h            |  11
-rw-r--r--  fs/afs/rxrpc.c                   |  53
-rw-r--r--  fs/afs/server_list.c             |   4
-rw-r--r--  fs/afs/yfsclient.c               |   2
-rw-r--r--  fs/aio.c                         |   1
-rw-r--r--  fs/autofs/expire.c               |   3
-rw-r--r--  fs/autofs/inode.c                |   4
-rw-r--r--  fs/binfmt_script.c               |  57
-rw-r--r--  fs/block_dev.c                   |  28
-rw-r--r--  fs/btrfs/ctree.c                 |  76
-rw-r--r--  fs/btrfs/ctree.h                 |   7
-rw-r--r--  fs/btrfs/disk-io.c               |  12
-rw-r--r--  fs/btrfs/extent-tree.c           |  21
-rw-r--r--  fs/btrfs/inode.c                 |   5
-rw-r--r--  fs/btrfs/ioctl.c                 |  49
-rw-r--r--  fs/btrfs/super.c                 |   3
-rw-r--r--  fs/btrfs/transaction.c           |  24
-rw-r--r--  fs/btrfs/volumes.c               |  16
-rw-r--r--  fs/buffer.c                      |  19
-rw-r--r--  fs/ceph/addr.c                   |   5
-rw-r--r--  fs/ceph/caps.c                   |   2
-rw-r--r--  fs/ceph/quota.c                  |  13
-rw-r--r--  fs/ceph/snap.c                   |   3
-rw-r--r--  fs/ceph/super.c                  |   4
-rw-r--r--  fs/cifs/cifs_debug.c             |   1
-rw-r--r--  fs/cifs/cifsfs.h                 |   2
-rw-r--r--  fs/cifs/cifsglob.h               |  20
-rw-r--r--  fs/cifs/cifssmb.c                |  65
-rw-r--r--  fs/cifs/connect.c                |  28
-rw-r--r--  fs/cifs/dfs_cache.c              |   1
-rw-r--r--  fs/cifs/file.c                   |  56
-rw-r--r--  fs/cifs/inode.c                  |  10
-rw-r--r--  fs/cifs/smb2file.c               |   8
-rw-r--r--  fs/cifs/smb2inode.c              |  17
-rw-r--r--  fs/cifs/smb2misc.c               |   7
-rw-r--r--  fs/cifs/smb2ops.c                |  72
-rw-r--r--  fs/cifs/smb2pdu.c                | 108
-rw-r--r--  fs/cifs/smb2pdu.h                |  19
-rw-r--r--  fs/cifs/trace.c                  |  10
-rw-r--r--  fs/cifs/trace.h                  |  10
-rw-r--r--  fs/cifs/transport.c              | 113
-rw-r--r--  fs/dcache.c                      |  38
-rw-r--r--  fs/debugfs/inode.c               |  36
-rw-r--r--  fs/direct-io.c                   |   5
-rw-r--r--  fs/drop_caches.c                 |   8
-rw-r--r--  fs/ext4/fsync.c                  |  13
-rw-r--r--  fs/fs-writeback.c                |  40
-rw-r--r--  fs/fuse/dev.c                    |   4
-rw-r--r--  fs/fuse/file.c                   |   2
-rw-r--r--  fs/fuse/inode.c                  |   2
-rw-r--r--  fs/gfs2/glops.c                  |   1
-rw-r--r--  fs/gfs2/log.c                    |   4
-rw-r--r--  fs/gfs2/lops.c                   | 190
-rw-r--r--  fs/gfs2/lops.h                   |   4
-rw-r--r--  fs/gfs2/ops_fstype.c             |   1
-rw-r--r--  fs/gfs2/recovery.c               | 123
-rw-r--r--  fs/gfs2/recovery.h               |   2
-rw-r--r--  fs/gfs2/rgrp.c                   |   2
-rw-r--r--  fs/gfs2/super.c                  |   1
-rw-r--r--  fs/hugetlbfs/inode.c             |  61
-rw-r--r--  fs/inode.c                       |   7
-rw-r--r--  fs/iomap.c                       |  37
-rw-r--r--  fs/nfs/nfs4file.c                |   8
-rw-r--r--  fs/nfs/nfs4idmap.c               |  31
-rw-r--r--  fs/nfs/super.c                   |   5
-rw-r--r--  fs/nfs/write.c                   |  20
-rw-r--r--  fs/nfsd/nfsctl.c                 |   4
-rw-r--r--  fs/nfsd/vfs.c                    |   6
-rw-r--r--  fs/notify/inotify/inotify_user.c |   6
-rw-r--r--  fs/proc/base.c                   |   4
-rw-r--r--  fs/proc/generic.c                |   4
-rw-r--r--  fs/proc/internal.h               |   1
-rw-r--r--  fs/proc/proc_net.c               |  20
-rw-r--r--  fs/proc/task_mmu.c               |  22
-rw-r--r--  fs/pstore/ram.c                  |  12
-rw-r--r--  fs/sysfs/dir.c                   |   3
-rw-r--r--  fs/sysfs/file.c                  |   6
-rw-r--r--  fs/sysfs/group.c                 |   3
-rw-r--r--  fs/sysfs/symlink.c               |   3
-rw-r--r--  fs/xfs/scrub/repair.c            |  11
-rw-r--r--  fs/xfs/xfs_aops.c                |   2
-rw-r--r--  fs/xfs/xfs_buf.c                 |  19
84 files changed, 1141 insertions(+), 606 deletions(-)
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 0568fd986821..e432bd27a2e7 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -208,7 +208,7 @@ again:
 	/* The new front of the queue now owns the state variables. */
 	next = list_entry(vnode->pending_locks.next,
 			  struct file_lock, fl_u.afs.link);
-	vnode->lock_key = afs_file_key(next->fl_file);
+	vnode->lock_key = key_get(afs_file_key(next->fl_file));
 	vnode->lock_type = (next->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
 	vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB;
 	goto again;
@@ -413,7 +413,7 @@ static void afs_dequeue_lock(struct afs_vnode *vnode, struct file_lock *fl)
 	/* The new front of the queue now owns the state variables. */
 	next = list_entry(vnode->pending_locks.next,
 			  struct file_lock, fl_u.afs.link);
-	vnode->lock_key = afs_file_key(next->fl_file);
+	vnode->lock_key = key_get(afs_file_key(next->fl_file));
 	vnode->lock_type = (next->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
 	vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB;
 	afs_lock_may_be_available(vnode);
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 6b17d3620414..1a4ce07fb406 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -414,7 +414,6 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
 	} else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
 		valid = true;
 	} else {
-		vnode->cb_s_break = vnode->cb_interest->server->cb_s_break;
 		vnode->cb_v_break = vnode->volume->cb_v_break;
 		valid = false;
 	}
@@ -546,6 +545,8 @@ void afs_evict_inode(struct inode *inode)
 #endif
 
 	afs_put_permits(rcu_access_pointer(vnode->permit_cache));
+	key_put(vnode->lock_key);
+	vnode->lock_key = NULL;
 	_leave("");
 }
 
diff --git a/fs/afs/protocol_yfs.h b/fs/afs/protocol_yfs.h
index 07bc10f076aa..d443e2bfa094 100644
--- a/fs/afs/protocol_yfs.h
+++ b/fs/afs/protocol_yfs.h
@@ -161,3 +161,14 @@ struct yfs_xdr_YFSStoreVolumeStatus {
 	struct yfs_xdr_u64	max_quota;
 	struct yfs_xdr_u64	file_quota;
 } __packed;
+
+enum yfs_lock_type {
+	yfs_LockNone		= -1,
+	yfs_LockRead		= 0,
+	yfs_LockWrite		= 1,
+	yfs_LockExtend		= 2,
+	yfs_LockRelease		= 3,
+	yfs_LockMandatoryRead	= 0x100,
+	yfs_LockMandatoryWrite	= 0x101,
+	yfs_LockMandatoryExtend	= 0x102,
+};
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index a7b44863d502..2c588f9bbbda 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -23,6 +23,7 @@ struct workqueue_struct *afs_async_calls;
 static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long);
 static long afs_wait_for_call_to_complete(struct afs_call *, struct afs_addr_cursor *);
 static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long);
+static void afs_delete_async_call(struct work_struct *);
 static void afs_process_async_call(struct work_struct *);
 static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long);
 static void afs_rx_discard_new_call(struct rxrpc_call *, unsigned long);
@@ -203,20 +204,26 @@ void afs_put_call(struct afs_call *call)
 	}
 }
 
+static struct afs_call *afs_get_call(struct afs_call *call,
+				     enum afs_call_trace why)
+{
+	int u = atomic_inc_return(&call->usage);
+
+	trace_afs_call(call, why, u,
+		       atomic_read(&call->net->nr_outstanding_calls),
+		       __builtin_return_address(0));
+	return call;
+}
+
 /*
  * Queue the call for actual work.
  */
 static void afs_queue_call_work(struct afs_call *call)
 {
 	if (call->type->work) {
-		int u = atomic_inc_return(&call->usage);
-
-		trace_afs_call(call, afs_call_trace_work, u,
-			       atomic_read(&call->net->nr_outstanding_calls),
-			       __builtin_return_address(0));
-
 		INIT_WORK(&call->work, call->type->work);
 
+		afs_get_call(call, afs_call_trace_work);
 		if (!queue_work(afs_wq, &call->work))
 			afs_put_call(call);
 	}
@@ -398,6 +405,12 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
 		}
 	}
 
+	/* If the call is going to be asynchronous, we need an extra ref for
+	 * the call to hold itself so the caller need not hang on to its ref.
+	 */
+	if (call->async)
+		afs_get_call(call, afs_call_trace_get);
+
 	/* create a call */
 	rxcall = rxrpc_kernel_begin_call(call->net->socket, srx, call->key,
 					 (unsigned long)call,
@@ -438,15 +451,17 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
 		goto error_do_abort;
 	}
 
-	/* at this point, an async call may no longer exist as it may have
-	 * already completed */
-	if (call->async)
+	/* Note that at this point, we may have received the reply or an abort
+	 * - and an asynchronous call may already have completed.
+	 */
+	if (call->async) {
+		afs_put_call(call);
 		return -EINPROGRESS;
+	}
 
 	return afs_wait_for_call_to_complete(call, ac);
 
 error_do_abort:
-	call->state = AFS_CALL_COMPLETE;
 	if (ret != -ECONNABORTED) {
 		rxrpc_kernel_abort_call(call->net->socket, rxcall,
 					RX_USER_ABORT, ret, "KSD");
@@ -463,8 +478,24 @@ error_do_abort:
 error_kill_call:
 	if (call->type->done)
 		call->type->done(call);
-	afs_put_call(call);
+
+	/* We need to dispose of the extra ref we grabbed for an async call.
+	 * The call, however, might be queued on afs_async_calls and we need to
+	 * make sure we don't get any more notifications that might requeue it.
+	 */
+	if (call->rxcall) {
+		rxrpc_kernel_end_call(call->net->socket, call->rxcall);
+		call->rxcall = NULL;
+	}
+	if (call->async) {
+		if (cancel_work_sync(&call->async_work))
+			afs_put_call(call);
+		afs_put_call(call);
+	}
+
 	ac->error = ret;
+	call->state = AFS_CALL_COMPLETE;
+	afs_put_call(call);
 	_leave(" = %d", ret);
 	return ret;
 }
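
The refactor above pulls reference acquisition into a single afs_get_call() helper and, for asynchronous calls, takes an extra reference that the call holds on itself, so afs_make_call() can return -EINPROGRESS while the completion path (or the error path's cancel_work_sync()) drops that reference. A minimal userspace sketch of this ownership pattern, with illustrative names rather than the kernel API:

	#include <stdatomic.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct call {
		atomic_int usage;
	};

	/* Take an extra reference on behalf of the async work itself. */
	static struct call *get_call(struct call *c)
	{
		atomic_fetch_add(&c->usage, 1);
		return c;
	}

	/* Drop one reference; the last one frees the call. */
	static void put_call(struct call *c)
	{
		if (atomic_fetch_sub(&c->usage, 1) == 1) {
			printf("last reference dropped, freeing call\n");
			free(c);
		}
	}

	int main(void)
	{
		struct call *c = malloc(sizeof(*c));

		if (!c)
			return 1;
		atomic_init(&c->usage, 1);	/* issuing thread's reference */
		get_call(c);			/* extra ref held by the async call */
		put_call(c);			/* completion path drops its ref */
		put_call(c);			/* issuer drops its ref: freed here */
		return 0;
	}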
diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c
index 95d0761cdb34..155dc14caef9 100644
--- a/fs/afs/server_list.c
+++ b/fs/afs/server_list.c
@@ -42,9 +42,7 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell,
 		if (vldb->fs_mask[i] & type_mask)
 			nr_servers++;
 
-	slist = kzalloc(sizeof(struct afs_server_list) +
-			sizeof(struct afs_server_entry) * nr_servers,
-			GFP_KERNEL);
+	slist = kzalloc(struct_size(slist, servers, nr_servers), GFP_KERNEL);
 	if (!slist)
 		goto error;
 
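
struct_size(slist, servers, nr_servers) from <linux/overflow.h> computes sizeof(*slist) plus nr_servers trailing array elements, saturating to SIZE_MAX on overflow so the allocation fails cleanly instead of coming back undersized. A rough userspace sketch of the same overflow-safe pattern (helper name and types are illustrative):

	#include <stddef.h>
	#include <stdint.h>
	#include <stdlib.h>

	struct server_list {
		size_t nr_servers;
		int servers[];		/* flexible array member */
	};

	/* Allocate hdr + n * elem bytes, refusing on size_t overflow. */
	static void *alloc_flex(size_t hdr, size_t elem, size_t n)
	{
		if (elem && n > (SIZE_MAX - hdr) / elem)
			return NULL;	/* would overflow: fail the allocation */
		return calloc(1, hdr + n * elem);
	}

	int main(void)
	{
		struct server_list *slist =
			alloc_flex(sizeof(*slist), sizeof(slist->servers[0]), 16);

		if (!slist)
			return 1;
		slist->nr_servers = 16;
		free(slist);
		return 0;
	}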
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index 12658c1363ae..5aa57929e8c2 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -803,7 +803,7 @@ int yfs_fs_create_file(struct afs_fs_cursor *fc,
 	bp = xdr_encode_YFSFid(bp, &vnode->fid);
 	bp = xdr_encode_string(bp, name, namesz);
 	bp = xdr_encode_YFSStoreStatus_mode(bp, mode);
-	bp = xdr_encode_u32(bp, 0); /* ViceLockType */
+	bp = xdr_encode_u32(bp, yfs_LockNone); /* ViceLockType */
 	yfs_check_req(call, bp);
 
 	afs_use_fs_server(call, fc->cbi);
diff --git a/fs/aio.c b/fs/aio.c
index b906ff70c90f..aaaaf4d12c73 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1436,6 +1436,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
 	if (unlikely(!req->ki_filp))
 		return -EBADF;
 	req->ki_complete = aio_complete_rw;
+	req->private = NULL;
 	req->ki_pos = iocb->aio_offset;
 	req->ki_flags = iocb_flags(req->ki_filp);
 	if (iocb->aio_flags & IOCB_FLAG_RESFD)
diff --git a/fs/autofs/expire.c b/fs/autofs/expire.c
index d441244b79df..28d9c2b1b3bb 100644
--- a/fs/autofs/expire.c
+++ b/fs/autofs/expire.c
@@ -596,7 +596,6 @@ int autofs_expire_run(struct super_block *sb,
 	pkt.len = dentry->d_name.len;
 	memcpy(pkt.name, dentry->d_name.name, pkt.len);
 	pkt.name[pkt.len] = '\0';
-	dput(dentry);
 
 	if (copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)))
 		ret = -EFAULT;
@@ -609,6 +608,8 @@ int autofs_expire_run(struct super_block *sb,
 	complete_all(&ino->expire_complete);
 	spin_unlock(&sbi->fs_lock);
 
+	dput(dentry);
+
 	return ret;
 }
 
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index 0e8ea2d9a2bb..078992eee299 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -266,8 +266,10 @@ int autofs_fill_super(struct super_block *s, void *data, int silent)
 	}
 	root_inode = autofs_get_inode(s, S_IFDIR | 0755);
 	root = d_make_root(root_inode);
-	if (!root)
+	if (!root) {
+		ret = -ENOMEM;
 		goto fail_ino;
+	}
 	pipe = NULL;
 
 	root->d_fsdata = ino;
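
The bug fixed here is a classic goto-cleanup pitfall: the d_make_root() failure path jumped to fail_ino without setting ret, so the function could propagate a stale (possibly zero) return value. A compact userspace sketch of the idiom, with illustrative names:

	#include <errno.h>
	#include <stdlib.h>

	/* Stand-in for d_make_root(): returns NULL on allocation failure. */
	static void *make_root(void)
	{
		return malloc(64);
	}

	static int fill_super(void)
	{
		int ret = -EINVAL;	/* left over from an earlier check */
		void *root;

		root = make_root();
		if (!root) {
			ret = -ENOMEM;	/* the assignment the fix adds */
			goto fail;
		}
		free(root);
		return 0;
	fail:
		return ret;	/* without the assignment, a stale code escapes */
	}

	int main(void)
	{
		return fill_super() ? 1 : 0;
	}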
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index d0078cbb718b..e996174cbfc0 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -14,13 +14,30 @@
 #include <linux/err.h>
 #include <linux/fs.h>
 
+static inline bool spacetab(char c) { return c == ' ' || c == '\t'; }
+static inline char *next_non_spacetab(char *first, const char *last)
+{
+	for (; first <= last; first++)
+		if (!spacetab(*first))
+			return first;
+	return NULL;
+}
+static inline char *next_terminator(char *first, const char *last)
+{
+	for (; first <= last; first++)
+		if (spacetab(*first) || !*first)
+			return first;
+	return NULL;
+}
+
 static int load_script(struct linux_binprm *bprm)
 {
 	const char *i_arg, *i_name;
-	char *cp;
+	char *cp, *buf_end;
 	struct file *file;
 	int retval;
 
+	/* Not ours to exec if we don't start with "#!". */
 	if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!'))
 		return -ENOEXEC;
 
@@ -33,23 +50,41 @@ static int load_script(struct linux_binprm *bprm)
 	if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE)
 		return -ENOENT;
 
-	/*
-	 * This section does the #! interpretation.
-	 * Sorta complicated, but hopefully it will work.  -TYT
-	 */
-
+	/* Release since we are not mapping a binary into memory. */
 	allow_write_access(bprm->file);
 	fput(bprm->file);
 	bprm->file = NULL;
 
-	for (cp = bprm->buf+2;; cp++) {
-		if (cp >= bprm->buf + BINPRM_BUF_SIZE)
+	/*
+	 * This section handles parsing the #! line into separate
+	 * interpreter path and argument strings. We must be careful
+	 * because bprm->buf is not yet guaranteed to be NUL-terminated
+	 * (though the buffer will have trailing NUL padding when the
+	 * file size was smaller than the buffer size).
+	 *
+	 * We do not want to exec a truncated interpreter path, so either
+	 * we find a newline (which indicates nothing is truncated), or
+	 * we find a space/tab/NUL after the interpreter path (which
+	 * itself may be preceded by spaces/tabs). Truncating the
+	 * arguments is fine: the interpreter can re-read the script to
+	 * parse them on its own.
+	 */
+	buf_end = bprm->buf + sizeof(bprm->buf) - 1;
+	cp = strnchr(bprm->buf, sizeof(bprm->buf), '\n');
+	if (!cp) {
+		cp = next_non_spacetab(bprm->buf + 2, buf_end);
+		if (!cp)
+			return -ENOEXEC; /* Entire buf is spaces/tabs */
+		/*
+		 * If there is no later space/tab/NUL we must assume the
+		 * interpreter path is truncated.
+		 */
+		if (!next_terminator(cp, buf_end))
 			return -ENOEXEC;
-		if (!*cp || (*cp == '\n'))
-			break;
+		cp = buf_end;
 	}
+	/* NUL-terminate the buffer and any trailing spaces/tabs. */
 	*cp = '\0';
-
 	while (cp > bprm->buf) {
 		cp--;
 		if ((*cp == ' ') || (*cp == '\t'))
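
The new parser refuses to execute a script when the interpreter path may have been cut off by the fixed-size read into bprm->buf: either a newline proves the line is complete, or a space/tab/NUL after the path proves the path itself is intact. A self-contained userspace sketch of that decision logic (the buffer size and driver code are illustrative):

	#include <stdbool.h>
	#include <stdio.h>
	#include <string.h>

	#define BUF_SIZE 128	/* stand-in for BINPRM_BUF_SIZE */

	static bool spacetab(char c) { return c == ' ' || c == '\t'; }

	static char *next_non_spacetab(char *first, const char *last)
	{
		for (; first <= last; first++)
			if (!spacetab(*first))
				return first;
		return NULL;
	}

	static char *next_terminator(char *first, const char *last)
	{
		for (; first <= last; first++)
			if (spacetab(*first) || !*first)
				return first;
		return NULL;
	}

	/* Is the "#!" line provably complete inside the buffer? */
	static bool shebang_complete(char *buf)
	{
		char *buf_end = buf + BUF_SIZE - 1;
		char *cp;

		if (buf[0] != '#' || buf[1] != '!')
			return false;
		if (memchr(buf, '\n', BUF_SIZE))
			return true;	/* newline seen: nothing was cut off */
		cp = next_non_spacetab(buf + 2, buf_end);
		if (!cp)
			return false;	/* whole buffer is spaces/tabs */
		/* No space/tab/NUL after the path: it may be truncated. */
		return next_terminator(cp, buf_end) != NULL;
	}

	int main(void)
	{
		char ok[BUF_SIZE] = "#!/bin/sh\n";
		char cut[BUF_SIZE];

		memset(cut, 'a', sizeof(cut));	/* path runs off the buffer end */
		cut[0] = '#';
		cut[1] = '!';
		printf("ok=%d cut=%d\n", shebang_complete(ok), shebang_complete(cut));
		return 0;
	}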
diff --git a/fs/block_dev.c b/fs/block_dev.c
index c546cdce77e6..58a4c1217fa8 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -104,6 +104,20 @@ void invalidate_bdev(struct block_device *bdev)
 }
 EXPORT_SYMBOL(invalidate_bdev);
 
+static void set_init_blocksize(struct block_device *bdev)
+{
+	unsigned bsize = bdev_logical_block_size(bdev);
+	loff_t size = i_size_read(bdev->bd_inode);
+
+	while (bsize < PAGE_SIZE) {
+		if (size & bsize)
+			break;
+		bsize <<= 1;
+	}
+	bdev->bd_block_size = bsize;
+	bdev->bd_inode->i_blkbits = blksize_bits(bsize);
+}
+
 int set_blocksize(struct block_device *bdev, int size)
 {
 	/* Size must be a power of two, and between 512 and PAGE_SIZE */
@@ -1431,18 +1445,9 @@ EXPORT_SYMBOL(check_disk_change);
 
 void bd_set_size(struct block_device *bdev, loff_t size)
 {
-	unsigned bsize = bdev_logical_block_size(bdev);
-
 	inode_lock(bdev->bd_inode);
 	i_size_write(bdev->bd_inode, size);
 	inode_unlock(bdev->bd_inode);
-	while (bsize < PAGE_SIZE) {
-		if (size & bsize)
-			break;
-		bsize <<= 1;
-	}
-	bdev->bd_block_size = bsize;
-	bdev->bd_inode->i_blkbits = blksize_bits(bsize);
 }
 EXPORT_SYMBOL(bd_set_size);
 
@@ -1519,8 +1524,10 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 			}
 		}
 
-		if (!ret)
+		if (!ret) {
 			bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
+			set_init_blocksize(bdev);
+		}
 
 		/*
 		 * If the device is invalidated, rescan partition
@@ -1555,6 +1562,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 			goto out_clear;
 		}
 		bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
+		set_init_blocksize(bdev);
 	}
 
 	if (bdev->bd_bdi == &noop_backing_dev_info)
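
set_init_blocksize() picks the largest power-of-two block size, up to the page size, that evenly divides the device size: `size & bsize` is nonzero exactly when size is an odd multiple of bsize, i.e. when doubling again would no longer divide evenly. A runnable userspace sketch with example values:

	#include <stdio.h>

	#define PAGE_SIZE 4096u

	static unsigned init_blocksize(unsigned logical_bsize,
				       unsigned long long size)
	{
		unsigned bsize = logical_bsize;

		while (bsize < PAGE_SIZE) {
			if (size & bsize)	/* not a multiple of 2 * bsize */
				break;
			bsize <<= 1;
		}
		return bsize;
	}

	int main(void)
	{
		/* 512-byte sectors, 1 MiB device: grows to full 4 KiB blocks. */
		printf("%u\n", init_blocksize(512, 1ULL << 20));
		/* 3584-byte device (7 sectors): stuck at 512. */
		printf("%u\n", init_blocksize(512, 3584ULL));
		return 0;
	}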
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index d92462fe66c8..5a6c39b44c84 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -968,6 +968,48 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
 	return 0;
 }
 
+static struct extent_buffer *alloc_tree_block_no_bg_flush(
+					  struct btrfs_trans_handle *trans,
+					  struct btrfs_root *root,
+					  u64 parent_start,
+					  const struct btrfs_disk_key *disk_key,
+					  int level,
+					  u64 hint,
+					  u64 empty_size)
+{
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct extent_buffer *ret;
+
+	/*
+	 * If we are COWing a node/leaf from the extent, chunk, device or free
+	 * space trees, make sure that we do not finish block group creation of
+	 * pending block groups. We do this to avoid a deadlock.
+	 * COWing can result in allocation of a new chunk, and flushing pending
+	 * block groups (btrfs_create_pending_block_groups()) can be triggered
+	 * when finishing allocation of a new chunk. Creation of a pending block
+	 * group modifies the extent, chunk, device and free space trees,
+	 * therefore we could deadlock with ourselves since we are holding a
+	 * lock on an extent buffer that btrfs_create_pending_block_groups() may
+	 * try to COW later.
+	 * For similar reasons, we also need to delay flushing pending block
+	 * groups when splitting a leaf or node, from one of those trees, since
+	 * we are holding a write lock on it and its parent or when inserting a
+	 * new root node for one of those trees.
+	 */
+	if (root == fs_info->extent_root ||
+	    root == fs_info->chunk_root ||
+	    root == fs_info->dev_root ||
+	    root == fs_info->free_space_root)
+		trans->can_flush_pending_bgs = false;
+
+	ret = btrfs_alloc_tree_block(trans, root, parent_start,
+				     root->root_key.objectid, disk_key, level,
+				     hint, empty_size);
+	trans->can_flush_pending_bgs = true;
+
+	return ret;
+}
+
 /*
  * does the dirty work in cow of a single block.  The parent block (if
  * supplied) is updated to point to the new cow copy.  The new buffer is marked
@@ -1015,26 +1057,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 	if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent)
 		parent_start = parent->start;
 
-	/*
-	 * If we are COWing a node/leaf from the extent, chunk or device trees,
-	 * make sure that we do not finish block group creation of pending block
-	 * groups. We do this to avoid a deadlock.
-	 * COWing can result in allocation of a new chunk, and flushing pending
-	 * block groups (btrfs_create_pending_block_groups()) can be triggered
-	 * when finishing allocation of a new chunk. Creation of a pending block
-	 * group modifies the extent, chunk and device trees, therefore we could
-	 * deadlock with ourselves since we are holding a lock on an extent
-	 * buffer that btrfs_create_pending_block_groups() may try to COW later.
-	 */
-	if (root == fs_info->extent_root ||
-	    root == fs_info->chunk_root ||
-	    root == fs_info->dev_root)
-		trans->can_flush_pending_bgs = false;
-
-	cow = btrfs_alloc_tree_block(trans, root, parent_start,
-			root->root_key.objectid, &disk_key, level,
-			search_start, empty_size);
-	trans->can_flush_pending_bgs = true;
+	cow = alloc_tree_block_no_bg_flush(trans, root, parent_start, &disk_key,
+					   level, search_start, empty_size);
 	if (IS_ERR(cow))
 		return PTR_ERR(cow);
 
@@ -3343,8 +3367,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 	else
 		btrfs_node_key(lower, &lower_key, 0);
 
-	c = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
-				   &lower_key, level, root->node->start, 0);
+	c = alloc_tree_block_no_bg_flush(trans, root, 0, &lower_key, level,
+					 root->node->start, 0);
 	if (IS_ERR(c))
 		return PTR_ERR(c);
 
@@ -3473,8 +3497,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
 	mid = (c_nritems + 1) / 2;
 	btrfs_node_key(c, &disk_key, mid);
 
-	split = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
-				       &disk_key, level, c->start, 0);
+	split = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, level,
+					     c->start, 0);
 	if (IS_ERR(split))
 		return PTR_ERR(split);
 
@@ -4258,8 +4282,8 @@ again:
 	else
 		btrfs_item_key(l, &disk_key, mid);
 
-	right = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
-				       &disk_key, 0, l->start, 0);
+	right = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, 0,
+					     l->start, 0);
 	if (IS_ERR(right))
 		return PTR_ERR(right);
 
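
alloc_tree_block_no_bg_flush() is a guard-flag wrapper: it clears trans->can_flush_pending_bgs around the allocation so a chunk allocation triggered during COW cannot re-enter btrfs_create_pending_block_groups() while the caller holds locks on the very trees that function would modify. A stripped-down userspace sketch of the guard pattern (names illustrative):

	#include <stdbool.h>
	#include <stdio.h>

	struct trans {
		bool can_flush_pending_bgs;
	};

	/* Something the allocator may call back into. */
	static void maybe_flush_pending(struct trans *t)
	{
		if (!t->can_flush_pending_bgs)
			return;		/* suppressed inside the guarded region */
		printf("flushing pending block groups\n");
	}

	static void alloc_block_no_flush(struct trans *t)
	{
		t->can_flush_pending_bgs = false;
		maybe_flush_pending(t);	/* would deadlock in the real code path */
		t->can_flush_pending_bgs = true;
	}

	int main(void)
	{
		struct trans t = { .can_flush_pending_bgs = true };

		alloc_block_no_flush(&t);	/* prints nothing */
		maybe_flush_pending(&t);	/* prints once, flag restored */
		return 0;
	}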
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0a68cf7032f5..7a2a2621f0d9 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -35,6 +35,7 @@
 struct btrfs_trans_handle;
 struct btrfs_transaction;
 struct btrfs_pending_snapshot;
+struct btrfs_delayed_ref_root;
 extern struct kmem_cache *btrfs_trans_handle_cachep;
 extern struct kmem_cache *btrfs_bit_radix_cachep;
 extern struct kmem_cache *btrfs_path_cachep;
@@ -786,6 +787,9 @@ enum {
 	 * main phase. The fs_info::balance_ctl is initialized.
 	 */
 	BTRFS_FS_BALANCE_RUNNING,
+
+	/* Indicate that the cleaner thread is awake and doing something. */
+	BTRFS_FS_CLEANER_RUNNING,
 };
 
 struct btrfs_fs_info {
@@ -2661,6 +2665,9 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 			   unsigned long count);
 int btrfs_async_run_delayed_refs(struct btrfs_fs_info *fs_info,
 				 unsigned long count, u64 transid, int wait);
+void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
+				  struct btrfs_delayed_ref_root *delayed_refs,
+				  struct btrfs_delayed_ref_head *head);
 int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len);
 int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
 			     struct btrfs_fs_info *fs_info, u64 bytenr,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8da2f380d3c0..6a2a2a951705 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1682,6 +1682,8 @@ static int cleaner_kthread(void *arg)
 	while (1) {
 		again = 0;
 
+		set_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags);
+
 		/* Make the cleaner go to sleep early. */
 		if (btrfs_need_cleaner_sleep(fs_info))
 			goto sleep;
@@ -1728,6 +1730,7 @@ static int cleaner_kthread(void *arg)
 		 */
 		btrfs_delete_unused_bgs(fs_info);
 sleep:
+		clear_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags);
 		if (kthread_should_park())
 			kthread_parkme();
 		if (kthread_should_stop())
@@ -4201,6 +4204,14 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
 		spin_lock(&fs_info->ordered_root_lock);
 	}
 	spin_unlock(&fs_info->ordered_root_lock);
+
+	/*
+	 * We need this here because if we've been flipped read-only we won't
+	 * get sync() from the umount, so we need to make sure any ordered
+	 * extents that haven't had their dirty pages IO start writeout yet
+	 * actually get run and error out properly.
+	 */
+	btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
 }
 
 static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
@@ -4265,6 +4276,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 		if (pin_bytes)
 			btrfs_pin_extent(fs_info, head->bytenr,
 					 head->num_bytes, 1);
+		btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
 		btrfs_put_delayed_ref_head(head);
 		cond_resched();
 		spin_lock(&delayed_refs->lock);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b15afeae16df..d81035b7ea7d 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2456,12 +2456,10 @@ static int run_and_cleanup_extent_op(struct btrfs_trans_handle *trans,
 	return ret ? ret : 1;
 }
 
-static void cleanup_ref_head_accounting(struct btrfs_trans_handle *trans,
-					struct btrfs_delayed_ref_head *head)
+void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
+				  struct btrfs_delayed_ref_root *delayed_refs,
+				  struct btrfs_delayed_ref_head *head)
 {
-	struct btrfs_fs_info *fs_info = trans->fs_info;
-	struct btrfs_delayed_ref_root *delayed_refs =
-		&trans->transaction->delayed_refs;
 	int nr_items = 1;	/* Dropping this ref head update. */
 
 	if (head->total_ref_mod < 0) {
@@ -2544,7 +2542,7 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
 		}
 	}
 
-	cleanup_ref_head_accounting(trans, head);
+	btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
 
 	trace_run_delayed_ref_head(fs_info, head, 0);
 	btrfs_delayed_ref_unlock(head);
@@ -4954,6 +4952,15 @@ static void flush_space(struct btrfs_fs_info *fs_info,
 		ret = 0;
 		break;
 	case COMMIT_TRANS:
+		/*
+		 * If we have pending delayed iputs then we could free up a
+		 * bunch of pinned space, so make sure we run the iputs before
+		 * we do our pinned bytes check below.
+		 */
+		mutex_lock(&fs_info->cleaner_delayed_iput_mutex);
+		btrfs_run_delayed_iputs(fs_info);
+		mutex_unlock(&fs_info->cleaner_delayed_iput_mutex);
+
 		ret = may_commit_transaction(fs_info, space_info);
 		break;
 	default:
@@ -7188,7 +7195,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
 	if (head->must_insert_reserved)
 		ret = 1;
 
-	cleanup_ref_head_accounting(trans, head);
+	btrfs_cleanup_ref_head_accounting(trans->fs_info, delayed_refs, head);
 	mutex_unlock(&head->mutex);
 	btrfs_put_delayed_ref_head(head);
 	return ret;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 43eb4535319d..5c349667c761 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3129,9 +3129,6 @@ out:
 	/* once for the tree */
 	btrfs_put_ordered_extent(ordered_extent);
 
-	/* Try to release some metadata so we don't get an OOM but don't wait */
-	btrfs_btree_balance_dirty_nodelay(fs_info);
-
 	return ret;
 }
 
@@ -3254,6 +3251,8 @@ void btrfs_add_delayed_iput(struct inode *inode)
 	ASSERT(list_empty(&binode->delayed_iput));
 	list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs);
 	spin_unlock(&fs_info->delayed_iput_lock);
+	if (!test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags))
+		wake_up_process(fs_info->cleaner_kthread);
 }
 
 void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index fab9443f6a42..9c8e1734429c 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3221,6 +3221,26 @@ static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2)
 	inode_lock_nested(inode2, I_MUTEX_CHILD);
 }
 
+static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
+				       struct inode *inode2, u64 loff2, u64 len)
+{
+	unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
+	unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
+}
+
+static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
+				     struct inode *inode2, u64 loff2, u64 len)
+{
+	if (inode1 < inode2) {
+		swap(inode1, inode2);
+		swap(loff1, loff2);
+	} else if (inode1 == inode2 && loff2 < loff1) {
+		swap(loff1, loff2);
+	}
+	lock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
+	lock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
+}
+
 static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen,
 				   struct inode *dst, u64 dst_loff)
 {
@@ -3242,11 +3262,12 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen,
 		return -EINVAL;
 
 	/*
-	 * Lock destination range to serialize with concurrent readpages().
+	 * Lock destination range to serialize with concurrent readpages() and
+	 * source range to serialize with relocation.
 	 */
-	lock_extent(&BTRFS_I(dst)->io_tree, dst_loff, dst_loff + len - 1);
+	btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
 	ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1);
-	unlock_extent(&BTRFS_I(dst)->io_tree, dst_loff, dst_loff + len - 1);
+	btrfs_double_extent_unlock(src, loff, dst, dst_loff, len);
 
 	return ret;
 }
@@ -3905,17 +3926,33 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
 		len = ALIGN(src->i_size, bs) - off;
 
 	if (destoff > inode->i_size) {
+		const u64 wb_start = ALIGN_DOWN(inode->i_size, bs);
+
 		ret = btrfs_cont_expand(inode, inode->i_size, destoff);
 		if (ret)
 			return ret;
+		/*
+		 * We may have truncated the last block if the inode's size is
+		 * not sector size aligned, so we need to wait for writeback to
+		 * complete before proceeding further, otherwise we can race
+		 * with cloning and attempt to increment a reference to an
+		 * extent that no longer exists (writeback completed right after
+		 * we found the previous extent covering eof and before we
+		 * attempted to increment its reference count).
+		 */
+		ret = btrfs_wait_ordered_range(inode, wb_start,
+					       destoff - wb_start);
+		if (ret)
+			return ret;
 	}
 
 	/*
-	 * Lock destination range to serialize with concurrent readpages().
+	 * Lock destination range to serialize with concurrent readpages() and
+	 * source range to serialize with relocation.
 	 */
-	lock_extent(&BTRFS_I(inode)->io_tree, destoff, destoff + len - 1);
+	btrfs_double_extent_lock(src, off, inode, destoff, len);
 	ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
-	unlock_extent(&BTRFS_I(inode)->io_tree, destoff, destoff + len - 1);
+	btrfs_double_extent_unlock(src, off, inode, destoff, len);
 	/*
 	 * Truncate page cache pages so that future reads will see the cloned
 	 * data immediately and not the previous data.
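
btrfs_double_extent_lock() prevents ABBA deadlocks by normalizing lock order: the two ranges are always taken in a fixed order (by inode address, and by offset when both ranges are in the same inode), so two tasks locking the same pair with swapped arguments cannot each hold one lock while waiting for the other. A userspace sketch of the ordering trick using whole-object pthread mutexes instead of extent ranges:

	#include <pthread.h>
	#include <stdint.h>

	struct obj {
		pthread_mutex_t lock;
	};

	/* Always lock the lower-addressed object first. */
	static void double_lock(struct obj *a, struct obj *b)
	{
		if ((uintptr_t)a > (uintptr_t)b) {
			struct obj *t = a;
			a = b;
			b = t;
		}
		pthread_mutex_lock(&a->lock);
		if (a != b)
			pthread_mutex_lock(&b->lock);
	}

	static void double_unlock(struct obj *a, struct obj *b)
	{
		pthread_mutex_unlock(&a->lock);
		if (a != b)
			pthread_mutex_unlock(&b->lock);
	}

	int main(void)
	{
		struct obj x = { PTHREAD_MUTEX_INITIALIZER };
		struct obj y = { PTHREAD_MUTEX_INITIALIZER };

		double_lock(&x, &y);	/* takes locks in the same order as */
		double_unlock(&x, &y);	/* double_lock(&y, &x) would */
		return 0;
	}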
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index c5586ffd1426..0a3f122dd61f 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1621,6 +1621,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 					flags | SB_RDONLY, device_name, data);
 		if (IS_ERR(mnt_root)) {
 			root = ERR_CAST(mnt_root);
+			kfree(subvol_name);
 			goto out;
 		}
 
@@ -1630,12 +1631,14 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 			if (error < 0) {
 				root = ERR_PTR(error);
 				mntput(mnt_root);
+				kfree(subvol_name);
 				goto out;
 			}
 		}
 	}
 	if (IS_ERR(mnt_root)) {
 		root = ERR_CAST(mnt_root);
+		kfree(subvol_name);
 		goto out;
 	}
 
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 127fa1535f58..4ec2b660d014 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -850,14 +850,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 
 	btrfs_trans_release_chunk_metadata(trans);
 
-	if (lock && should_end_transaction(trans) &&
-	    READ_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) {
-		spin_lock(&info->trans_lock);
-		if (cur_trans->state == TRANS_STATE_RUNNING)
-			cur_trans->state = TRANS_STATE_BLOCKED;
-		spin_unlock(&info->trans_lock);
-	}
-
 	if (lock && READ_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) {
 		if (throttle)
 			return btrfs_commit_transaction(trans);
@@ -1879,6 +1871,21 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, int err)
 	kmem_cache_free(btrfs_trans_handle_cachep, trans);
 }
 
+/*
+ * Release reserved delayed ref space of all pending block groups of the
+ * transaction and remove them from the list
+ */
+static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans)
+{
+	struct btrfs_fs_info *fs_info = trans->fs_info;
+	struct btrfs_block_group_cache *block_group, *tmp;
+
+	list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {
+		btrfs_delayed_refs_rsv_release(fs_info, 1);
+		list_del_init(&block_group->bg_list);
+	}
+}
+
 static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
 {
 	/*
@@ -2270,6 +2277,7 @@ scrub_continue:
 	btrfs_scrub_continue(fs_info);
 cleanup_transaction:
 	btrfs_trans_release_metadata(trans);
+	btrfs_cleanup_pending_block_groups(trans);
 	btrfs_trans_release_chunk_metadata(trans);
 	trans->block_rsv = NULL;
 	btrfs_warn(fs_info, "Skipping commit of aborted transaction.");
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 2576b1a379c9..15561926ab32 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -957,11 +957,11 @@ static noinline struct btrfs_device *device_list_add(const char *path,
 	else
 		fs_devices = alloc_fs_devices(disk_super->fsid, NULL);
 
-	fs_devices->fsid_change = fsid_change_in_progress;
-
 	if (IS_ERR(fs_devices))
 		return ERR_CAST(fs_devices);
 
+	fs_devices->fsid_change = fsid_change_in_progress;
+
 	mutex_lock(&fs_devices->device_list_mutex);
 	list_add(&fs_devices->fs_list, &fs_uuids);
 
@@ -7825,6 +7825,18 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
 		ret = -EUCLEAN;
 		goto out;
 	}
+
+	/* It's possible this device is a dummy for seed device */
+	if (dev->disk_total_bytes == 0) {
+		dev = find_device(fs_info->fs_devices->seed, devid, NULL);
+		if (!dev) {
+			btrfs_err(fs_info, "failed to find seed devid %llu",
+				  devid);
+			ret = -EUCLEAN;
+			goto out;
+		}
+	}
+
 	if (physical_offset + physical_len > dev->disk_total_bytes) {
 		btrfs_err(fs_info,
 "dev extent devid %llu physical offset %llu len %llu is beyond device boundary %llu",
diff --git a/fs/buffer.c b/fs/buffer.c
index 52d024bfdbc1..48318fb74938 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -200,6 +200,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
 	struct buffer_head *head;
 	struct page *page;
 	int all_mapped = 1;
+	static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1);
 
 	index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
 	page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
@@ -227,15 +228,15 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
 	 * file io on the block device and getblk.  It gets dealt with
 	 * elsewhere, don't buffer_error if we had some unmapped buffers
 	 */
-	if (all_mapped) {
-		printk("__find_get_block_slow() failed. "
-			"block=%llu, b_blocknr=%llu\n",
-			(unsigned long long)block,
-			(unsigned long long)bh->b_blocknr);
-		printk("b_state=0x%08lx, b_size=%zu\n",
-			bh->b_state, bh->b_size);
-		printk("device %pg blocksize: %d\n", bdev,
-			1 << bd_inode->i_blkbits);
+	ratelimit_set_flags(&last_warned, RATELIMIT_MSG_ON_RELEASE);
+	if (all_mapped && __ratelimit(&last_warned)) {
+		printk("__find_get_block_slow() failed. block=%llu, "
+		       "b_blocknr=%llu, b_state=0x%08lx, b_size=%zu, "
+		       "device %pg blocksize: %d\n",
+		       (unsigned long long)block,
+		       (unsigned long long)bh->b_blocknr,
+		       bh->b_state, bh->b_size, bdev,
+		       1 << bd_inode->i_blkbits);
 	}
 out_unlock:
 	spin_unlock(&bd_mapping->private_lock);
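
DEFINE_RATELIMIT_STATE(last_warned, HZ, 1) allows at most one of these warnings per second, and RATELIMIT_MSG_ON_RELEASE suppresses the periodic "callbacks suppressed" lines that __ratelimit() would otherwise print. A userspace sketch of the windowed rate limit (simplified: suppressed messages are silently dropped and not counted):

	#include <stdbool.h>
	#include <stdio.h>
	#include <time.h>

	struct ratelimit {
		time_t begin;
		int interval;	/* window length in seconds */
		int burst;	/* messages allowed per window */
		int printed;
	};

	static bool ratelimit_ok(struct ratelimit *rs)
	{
		time_t now = time(NULL);

		if (now - rs->begin >= rs->interval) {	/* new window */
			rs->begin = now;
			rs->printed = 0;
		}
		if (rs->printed >= rs->burst)
			return false;	/* drop this message */
		rs->printed++;
		return true;
	}

	int main(void)
	{
		struct ratelimit rs = { .interval = 1, .burst = 1 };

		for (int i = 0; i < 5; i++)
			if (ratelimit_ok(&rs))
				printf("warning %d\n", i);	/* prints once */
		return 0;
	}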
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 5d0c05e288cc..a47c541f8006 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1494,10 +1494,7 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
 		if (err < 0 || off >= i_size_read(inode)) {
 			unlock_page(page);
 			put_page(page);
-			if (err == -ENOMEM)
-				ret = VM_FAULT_OOM;
-			else
-				ret = VM_FAULT_SIGBUS;
+			ret = vmf_error(err);
 			goto out_inline;
 		}
 		if (err < PAGE_SIZE)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 94c026bba2c2..bba28a5034ba 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1035,6 +1035,8 @@ static void drop_inode_snap_realm(struct ceph_inode_info *ci)
 	list_del_init(&ci->i_snap_realm_item);
 	ci->i_snap_realm_counter++;
 	ci->i_snap_realm = NULL;
+	if (realm->ino == ci->i_vino.ino)
+		realm->inode = NULL;
 	spin_unlock(&realm->inodes_with_caps_lock);
 	ceph_put_snap_realm(ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc,
 			    realm);
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
index 03f4d24db8fe..9455d3aef0c3 100644
--- a/fs/ceph/quota.c
+++ b/fs/ceph/quota.c
@@ -3,19 +3,6 @@
  * quota.c - CephFS quota
  *
  * Copyright (C) 2017-2018 SUSE
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/statfs.h>
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 041c27ea8de1..f74193da0e09 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -616,7 +616,8 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
 		      capsnap->size);
 
 	spin_lock(&mdsc->snap_flush_lock);
-	list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list);
+	if (list_empty(&ci->i_snap_flush_item))
+		list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list);
 	spin_unlock(&mdsc->snap_flush_lock);
 	return 1;  /* caller may want to ceph_flush_snaps */
 }
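
The guard makes requeueing idempotent: in the kernel's circular lists an unlinked node points at itself, so list_empty() on the item detects whether it is already on snap_flush_list and skips the second list_add_tail(), which would otherwise corrupt the list. A self-contained sketch of the guard:

	#include <stdbool.h>
	#include <stdio.h>

	struct list_head { struct list_head *prev, *next; };

	static void list_init(struct list_head *h) { h->prev = h->next = h; }
	static bool list_empty(const struct list_head *h) { return h->next == h; }

	static void list_add_tail(struct list_head *n, struct list_head *h)
	{
		n->prev = h->prev;
		n->next = h;
		h->prev->next = n;
		h->prev = n;
	}

	int main(void)
	{
		struct list_head head, item;

		list_init(&head);
		list_init(&item);	/* an unlinked node is self-linked */

		if (list_empty(&item))	/* the guard added by the fix */
			list_add_tail(&item, &head);
		if (list_empty(&item))	/* second attempt is now a no-op */
			list_add_tail(&item, &head);

		printf("item linked once: %s\n",
		       head.next == &item && head.prev == &item ? "yes" : "no");
		return 0;
	}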
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 4e9a7cc488da..da2cd8e89062 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -530,7 +530,7 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
 	seq_putc(m, ',');
 	pos = m->count;
 
-	ret = ceph_print_client_options(m, fsc->client);
+	ret = ceph_print_client_options(m, fsc->client, false);
 	if (ret)
 		return ret;
 
@@ -640,7 +640,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
 	opt = NULL; /* fsc->client now owns this */
 
 	fsc->client->extra_mon_dispatch = extra_mon_dispatch;
-	fsc->client->osdc.abort_on_full = true;
+	ceph_set_opt(fsc->client, ABORT_ON_FULL);
 
 	if (!fsopt->mds_namespace) {
 		ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP,
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 593fb422d0f3..e92a2fee3c57 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -252,6 +252,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
 	seq_printf(m, ",ACL");
 #endif
 	seq_putc(m, '\n');
+	seq_printf(m, "CIFSMaxBufSize: %d\n", CIFSMaxBufSize);
 	seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid);
 	seq_printf(m, "Servers:");
 
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 26776eddd85d..7652551a1fc4 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -150,5 +150,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 extern const struct export_operations cifs_export_ops;
 #endif /* CONFIG_CIFS_NFSD_EXPORT */
 
-#define CIFS_VERSION   "2.15"
+#define CIFS_VERSION   "2.17"
 #endif	/* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 01ded7038b19..94dbdbe5be34 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1438,6 +1438,7 @@ struct mid_q_entry {
 	int mid_state;	/* wish this were enum but can not pass to wait_event */
 	unsigned int mid_flags;
 	__le16 command;		/* smb command code */
+	unsigned int optype;	/* operation type */
 	bool large_buf:1;	/* if valid response, is pointer to large buf */
 	bool multiRsp:1;	/* multiple trans2 responses for one request */
 	bool multiEnd:1;	/* both received */
@@ -1574,6 +1575,25 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param,
 	kfree(param);
 }
 
+static inline bool is_interrupt_error(int error)
+{
+	switch (error) {
+	case -EINTR:
+	case -ERESTARTSYS:
+	case -ERESTARTNOHAND:
+	case -ERESTARTNOINTR:
+		return true;
+	}
+	return false;
+}
+
+static inline bool is_retryable_error(int error)
+{
+	if (is_interrupt_error(error) || error == -EAGAIN)
+		return true;
+	return false;
+}
+
 #define   MID_FREE 0
 #define   MID_REQUEST_ALLOCATED 1
 #define   MID_REQUEST_SUBMITTED 2
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index b1f49c1c543a..bb54ccf8481c 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -128,24 +128,31 @@ static int __cifs_reconnect_tcon(const struct nls_table *nlsc,
128 int rc; 128 int rc;
129 struct dfs_cache_tgt_list tl; 129 struct dfs_cache_tgt_list tl;
130 struct dfs_cache_tgt_iterator *it = NULL; 130 struct dfs_cache_tgt_iterator *it = NULL;
131 char tree[MAX_TREE_SIZE + 1]; 131 char *tree;
132 const char *tcp_host; 132 const char *tcp_host;
133 size_t tcp_host_len; 133 size_t tcp_host_len;
134 const char *dfs_host; 134 const char *dfs_host;
135 size_t dfs_host_len; 135 size_t dfs_host_len;
136 136
137 tree = kzalloc(MAX_TREE_SIZE, GFP_KERNEL);
138 if (!tree)
139 return -ENOMEM;
140
137 if (tcon->ipc) { 141 if (tcon->ipc) {
138 snprintf(tree, sizeof(tree), "\\\\%s\\IPC$", 142 snprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$",
139 tcon->ses->server->hostname); 143 tcon->ses->server->hostname);
140 return CIFSTCon(0, tcon->ses, tree, tcon, nlsc); 144 rc = CIFSTCon(0, tcon->ses, tree, tcon, nlsc);
145 goto out;
141 } 146 }
142 147
143 if (!tcon->dfs_path) 148 if (!tcon->dfs_path) {
144 return CIFSTCon(0, tcon->ses, tcon->treeName, tcon, nlsc); 149 rc = CIFSTCon(0, tcon->ses, tcon->treeName, tcon, nlsc);
150 goto out;
151 }
145 152
146 rc = dfs_cache_noreq_find(tcon->dfs_path + 1, NULL, &tl); 153 rc = dfs_cache_noreq_find(tcon->dfs_path + 1, NULL, &tl);
147 if (rc) 154 if (rc)
148 return rc; 155 goto out;
149 156
150 extract_unc_hostname(tcon->ses->server->hostname, &tcp_host, 157 extract_unc_hostname(tcon->ses->server->hostname, &tcp_host,
151 &tcp_host_len); 158 &tcp_host_len);
@@ -165,7 +172,7 @@ static int __cifs_reconnect_tcon(const struct nls_table *nlsc,
165 continue; 172 continue;
166 } 173 }
167 174
168 snprintf(tree, sizeof(tree), "\\%s", tgt); 175 snprintf(tree, MAX_TREE_SIZE, "\\%s", tgt);
169 176
170 rc = CIFSTCon(0, tcon->ses, tree, tcon, nlsc); 177 rc = CIFSTCon(0, tcon->ses, tree, tcon, nlsc);
171 if (!rc) 178 if (!rc)
@@ -182,6 +189,8 @@ static int __cifs_reconnect_tcon(const struct nls_table *nlsc,
182 rc = -ENOENT; 189 rc = -ENOENT;
183 } 190 }
184 dfs_cache_free_tgts(&tl); 191 dfs_cache_free_tgts(&tl);
192out:
193 kfree(tree);
185 return rc; 194 return rc;
186} 195}
187#else 196#else
@@ -1540,18 +1549,26 @@ cifs_discard_remaining_data(struct TCP_Server_Info *server)
1540} 1549}
1541 1550
1542static int 1551static int
1543cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) 1552__cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid,
1553 bool malformed)
1544{ 1554{
1545 int length; 1555 int length;
1546 struct cifs_readdata *rdata = mid->callback_data;
1547 1556
1548 length = cifs_discard_remaining_data(server); 1557 length = cifs_discard_remaining_data(server);
1549 dequeue_mid(mid, rdata->result); 1558 dequeue_mid(mid, malformed);
1550 mid->resp_buf = server->smallbuf; 1559 mid->resp_buf = server->smallbuf;
1551 server->smallbuf = NULL; 1560 server->smallbuf = NULL;
1552 return length; 1561 return length;
1553} 1562}
1554 1563
1564static int
1565cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1566{
1567 struct cifs_readdata *rdata = mid->callback_data;
1568
1569 return __cifs_readv_discard(server, mid, rdata->result);
1570}
1571
1555int 1572int
1556cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) 1573cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1557{ 1574{
@@ -1593,12 +1610,23 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1593 return -1; 1610 return -1;
1594 } 1611 }
1595 1612
1613 /* set up first two iov for signature check and to get credits */
1614 rdata->iov[0].iov_base = buf;
1615 rdata->iov[0].iov_len = 4;
1616 rdata->iov[1].iov_base = buf + 4;
1617 rdata->iov[1].iov_len = server->total_read - 4;
1618 cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n",
1619 rdata->iov[0].iov_base, rdata->iov[0].iov_len);
1620 cifs_dbg(FYI, "1: iov_base=%p iov_len=%zu\n",
1621 rdata->iov[1].iov_base, rdata->iov[1].iov_len);
1622
1596 /* Was the SMB read successful? */ 1623 /* Was the SMB read successful? */
1597 rdata->result = server->ops->map_error(buf, false); 1624 rdata->result = server->ops->map_error(buf, false);
1598 if (rdata->result != 0) { 1625 if (rdata->result != 0) {
1599 cifs_dbg(FYI, "%s: server returned error %d\n", 1626 cifs_dbg(FYI, "%s: server returned error %d\n",
1600 __func__, rdata->result); 1627 __func__, rdata->result);
1601 return cifs_readv_discard(server, mid); 1628 /* normal error on read response */
1629 return __cifs_readv_discard(server, mid, false);
1602 } 1630 }
1603 1631
1604 /* Is there enough to get to the rest of the READ_RSP header? */ 1632 /* Is there enough to get to the rest of the READ_RSP header? */
@@ -1642,14 +1670,6 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1642 server->total_read += length; 1670 server->total_read += length;
1643 } 1671 }
1644 1672
1645 /* set up first iov for signature check */
1646 rdata->iov[0].iov_base = buf;
1647 rdata->iov[0].iov_len = 4;
1648 rdata->iov[1].iov_base = buf + 4;
1649 rdata->iov[1].iov_len = server->total_read - 4;
1650 cifs_dbg(FYI, "0: iov_base=%p iov_len=%u\n",
1651 rdata->iov[0].iov_base, server->total_read);
1652
1653 /* how much data is in the response? */ 1673 /* how much data is in the response? */
1654#ifdef CONFIG_CIFS_SMB_DIRECT 1674#ifdef CONFIG_CIFS_SMB_DIRECT
1655 use_rdma_mr = rdata->mr; 1675 use_rdma_mr = rdata->mr;
@@ -2114,7 +2134,7 @@ cifs_writev_requeue(struct cifs_writedata *wdata)
2114 2134
2115 for (j = 0; j < nr_pages; j++) { 2135 for (j = 0; j < nr_pages; j++) {
2116 unlock_page(wdata2->pages[j]); 2136 unlock_page(wdata2->pages[j]);
2117 if (rc != 0 && rc != -EAGAIN) { 2137 if (rc != 0 && !is_retryable_error(rc)) {
2118 SetPageError(wdata2->pages[j]); 2138 SetPageError(wdata2->pages[j]);
2119 end_page_writeback(wdata2->pages[j]); 2139 end_page_writeback(wdata2->pages[j]);
2120 put_page(wdata2->pages[j]); 2140 put_page(wdata2->pages[j]);
@@ -2123,7 +2143,7 @@ cifs_writev_requeue(struct cifs_writedata *wdata)
2123 2143
2124 if (rc) { 2144 if (rc) {
2125 kref_put(&wdata2->refcount, cifs_writedata_release); 2145 kref_put(&wdata2->refcount, cifs_writedata_release);
2126 if (rc == -EAGAIN) 2146 if (is_retryable_error(rc))
2127 continue; 2147 continue;
2128 break; 2148 break;
2129 } 2149 }
@@ -2132,7 +2152,8 @@ cifs_writev_requeue(struct cifs_writedata *wdata)
2132 i += nr_pages; 2152 i += nr_pages;
2133 } while (i < wdata->nr_pages); 2153 } while (i < wdata->nr_pages);
2134 2154
2135 mapping_set_error(inode->i_mapping, rc); 2155 if (rc != 0 && !is_retryable_error(rc))
2156 mapping_set_error(inode->i_mapping, rc);
2136 kref_put(&wdata->refcount, cifs_writedata_release); 2157 kref_put(&wdata->refcount, cifs_writedata_release);
2137} 2158}
2138 2159
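
The cifssmb.c hunks above replace bare rc == -EAGAIN tests with is_retryable_error(), so that writes interrupted by a signal are requeued or redirtied instead of being reported as hard page errors, and mapping_set_error() is skipped for errors that will be retried. A minimal compilable sketch of such predicates follows; the restart codes are kernel-internal (include/linux/errno.h) and are redefined here only so the sketch builds in userspace, and the exact set of codes the cifs helpers accept is an assumption, not the committed definition:

#include <stdbool.h>

/* Kernel-internal restart codes; redefined so this sketch compiles. */
#define ERESTARTSYS     512
#define ERESTARTNOINTR  513
#define ERESTARTNOHAND  514

/* Error came from a pending signal: the operation should be restarted. */
static bool is_interrupt_error(int error)
{
        switch (error) {
        case -ERESTARTSYS:
        case -ERESTARTNOINTR:
        case -ERESTARTNOHAND:
                return true;
        }
        return false;
}

/* Worth retrying (requeue/redirty) rather than failing the mapping. */
static bool is_retryable_error(int error)
{
        return is_interrupt_error(error) || error == -EAGAIN;
}
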
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index f66529679ca2..8463c940e0e5 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -433,9 +433,10 @@ static void reconn_inval_dfs_target(struct TCP_Server_Info *server,
433 kfree(server->hostname); 433 kfree(server->hostname);
434 434
435 server->hostname = extract_hostname(name); 435 server->hostname = extract_hostname(name);
436 if (!server->hostname) { 436 if (IS_ERR(server->hostname)) {
437 cifs_dbg(FYI, "%s: failed to extract hostname from target: %d\n", 437 cifs_dbg(FYI,
438 __func__, -ENOMEM); 438 "%s: failed to extract hostname from target: %ld\n",
439 __func__, PTR_ERR(server->hostname));
439 } 440 }
440} 441}
441 442
@@ -719,6 +720,21 @@ server_unresponsive(struct TCP_Server_Info *server)
719 return false; 720 return false;
720} 721}
721 722
723static inline bool
724zero_credits(struct TCP_Server_Info *server)
725{
726 int val;
727
728 spin_lock(&server->req_lock);
729 val = server->credits + server->echo_credits + server->oplock_credits;
730 if (server->in_flight == 0 && val == 0) {
731 spin_unlock(&server->req_lock);
732 return true;
733 }
734 spin_unlock(&server->req_lock);
735 return false;
736}
737
722static int 738static int
723cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg) 739cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg)
724{ 740{
@@ -731,6 +747,12 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg)
731 for (total_read = 0; msg_data_left(smb_msg); total_read += length) { 747 for (total_read = 0; msg_data_left(smb_msg); total_read += length) {
732 try_to_freeze(); 748 try_to_freeze();
733 749
750 /* reconnect if no credits and no requests in flight */
751 if (zero_credits(server)) {
752 cifs_reconnect(server);
753 return -ECONNABORTED;
754 }
755
734 if (server_unresponsive(server)) 756 if (server_unresponsive(server))
735 return -ECONNABORTED; 757 return -ECONNABORTED;
736 if (cifs_rdma_enabled(server) && server->smbd_conn) 758 if (cifs_rdma_enabled(server) && server->smbd_conn)
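
The zero_credits() check added above closes a wedge: if every credit pool is empty and nothing is in flight, no response can ever arrive to grant new credits, so the demultiplex thread forces a reconnect instead of waiting forever. The invariant, restated as a userspace sketch with a pthread mutex standing in for req_lock (field names mirror the hunk; the rest is illustrative):

#include <pthread.h>
#include <stdbool.h>

struct srv_credits {
        pthread_mutex_t lock;
        int credits;        /* regular request credits        */
        int echo_credits;   /* reserved for SMB echo          */
        int oplock_credits; /* reserved for oplock breaks     */
        int in_flight;      /* requests awaiting a response   */
};

/* If every pool is empty and nothing is in flight, no response can ever
 * arrive to grant more credits; only a reconnect can recover. */
static bool zero_credits(struct srv_credits *s)
{
        bool wedged;

        pthread_mutex_lock(&s->lock);
        wedged = s->in_flight == 0 &&
                 s->credits + s->echo_credits + s->oplock_credits == 0;
        pthread_mutex_unlock(&s->lock);
        return wedged;
}
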
diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index cd63c4a70875..09b7d0d4f6e4 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -776,6 +776,7 @@ static int get_tgt_list(const struct dfs_cache_entry *ce,
776 it->it_name = kstrndup(t->t_name, strlen(t->t_name), 776 it->it_name = kstrndup(t->t_name, strlen(t->t_name),
777 GFP_KERNEL); 777 GFP_KERNEL);
778 if (!it->it_name) { 778 if (!it->it_name) {
779 kfree(it);
779 rc = -ENOMEM; 780 rc = -ENOMEM;
780 goto err_free_it; 781 goto err_free_it;
781 } 782 }
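
The added kfree(it) plugs a leak on the error path: when duplicating the target name fails, the freshly allocated iterator node itself must be freed before jumping to the cleanup label, which only walks nodes already linked into the list. The same two-allocation pattern in a standalone sketch (the names are illustrative, not the cifs structures):

#include <stdlib.h>
#include <string.h>

struct tgt_it {
        char *it_name;
        /* list linkage elided */
};

/* Returns NULL on failure; never leaks the partially built node. */
static struct tgt_it *tgt_it_new(const char *name)
{
        struct tgt_it *it = calloc(1, sizeof(*it));

        if (!it)
                return NULL;
        it->it_name = strdup(name);
        if (!it->it_name) {
                free(it);       /* the fix: free the node, not just fail */
                return NULL;
        }
        return it;
}
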
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index e3e3a7550205..659ce1b92c44 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -733,7 +733,8 @@ reopen_success:
733 733
734 if (can_flush) { 734 if (can_flush) {
735 rc = filemap_write_and_wait(inode->i_mapping); 735 rc = filemap_write_and_wait(inode->i_mapping);
736 mapping_set_error(inode->i_mapping, rc); 736 if (!is_interrupt_error(rc))
737 mapping_set_error(inode->i_mapping, rc);
737 738
738 if (tcon->unix_ext) 739 if (tcon->unix_ext)
739 rc = cifs_get_inode_info_unix(&inode, full_path, 740 rc = cifs_get_inode_info_unix(&inode, full_path,
@@ -1132,14 +1133,18 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1132 1133
1133 /* 1134 /*
1134 * Accessing maxBuf is racy with cifs_reconnect - need to store value 1135 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1135 * and check it for zero before using. 1136 * and check it before using.
1136 */ 1137 */
1137 max_buf = tcon->ses->server->maxBuf; 1138 max_buf = tcon->ses->server->maxBuf;
1138 if (!max_buf) { 1139 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1139 free_xid(xid); 1140 free_xid(xid);
1140 return -EINVAL; 1141 return -EINVAL;
1141 } 1142 }
1142 1143
1144 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1145 PAGE_SIZE);
1146 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1147 PAGE_SIZE);
1143 max_num = (max_buf - sizeof(struct smb_hdr)) / 1148 max_num = (max_buf - sizeof(struct smb_hdr)) /
1144 sizeof(LOCKING_ANDX_RANGE); 1149 sizeof(LOCKING_ANDX_RANGE);
1145 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); 1150 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
@@ -1472,12 +1477,16 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1472 1477
1473 /* 1478 /*
1474 * Accessing maxBuf is racy with cifs_reconnect - need to store value 1479 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1475 * and check it for zero before using. 1480 * and check it before using.
1476 */ 1481 */
1477 max_buf = tcon->ses->server->maxBuf; 1482 max_buf = tcon->ses->server->maxBuf;
1478 if (!max_buf) 1483 if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1479 return -EINVAL; 1484 return -EINVAL;
1480 1485
1486 BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1487 PAGE_SIZE);
1488 max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1489 PAGE_SIZE);
1481 max_num = (max_buf - sizeof(struct smb_hdr)) / 1490 max_num = (max_buf - sizeof(struct smb_hdr)) /
1482 sizeof(LOCKING_ANDX_RANGE); 1491 sizeof(LOCKING_ANDX_RANGE);
1483 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); 1492 buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
@@ -2110,6 +2119,7 @@ static int cifs_writepages(struct address_space *mapping,
2110 pgoff_t end, index; 2119 pgoff_t end, index;
2111 struct cifs_writedata *wdata; 2120 struct cifs_writedata *wdata;
2112 int rc = 0; 2121 int rc = 0;
2122 int saved_rc = 0;
2113 unsigned int xid; 2123 unsigned int xid;
2114 2124
2115 /* 2125 /*
@@ -2138,8 +2148,10 @@ retry:
2138 2148
2139 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize, 2149 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2140 &wsize, &credits); 2150 &wsize, &credits);
2141 if (rc) 2151 if (rc != 0) {
2152 done = true;
2142 break; 2153 break;
2154 }
2143 2155
2144 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1; 2156 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2145 2157
@@ -2147,6 +2159,7 @@ retry:
2147 &found_pages); 2159 &found_pages);
2148 if (!wdata) { 2160 if (!wdata) {
2149 rc = -ENOMEM; 2161 rc = -ENOMEM;
2162 done = true;
2150 add_credits_and_wake_if(server, credits, 0); 2163 add_credits_and_wake_if(server, credits, 0);
2151 break; 2164 break;
2152 } 2165 }
@@ -2175,7 +2188,7 @@ retry:
2175 if (rc != 0) { 2188 if (rc != 0) {
2176 add_credits_and_wake_if(server, wdata->credits, 0); 2189 add_credits_and_wake_if(server, wdata->credits, 0);
2177 for (i = 0; i < nr_pages; ++i) { 2190 for (i = 0; i < nr_pages; ++i) {
2178 if (rc == -EAGAIN) 2191 if (is_retryable_error(rc))
2179 redirty_page_for_writepage(wbc, 2192 redirty_page_for_writepage(wbc,
2180 wdata->pages[i]); 2193 wdata->pages[i]);
2181 else 2194 else
@@ -2183,7 +2196,7 @@ retry:
2183 end_page_writeback(wdata->pages[i]); 2196 end_page_writeback(wdata->pages[i]);
2184 put_page(wdata->pages[i]); 2197 put_page(wdata->pages[i]);
2185 } 2198 }
2186 if (rc != -EAGAIN) 2199 if (!is_retryable_error(rc))
2187 mapping_set_error(mapping, rc); 2200 mapping_set_error(mapping, rc);
2188 } 2201 }
2189 kref_put(&wdata->refcount, cifs_writedata_release); 2202 kref_put(&wdata->refcount, cifs_writedata_release);
@@ -2193,6 +2206,15 @@ retry:
2193 continue; 2206 continue;
2194 } 2207 }
2195 2208
2209 /* Return immediately if we received a signal during writing */
2210 if (is_interrupt_error(rc)) {
2211 done = true;
2212 break;
2213 }
2214
2215 if (rc != 0 && saved_rc == 0)
2216 saved_rc = rc;
2217
2196 wbc->nr_to_write -= nr_pages; 2218 wbc->nr_to_write -= nr_pages;
2197 if (wbc->nr_to_write <= 0) 2219 if (wbc->nr_to_write <= 0)
2198 done = true; 2220 done = true;
@@ -2210,6 +2232,9 @@ retry:
2210 goto retry; 2232 goto retry;
2211 } 2233 }
2212 2234
2235 if (saved_rc != 0)
2236 rc = saved_rc;
2237
2213 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) 2238 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2214 mapping->writeback_index = index; 2239 mapping->writeback_index = index;
2215 2240
@@ -2242,8 +2267,8 @@ cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2242 set_page_writeback(page); 2267 set_page_writeback(page);
2243retry_write: 2268retry_write:
2244 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE); 2269 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2245 if (rc == -EAGAIN) { 2270 if (is_retryable_error(rc)) {
2246 if (wbc->sync_mode == WB_SYNC_ALL) 2271 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2247 goto retry_write; 2272 goto retry_write;
2248 redirty_page_for_writepage(wbc, page); 2273 redirty_page_for_writepage(wbc, page);
2249 } else if (rc != 0) { 2274 } else if (rc != 0) {
@@ -2671,6 +2696,7 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2671 2696
2672 rc = cifs_write_allocate_pages(wdata->pages, nr_pages); 2697 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2673 if (rc) { 2698 if (rc) {
2699 kvfree(wdata->pages);
2674 kfree(wdata); 2700 kfree(wdata);
2675 add_credits_and_wake_if(server, credits, 0); 2701 add_credits_and_wake_if(server, credits, 0);
2676 break; 2702 break;
@@ -2682,6 +2708,7 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2682 if (rc) { 2708 if (rc) {
2683 for (i = 0; i < nr_pages; i++) 2709 for (i = 0; i < nr_pages; i++)
2684 put_page(wdata->pages[i]); 2710 put_page(wdata->pages[i]);
2711 kvfree(wdata->pages);
2685 kfree(wdata); 2712 kfree(wdata);
2686 add_credits_and_wake_if(server, credits, 0); 2713 add_credits_and_wake_if(server, credits, 0);
2687 break; 2714 break;
@@ -3361,8 +3388,12 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3361 } 3388 }
3362 3389
3363 rc = cifs_read_allocate_pages(rdata, npages); 3390 rc = cifs_read_allocate_pages(rdata, npages);
3364 if (rc) 3391 if (rc) {
3365 goto error; 3392 kvfree(rdata->pages);
3393 kfree(rdata);
3394 add_credits_and_wake_if(server, credits, 0);
3395 break;
3396 }
3366 3397
3367 rdata->tailsz = PAGE_SIZE; 3398 rdata->tailsz = PAGE_SIZE;
3368 } 3399 }
@@ -3382,7 +3413,6 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3382 if (!rdata->cfile->invalidHandle || 3413 if (!rdata->cfile->invalidHandle ||
3383 !(rc = cifs_reopen_file(rdata->cfile, true))) 3414 !(rc = cifs_reopen_file(rdata->cfile, true)))
3384 rc = server->ops->async_readv(rdata); 3415 rc = server->ops->async_readv(rdata);
3385error:
3386 if (rc) { 3416 if (rc) {
3387 add_credits_and_wake_if(server, rdata->credits, 0); 3417 add_credits_and_wake_if(server, rdata->credits, 0);
3388 kref_put(&rdata->refcount, 3418 kref_put(&rdata->refcount,
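
Both mandatory-lock paths in file.c above now reject a maxBuf too small to hold a header plus one lock element and clamp the working size to PAGE_SIZE before sizing the kcalloc(), so a hostile or buggy server cannot induce an oversized allocation. A compilable sketch of the bounding idea; HDR_SIZE and ELEM_SIZE are stand-in values, not the real struct sizes, and the arithmetic is simplified relative to the committed hunk:

#include <assert.h>
#include <stdio.h>

#define PAGE_SIZE       4096u
#define HDR_SIZE        32u     /* stand-in for sizeof(struct smb_hdr)      */
#define ELEM_SIZE       24u     /* stand-in for sizeof(LOCKING_ANDX_RANGE)  */

/* How many lock elements fit in one bounded buffer, or 0 if the
 * server-advertised max_buf is unusably small (maps to -EINVAL). */
static unsigned int lock_elems_per_buf(unsigned int max_buf)
{
        if (max_buf < HDR_SIZE + ELEM_SIZE)
                return 0;
        /* never size the allocation past a page, whatever the server says */
        max_buf -= HDR_SIZE;
        if (max_buf > PAGE_SIZE)
                max_buf = PAGE_SIZE;
        return max_buf / ELEM_SIZE;
}

int main(void)
{
        assert(lock_elems_per_buf(16) == 0);    /* too small: reject */
        assert(lock_elems_per_buf(1u << 20) <= PAGE_SIZE / ELEM_SIZE);
        printf("elems for 64K buf: %u\n", lock_elems_per_buf(65536));
        return 0;
}
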
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 13fb59aadebc..478003644916 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -2257,6 +2257,11 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
2257 * the flush returns error? 2257 * the flush returns error?
2258 */ 2258 */
2259 rc = filemap_write_and_wait(inode->i_mapping); 2259 rc = filemap_write_and_wait(inode->i_mapping);
2260 if (is_interrupt_error(rc)) {
2261 rc = -ERESTARTSYS;
2262 goto out;
2263 }
2264
2260 mapping_set_error(inode->i_mapping, rc); 2265 mapping_set_error(inode->i_mapping, rc);
2261 rc = 0; 2266 rc = 0;
2262 2267
@@ -2400,6 +2405,11 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
2400 * the flush returns error? 2405 * the flush returns error?
2401 */ 2406 */
2402 rc = filemap_write_and_wait(inode->i_mapping); 2407 rc = filemap_write_and_wait(inode->i_mapping);
2408 if (is_interrupt_error(rc)) {
2409 rc = -ERESTARTSYS;
2410 goto cifs_setattr_exit;
2411 }
2412
2403 mapping_set_error(inode->i_mapping, rc); 2413 mapping_set_error(inode->i_mapping, rc);
2404 rc = 0; 2414 rc = 0;
2405 2415
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 4ed10dd086e6..b204e84b87fb 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -122,12 +122,14 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
122 122
123 /* 123 /*
124 * Accessing maxBuf is racy with cifs_reconnect - need to store value 124 * Accessing maxBuf is racy with cifs_reconnect - need to store value
125 * and check it for zero before using. 125 * and check it before using.
126 */ 126 */
127 max_buf = tcon->ses->server->maxBuf; 127 max_buf = tcon->ses->server->maxBuf;
128 if (!max_buf) 128 if (max_buf < sizeof(struct smb2_lock_element))
129 return -EINVAL; 129 return -EINVAL;
130 130
131 BUILD_BUG_ON(sizeof(struct smb2_lock_element) > PAGE_SIZE);
132 max_buf = min_t(unsigned int, max_buf, PAGE_SIZE);
131 max_num = max_buf / sizeof(struct smb2_lock_element); 133 max_num = max_buf / sizeof(struct smb2_lock_element);
132 buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL); 134 buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL);
133 if (!buf) 135 if (!buf)
@@ -264,6 +266,8 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile)
264 return -EINVAL; 266 return -EINVAL;
265 } 267 }
266 268
269 BUILD_BUG_ON(sizeof(struct smb2_lock_element) > PAGE_SIZE);
270 max_buf = min_t(unsigned int, max_buf, PAGE_SIZE);
267 max_num = max_buf / sizeof(struct smb2_lock_element); 271 max_num = max_buf / sizeof(struct smb2_lock_element);
268 buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL); 272 buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL);
269 if (!buf) { 273 if (!buf) {
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index f14533da3a93..01a76bccdb8d 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -293,6 +293,8 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
293 int rc; 293 int rc;
294 struct smb2_file_all_info *smb2_data; 294 struct smb2_file_all_info *smb2_data;
295 __u32 create_options = 0; 295 __u32 create_options = 0;
296 struct cifs_fid fid;
297 bool no_cached_open = tcon->nohandlecache;
296 298
297 *adjust_tz = false; 299 *adjust_tz = false;
298 *symlink = false; 300 *symlink = false;
@@ -301,6 +303,21 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
301 GFP_KERNEL); 303 GFP_KERNEL);
302 if (smb2_data == NULL) 304 if (smb2_data == NULL)
303 return -ENOMEM; 305 return -ENOMEM;
306
307 /* If it is a root and its handle is cached then use it */
308 if (!strlen(full_path) && !no_cached_open) {
309 rc = open_shroot(xid, tcon, &fid);
310 if (rc)
311 goto out;
312 rc = SMB2_query_info(xid, tcon, fid.persistent_fid,
313 fid.volatile_fid, smb2_data);
314 close_shroot(&tcon->crfid);
315 if (rc)
316 goto out;
317 move_smb2_info_to_cifs(data, smb2_data);
318 goto out;
319 }
320
304 if (backup_cred(cifs_sb)) 321 if (backup_cred(cifs_sb))
305 create_options |= CREATE_OPEN_BACKUP_INTENT; 322 create_options |= CREATE_OPEN_BACKUP_INTENT;
306 323
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 6a9c47541c53..7b8b58fb4d3f 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -648,6 +648,13 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
648 if (rsp->sync_hdr.Command != SMB2_OPLOCK_BREAK) 648 if (rsp->sync_hdr.Command != SMB2_OPLOCK_BREAK)
649 return false; 649 return false;
650 650
651 if (rsp->sync_hdr.CreditRequest) {
652 spin_lock(&server->req_lock);
653 server->credits += le16_to_cpu(rsp->sync_hdr.CreditRequest);
654 spin_unlock(&server->req_lock);
655 wake_up(&server->request_q);
656 }
657
651 if (rsp->StructureSize != 658 if (rsp->StructureSize !=
652 smb2_rsp_struct_sizes[SMB2_OPLOCK_BREAK_HE]) { 659 smb2_rsp_struct_sizes[SMB2_OPLOCK_BREAK_HE]) {
653 if (le16_to_cpu(rsp->StructureSize) == 44) 660 if (le16_to_cpu(rsp->StructureSize) == 44)
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index cf7eb891804f..6f96e2292856 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -34,6 +34,7 @@
34#include "cifs_ioctl.h" 34#include "cifs_ioctl.h"
35#include "smbdirect.h" 35#include "smbdirect.h"
36 36
37/* Change credits for different ops and return the total number of credits */
37static int 38static int
38change_conf(struct TCP_Server_Info *server) 39change_conf(struct TCP_Server_Info *server)
39{ 40{
@@ -41,17 +42,15 @@ change_conf(struct TCP_Server_Info *server)
41 server->oplock_credits = server->echo_credits = 0; 42 server->oplock_credits = server->echo_credits = 0;
42 switch (server->credits) { 43 switch (server->credits) {
43 case 0: 44 case 0:
44 return -1; 45 return 0;
45 case 1: 46 case 1:
46 server->echoes = false; 47 server->echoes = false;
47 server->oplocks = false; 48 server->oplocks = false;
48 cifs_dbg(VFS, "disabling echoes and oplocks\n");
49 break; 49 break;
50 case 2: 50 case 2:
51 server->echoes = true; 51 server->echoes = true;
52 server->oplocks = false; 52 server->oplocks = false;
53 server->echo_credits = 1; 53 server->echo_credits = 1;
54 cifs_dbg(FYI, "disabling oplocks\n");
55 break; 54 break;
56 default: 55 default:
57 server->echoes = true; 56 server->echoes = true;
@@ -64,14 +63,15 @@ change_conf(struct TCP_Server_Info *server)
64 server->echo_credits = 1; 63 server->echo_credits = 1;
65 } 64 }
66 server->credits -= server->echo_credits + server->oplock_credits; 65 server->credits -= server->echo_credits + server->oplock_credits;
67 return 0; 66 return server->credits + server->echo_credits + server->oplock_credits;
68} 67}
69 68
70static void 69static void
71smb2_add_credits(struct TCP_Server_Info *server, const unsigned int add, 70smb2_add_credits(struct TCP_Server_Info *server, const unsigned int add,
72 const int optype) 71 const int optype)
73{ 72{
74 int *val, rc = 0; 73 int *val, rc = -1;
74
75 spin_lock(&server->req_lock); 75 spin_lock(&server->req_lock);
76 val = server->ops->get_credits_field(server, optype); 76 val = server->ops->get_credits_field(server, optype);
77 77
@@ -101,8 +101,26 @@ smb2_add_credits(struct TCP_Server_Info *server, const unsigned int add,
101 } 101 }
102 spin_unlock(&server->req_lock); 102 spin_unlock(&server->req_lock);
103 wake_up(&server->request_q); 103 wake_up(&server->request_q);
104 if (rc) 104
105 cifs_reconnect(server); 105 if (server->tcpStatus == CifsNeedReconnect)
106 return;
107
108 switch (rc) {
109 case -1:
110 /* change_conf hasn't been executed */
111 break;
112 case 0:
113 cifs_dbg(VFS, "Possible client or server bug - zero credits\n");
114 break;
115 case 1:
116 cifs_dbg(VFS, "disabling echoes and oplocks\n");
117 break;
118 case 2:
119 cifs_dbg(FYI, "disabling oplocks\n");
120 break;
121 default:
122 cifs_dbg(FYI, "add %u credits total=%d\n", add, rc);
123 }
106} 124}
107 125
108static void 126static void
@@ -136,7 +154,11 @@ smb2_get_credits(struct mid_q_entry *mid)
136{ 154{
137 struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)mid->resp_buf; 155 struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)mid->resp_buf;
138 156
139 return le16_to_cpu(shdr->CreditRequest); 157 if (mid->mid_state == MID_RESPONSE_RECEIVED
158 || mid->mid_state == MID_RESPONSE_MALFORMED)
159 return le16_to_cpu(shdr->CreditRequest);
160
161 return 0;
140} 162}
141 163
142static int 164static int
@@ -165,14 +187,14 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size,
165 187
166 scredits = server->credits; 188 scredits = server->credits;
167 /* can deadlock with reopen */ 189 /* can deadlock with reopen */
168 if (scredits == 1) { 190 if (scredits <= 8) {
169 *num = SMB2_MAX_BUFFER_SIZE; 191 *num = SMB2_MAX_BUFFER_SIZE;
170 *credits = 0; 192 *credits = 0;
171 break; 193 break;
172 } 194 }
173 195
174 /* leave one credit for a possible reopen */ 196 /* leave some credits for reopen and other ops */
175 scredits--; 197 scredits -= 8;
176 *num = min_t(unsigned int, size, 198 *num = min_t(unsigned int, size,
177 scredits * SMB2_MAX_BUFFER_SIZE); 199 scredits * SMB2_MAX_BUFFER_SIZE);
178 200
@@ -844,7 +866,9 @@ smb2_query_eas(const unsigned int xid, struct cifs_tcon *tcon,
844 FILE_READ_EA, 866 FILE_READ_EA,
845 FILE_FULL_EA_INFORMATION, 867 FILE_FULL_EA_INFORMATION,
846 SMB2_O_INFO_FILE, 868 SMB2_O_INFO_FILE,
847 SMB2_MAX_EA_BUF, 869 CIFSMaxBufSize -
870 MAX_SMB2_CREATE_RESPONSE_SIZE -
871 MAX_SMB2_CLOSE_RESPONSE_SIZE,
848 &rsp_iov, &buftype, cifs_sb); 872 &rsp_iov, &buftype, cifs_sb);
849 if (rc) { 873 if (rc) {
850 /* 874 /*
@@ -3189,11 +3213,23 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
3189 server->ops->is_status_pending(buf, server, 0)) 3213 server->ops->is_status_pending(buf, server, 0))
3190 return -1; 3214 return -1;
3191 3215
3192 rdata->result = server->ops->map_error(buf, false); 3216 /* set up first two iov to get credits */
3217 rdata->iov[0].iov_base = buf;
3218 rdata->iov[0].iov_len = 4;
3219 rdata->iov[1].iov_base = buf + 4;
3220 rdata->iov[1].iov_len =
3221 min_t(unsigned int, buf_len, server->vals->read_rsp_size) - 4;
3222 cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n",
3223 rdata->iov[0].iov_base, rdata->iov[0].iov_len);
3224 cifs_dbg(FYI, "1: iov_base=%p iov_len=%zu\n",
3225 rdata->iov[1].iov_base, rdata->iov[1].iov_len);
3226
3227 rdata->result = server->ops->map_error(buf, true);
3193 if (rdata->result != 0) { 3228 if (rdata->result != 0) {
3194 cifs_dbg(FYI, "%s: server returned error %d\n", 3229 cifs_dbg(FYI, "%s: server returned error %d\n",
3195 __func__, rdata->result); 3230 __func__, rdata->result);
3196 dequeue_mid(mid, rdata->result); 3231 /* normal error on read response */
3232 dequeue_mid(mid, false);
3197 return 0; 3233 return 0;
3198 } 3234 }
3199 3235
@@ -3266,14 +3302,6 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
3266 return 0; 3302 return 0;
3267 } 3303 }
3268 3304
3269 /* set up first iov for signature check */
3270 rdata->iov[0].iov_base = buf;
3271 rdata->iov[0].iov_len = 4;
3272 rdata->iov[1].iov_base = buf + 4;
3273 rdata->iov[1].iov_len = server->vals->read_rsp_size - 4;
3274 cifs_dbg(FYI, "0: iov_base=%p iov_len=%zu\n",
3275 rdata->iov[0].iov_base, server->vals->read_rsp_size);
3276
3277 length = rdata->copy_into_pages(server, rdata, &iter); 3305 length = rdata->copy_into_pages(server, rdata, &iter);
3278 3306
3279 kfree(bvec); 3307 kfree(bvec);
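
In the smb2ops.c hunks above, change_conf() now reports the resulting credit total instead of an error, and smb2_add_credits() defers its logging until after req_lock is dropped; a total of zero no longer forces a reconnect from the credit path, since the new zero_credits() check in the demultiplex thread (connect.c above) handles that case. The threshold table it encodes, in sketch form with the reserved-credit bookkeeping simplified:

#include <stdbool.h>

struct conf {
        bool echoes, oplocks;
        int echo_credits, oplock_credits;
};

/* 0-1 credits: echoes and oplocks disabled; 2: echoes come back;
 * 3+: oplocks too.  Returns the total so the caller can log it later,
 * outside the lock. */
static int change_conf(struct conf *c, int credits)
{
        c->echo_credits = c->oplock_credits = 0;
        c->echoes = c->oplocks = false;
        switch (credits) {
        case 0:
        case 1:
                break;
        case 2:
                c->echoes = true;
                c->echo_credits = 1;
                break;
        default:
                c->echoes = true;
                c->oplocks = true;
                c->echo_credits = 1;
                c->oplock_credits = 1;
        }
        return credits;
}
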
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index e57f6aa1d638..77b3aaa39b35 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -162,24 +162,31 @@ static int __smb2_reconnect(const struct nls_table *nlsc,
162 int rc; 162 int rc;
163 struct dfs_cache_tgt_list tl; 163 struct dfs_cache_tgt_list tl;
164 struct dfs_cache_tgt_iterator *it = NULL; 164 struct dfs_cache_tgt_iterator *it = NULL;
165 char tree[MAX_TREE_SIZE + 1]; 165 char *tree;
166 const char *tcp_host; 166 const char *tcp_host;
167 size_t tcp_host_len; 167 size_t tcp_host_len;
168 const char *dfs_host; 168 const char *dfs_host;
169 size_t dfs_host_len; 169 size_t dfs_host_len;
170 170
171 tree = kzalloc(MAX_TREE_SIZE, GFP_KERNEL);
172 if (!tree)
173 return -ENOMEM;
174
171 if (tcon->ipc) { 175 if (tcon->ipc) {
172 snprintf(tree, sizeof(tree), "\\\\%s\\IPC$", 176 snprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$",
173 tcon->ses->server->hostname); 177 tcon->ses->server->hostname);
174 return SMB2_tcon(0, tcon->ses, tree, tcon, nlsc); 178 rc = SMB2_tcon(0, tcon->ses, tree, tcon, nlsc);
179 goto out;
175 } 180 }
176 181
177 if (!tcon->dfs_path) 182 if (!tcon->dfs_path) {
178 return SMB2_tcon(0, tcon->ses, tcon->treeName, tcon, nlsc); 183 rc = SMB2_tcon(0, tcon->ses, tcon->treeName, tcon, nlsc);
184 goto out;
185 }
179 186
180 rc = dfs_cache_noreq_find(tcon->dfs_path + 1, NULL, &tl); 187 rc = dfs_cache_noreq_find(tcon->dfs_path + 1, NULL, &tl);
181 if (rc) 188 if (rc)
182 return rc; 189 goto out;
183 190
184 extract_unc_hostname(tcon->ses->server->hostname, &tcp_host, 191 extract_unc_hostname(tcon->ses->server->hostname, &tcp_host,
185 &tcp_host_len); 192 &tcp_host_len);
@@ -199,7 +206,7 @@ static int __smb2_reconnect(const struct nls_table *nlsc,
199 continue; 206 continue;
200 } 207 }
201 208
202 snprintf(tree, sizeof(tree), "\\%s", tgt); 209 snprintf(tree, MAX_TREE_SIZE, "\\%s", tgt);
203 210
204 rc = SMB2_tcon(0, tcon->ses, tree, tcon, nlsc); 211 rc = SMB2_tcon(0, tcon->ses, tree, tcon, nlsc);
205 if (!rc) 212 if (!rc)
@@ -216,6 +223,8 @@ static int __smb2_reconnect(const struct nls_table *nlsc,
216 rc = -ENOENT; 223 rc = -ENOENT;
217 } 224 }
218 dfs_cache_free_tgts(&tl); 225 dfs_cache_free_tgts(&tl);
226out:
227 kfree(tree);
219 return rc; 228 return rc;
220} 229}
221#else 230#else
@@ -2807,6 +2816,7 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
2807 int resp_buftype = CIFS_NO_BUFFER; 2816 int resp_buftype = CIFS_NO_BUFFER;
2808 struct cifs_ses *ses = tcon->ses; 2817 struct cifs_ses *ses = tcon->ses;
2809 int flags = 0; 2818 int flags = 0;
2819 bool allocated = false;
2810 2820
2811 cifs_dbg(FYI, "Query Info\n"); 2821 cifs_dbg(FYI, "Query Info\n");
2812 2822
@@ -2846,14 +2856,21 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
2846 "Error %d allocating memory for acl\n", 2856 "Error %d allocating memory for acl\n",
2847 rc); 2857 rc);
2848 *dlen = 0; 2858 *dlen = 0;
2859 rc = -ENOMEM;
2849 goto qinf_exit; 2860 goto qinf_exit;
2850 } 2861 }
2862 allocated = true;
2851 } 2863 }
2852 } 2864 }
2853 2865
2854 rc = smb2_validate_and_copy_iov(le16_to_cpu(rsp->OutputBufferOffset), 2866 rc = smb2_validate_and_copy_iov(le16_to_cpu(rsp->OutputBufferOffset),
2855 le32_to_cpu(rsp->OutputBufferLength), 2867 le32_to_cpu(rsp->OutputBufferLength),
2856 &rsp_iov, min_len, *data); 2868 &rsp_iov, min_len, *data);
2869 if (rc && allocated) {
2870 kfree(*data);
2871 *data = NULL;
2872 *dlen = 0;
2873 }
2857 2874
2858qinf_exit: 2875qinf_exit:
2859 SMB2_query_info_free(&rqst); 2876 SMB2_query_info_free(&rqst);
@@ -2907,9 +2924,10 @@ smb2_echo_callback(struct mid_q_entry *mid)
2907{ 2924{
2908 struct TCP_Server_Info *server = mid->callback_data; 2925 struct TCP_Server_Info *server = mid->callback_data;
2909 struct smb2_echo_rsp *rsp = (struct smb2_echo_rsp *)mid->resp_buf; 2926 struct smb2_echo_rsp *rsp = (struct smb2_echo_rsp *)mid->resp_buf;
2910 unsigned int credits_received = 1; 2927 unsigned int credits_received = 0;
2911 2928
2912 if (mid->mid_state == MID_RESPONSE_RECEIVED) 2929 if (mid->mid_state == MID_RESPONSE_RECEIVED
2930 || mid->mid_state == MID_RESPONSE_MALFORMED)
2913 credits_received = le16_to_cpu(rsp->sync_hdr.CreditRequest); 2931 credits_received = le16_to_cpu(rsp->sync_hdr.CreditRequest);
2914 2932
2915 DeleteMidQEntry(mid); 2933 DeleteMidQEntry(mid);
@@ -3166,7 +3184,7 @@ smb2_readv_callback(struct mid_q_entry *mid)
3166 struct TCP_Server_Info *server = tcon->ses->server; 3184 struct TCP_Server_Info *server = tcon->ses->server;
3167 struct smb2_sync_hdr *shdr = 3185 struct smb2_sync_hdr *shdr =
3168 (struct smb2_sync_hdr *)rdata->iov[0].iov_base; 3186 (struct smb2_sync_hdr *)rdata->iov[0].iov_base;
3169 unsigned int credits_received = 1; 3187 unsigned int credits_received = 0;
3170 struct smb_rqst rqst = { .rq_iov = rdata->iov, 3188 struct smb_rqst rqst = { .rq_iov = rdata->iov,
3171 .rq_nvec = 2, 3189 .rq_nvec = 2,
3172 .rq_pages = rdata->pages, 3190 .rq_pages = rdata->pages,
@@ -3205,6 +3223,9 @@ smb2_readv_callback(struct mid_q_entry *mid)
3205 task_io_account_read(rdata->got_bytes); 3223 task_io_account_read(rdata->got_bytes);
3206 cifs_stats_bytes_read(tcon, rdata->got_bytes); 3224 cifs_stats_bytes_read(tcon, rdata->got_bytes);
3207 break; 3225 break;
3226 case MID_RESPONSE_MALFORMED:
3227 credits_received = le16_to_cpu(shdr->CreditRequest);
3228 /* fall through */
3208 default: 3229 default:
3209 if (rdata->result != -ENODATA) 3230 if (rdata->result != -ENODATA)
3210 rdata->result = -EIO; 3231 rdata->result = -EIO;
@@ -3220,8 +3241,17 @@ smb2_readv_callback(struct mid_q_entry *mid)
3220 rdata->mr = NULL; 3241 rdata->mr = NULL;
3221 } 3242 }
3222#endif 3243#endif
3223 if (rdata->result) 3244 if (rdata->result && rdata->result != -ENODATA) {
3224 cifs_stats_fail_inc(tcon, SMB2_READ_HE); 3245 cifs_stats_fail_inc(tcon, SMB2_READ_HE);
3246 trace_smb3_read_err(0 /* xid */,
3247 rdata->cfile->fid.persistent_fid,
3248 tcon->tid, tcon->ses->Suid, rdata->offset,
3249 rdata->bytes, rdata->result);
3250 } else
3251 trace_smb3_read_done(0 /* xid */,
3252 rdata->cfile->fid.persistent_fid,
3253 tcon->tid, tcon->ses->Suid,
3254 rdata->offset, rdata->got_bytes);
3225 3255
3226 queue_work(cifsiod_wq, &rdata->work); 3256 queue_work(cifsiod_wq, &rdata->work);
3227 DeleteMidQEntry(mid); 3257 DeleteMidQEntry(mid);
@@ -3278,12 +3308,14 @@ smb2_async_readv(struct cifs_readdata *rdata)
3278 if (rdata->credits) { 3308 if (rdata->credits) {
3279 shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->bytes, 3309 shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->bytes,
3280 SMB2_MAX_BUFFER_SIZE)); 3310 SMB2_MAX_BUFFER_SIZE));
3281 shdr->CreditRequest = shdr->CreditCharge; 3311 shdr->CreditRequest =
3312 cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 1);
3282 spin_lock(&server->req_lock); 3313 spin_lock(&server->req_lock);
3283 server->credits += rdata->credits - 3314 server->credits += rdata->credits -
3284 le16_to_cpu(shdr->CreditCharge); 3315 le16_to_cpu(shdr->CreditCharge);
3285 spin_unlock(&server->req_lock); 3316 spin_unlock(&server->req_lock);
3286 wake_up(&server->request_q); 3317 wake_up(&server->request_q);
3318 rdata->credits = le16_to_cpu(shdr->CreditCharge);
3287 flags |= CIFS_HAS_CREDITS; 3319 flags |= CIFS_HAS_CREDITS;
3288 } 3320 }
3289 3321
@@ -3294,13 +3326,11 @@ smb2_async_readv(struct cifs_readdata *rdata)
3294 if (rc) { 3326 if (rc) {
3295 kref_put(&rdata->refcount, cifs_readdata_release); 3327 kref_put(&rdata->refcount, cifs_readdata_release);
3296 cifs_stats_fail_inc(io_parms.tcon, SMB2_READ_HE); 3328 cifs_stats_fail_inc(io_parms.tcon, SMB2_READ_HE);
3297 trace_smb3_read_err(rc, 0 /* xid */, io_parms.persistent_fid, 3329 trace_smb3_read_err(0 /* xid */, io_parms.persistent_fid,
3298 io_parms.tcon->tid, io_parms.tcon->ses->Suid, 3330 io_parms.tcon->tid,
3299 io_parms.offset, io_parms.length); 3331 io_parms.tcon->ses->Suid,
3300 } else 3332 io_parms.offset, io_parms.length, rc);
3301 trace_smb3_read_done(0 /* xid */, io_parms.persistent_fid, 3333 }
3302 io_parms.tcon->tid, io_parms.tcon->ses->Suid,
3303 io_parms.offset, io_parms.length);
3304 3334
3305 cifs_small_buf_release(buf); 3335 cifs_small_buf_release(buf);
3306 return rc; 3336 return rc;
@@ -3344,10 +3374,11 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
3344 if (rc != -ENODATA) { 3374 if (rc != -ENODATA) {
3345 cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE); 3375 cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE);
3346 cifs_dbg(VFS, "Send error in read = %d\n", rc); 3376 cifs_dbg(VFS, "Send error in read = %d\n", rc);
3377 trace_smb3_read_err(xid, req->PersistentFileId,
3378 io_parms->tcon->tid, ses->Suid,
3379 io_parms->offset, io_parms->length,
3380 rc);
3347 } 3381 }
3348 trace_smb3_read_err(rc, xid, req->PersistentFileId,
3349 io_parms->tcon->tid, ses->Suid,
3350 io_parms->offset, io_parms->length);
3351 free_rsp_buf(resp_buftype, rsp_iov.iov_base); 3382 free_rsp_buf(resp_buftype, rsp_iov.iov_base);
3352 return rc == -ENODATA ? 0 : rc; 3383 return rc == -ENODATA ? 0 : rc;
3353 } else 3384 } else
@@ -3388,7 +3419,7 @@ smb2_writev_callback(struct mid_q_entry *mid)
3388 struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink); 3419 struct cifs_tcon *tcon = tlink_tcon(wdata->cfile->tlink);
3389 unsigned int written; 3420 unsigned int written;
3390 struct smb2_write_rsp *rsp = (struct smb2_write_rsp *)mid->resp_buf; 3421 struct smb2_write_rsp *rsp = (struct smb2_write_rsp *)mid->resp_buf;
3391 unsigned int credits_received = 1; 3422 unsigned int credits_received = 0;
3392 3423
3393 switch (mid->mid_state) { 3424 switch (mid->mid_state) {
3394 case MID_RESPONSE_RECEIVED: 3425 case MID_RESPONSE_RECEIVED:
@@ -3416,6 +3447,9 @@ smb2_writev_callback(struct mid_q_entry *mid)
3416 case MID_RETRY_NEEDED: 3447 case MID_RETRY_NEEDED:
3417 wdata->result = -EAGAIN; 3448 wdata->result = -EAGAIN;
3418 break; 3449 break;
3450 case MID_RESPONSE_MALFORMED:
3451 credits_received = le16_to_cpu(rsp->sync_hdr.CreditRequest);
3452 /* fall through */
3419 default: 3453 default:
3420 wdata->result = -EIO; 3454 wdata->result = -EIO;
3421 break; 3455 break;
@@ -3433,8 +3467,17 @@ smb2_writev_callback(struct mid_q_entry *mid)
3433 wdata->mr = NULL; 3467 wdata->mr = NULL;
3434 } 3468 }
3435#endif 3469#endif
3436 if (wdata->result) 3470 if (wdata->result) {
3437 cifs_stats_fail_inc(tcon, SMB2_WRITE_HE); 3471 cifs_stats_fail_inc(tcon, SMB2_WRITE_HE);
3472 trace_smb3_write_err(0 /* no xid */,
3473 wdata->cfile->fid.persistent_fid,
3474 tcon->tid, tcon->ses->Suid, wdata->offset,
3475 wdata->bytes, wdata->result);
3476 } else
3477 trace_smb3_write_done(0 /* no xid */,
3478 wdata->cfile->fid.persistent_fid,
3479 tcon->tid, tcon->ses->Suid,
3480 wdata->offset, wdata->bytes);
3438 3481
3439 queue_work(cifsiod_wq, &wdata->work); 3482 queue_work(cifsiod_wq, &wdata->work);
3440 DeleteMidQEntry(mid); 3483 DeleteMidQEntry(mid);
@@ -3555,12 +3598,14 @@ smb2_async_writev(struct cifs_writedata *wdata,
3555 if (wdata->credits) { 3598 if (wdata->credits) {
3556 shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes, 3599 shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes,
3557 SMB2_MAX_BUFFER_SIZE)); 3600 SMB2_MAX_BUFFER_SIZE));
3558 shdr->CreditRequest = shdr->CreditCharge; 3601 shdr->CreditRequest =
3602 cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 1);
3559 spin_lock(&server->req_lock); 3603 spin_lock(&server->req_lock);
3560 server->credits += wdata->credits - 3604 server->credits += wdata->credits -
3561 le16_to_cpu(shdr->CreditCharge); 3605 le16_to_cpu(shdr->CreditCharge);
3562 spin_unlock(&server->req_lock); 3606 spin_unlock(&server->req_lock);
3563 wake_up(&server->request_q); 3607 wake_up(&server->request_q);
3608 wdata->credits = le16_to_cpu(shdr->CreditCharge);
3564 flags |= CIFS_HAS_CREDITS; 3609 flags |= CIFS_HAS_CREDITS;
3565 } 3610 }
3566 3611
@@ -3574,10 +3619,7 @@ smb2_async_writev(struct cifs_writedata *wdata,
3574 wdata->bytes, rc); 3619 wdata->bytes, rc);
3575 kref_put(&wdata->refcount, release); 3620 kref_put(&wdata->refcount, release);
3576 cifs_stats_fail_inc(tcon, SMB2_WRITE_HE); 3621 cifs_stats_fail_inc(tcon, SMB2_WRITE_HE);
3577 } else 3622 }
3578 trace_smb3_write_done(0 /* no xid */, req->PersistentFileId,
3579 tcon->tid, tcon->ses->Suid, wdata->offset,
3580 wdata->bytes);
3581 3623
3582async_writev_out: 3624async_writev_out:
3583 cifs_small_buf_release(req); 3625 cifs_small_buf_release(req);
@@ -3803,8 +3845,8 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
3803 rsp->sync_hdr.Status == STATUS_NO_MORE_FILES) { 3845 rsp->sync_hdr.Status == STATUS_NO_MORE_FILES) {
3804 srch_inf->endOfSearch = true; 3846 srch_inf->endOfSearch = true;
3805 rc = 0; 3847 rc = 0;
3806 } 3848 } else
3807 cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE); 3849 cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE);
3808 goto qdir_exit; 3850 goto qdir_exit;
3809 } 3851 }
3810 3852
@@ -4399,8 +4441,8 @@ SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon,
4399 rc = cifs_send_recv(xid, ses, &rqst, &resp_buf_type, flags, &rsp_iov); 4441 rc = cifs_send_recv(xid, ses, &rqst, &resp_buf_type, flags, &rsp_iov);
4400 cifs_small_buf_release(req); 4442 cifs_small_buf_release(req);
4401 4443
4402 please_key_low = (__u64 *)req->LeaseKey; 4444 please_key_low = (__u64 *)lease_key;
4403 please_key_high = (__u64 *)(req->LeaseKey+8); 4445 please_key_high = (__u64 *)(lease_key+8);
4404 if (rc) { 4446 if (rc) {
4405 cifs_stats_fail_inc(tcon, SMB2_OPLOCK_BREAK_HE); 4447 cifs_stats_fail_inc(tcon, SMB2_OPLOCK_BREAK_HE);
4406 trace_smb3_lease_err(le32_to_cpu(lease_state), tcon->tid, 4448 trace_smb3_lease_err(le32_to_cpu(lease_state), tcon->tid,
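
Two recurring themes in the smb2pdu.c hunks above: a malformed response still carries a valid header, so its CreditRequest is now collected before the mid is failed (the MID_RESPONSE_MALFORMED fall-throughs), and async reads and writes now ask for CreditCharge + 1 credits back, so a long run of large multi-credit I/O stops slowly draining the pool. The charge arithmetic, sketched:

#include <stdio.h>

#define SMB2_MAX_BUFFER_SIZE    65536u
#define DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))

/* One credit per 64KiB of payload; request one extra credit back so other
 * operations (echo, oplock break, reopen) are not starved. */
static unsigned int credit_request_for(unsigned int bytes)
{
        unsigned int charge = DIV_ROUND_UP(bytes, SMB2_MAX_BUFFER_SIZE);

        return charge + 1;
}

int main(void)
{
        printf("1MiB read: charge %u, request %u\n",
               DIV_ROUND_UP(1048576u, SMB2_MAX_BUFFER_SIZE),
               credit_request_for(1048576u));   /* charge 16, request 17 */
        return 0;
}
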
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 7a2d0a2255e6..538e2299805f 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -84,8 +84,9 @@
84 84
85#define NUMBER_OF_SMB2_COMMANDS 0x0013 85#define NUMBER_OF_SMB2_COMMANDS 0x0013
86 86
87/* 4 len + 52 transform hdr + 64 hdr + 56 create rsp */ 87/* 52 transform hdr + 64 hdr + 88 create rsp */
88#define MAX_SMB2_HDR_SIZE 0x00b0 88#define SMB2_TRANSFORM_HEADER_SIZE 52
89#define MAX_SMB2_HDR_SIZE 204
89 90
90#define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe) 91#define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe)
91#define SMB2_TRANSFORM_PROTO_NUM cpu_to_le32(0x424d53fd) 92#define SMB2_TRANSFORM_PROTO_NUM cpu_to_le32(0x424d53fd)
@@ -648,6 +649,13 @@ struct smb2_create_req {
648 __u8 Buffer[0]; 649 __u8 Buffer[0];
649} __packed; 650} __packed;
650 651
652/*
653 * Maximum size of a SMB2_CREATE response is 64 (smb2 header) +
654 * 88 (fixed part of create response) + 520 (path) + 150 (contexts) +
655 * 2 bytes of padding.
656 */
657#define MAX_SMB2_CREATE_RESPONSE_SIZE 824
658
651struct smb2_create_rsp { 659struct smb2_create_rsp {
652 struct smb2_sync_hdr sync_hdr; 660 struct smb2_sync_hdr sync_hdr;
653 __le16 StructureSize; /* Must be 89 */ 661 __le16 StructureSize; /* Must be 89 */
@@ -996,6 +1004,11 @@ struct smb2_close_req {
996 __u64 VolatileFileId; /* opaque endianness */ 1004 __u64 VolatileFileId; /* opaque endianness */
997} __packed; 1005} __packed;
998 1006
1007/*
1008 * Maximum size of a SMB2_CLOSE response is 64 (smb2 header) + 60 (data)
1009 */
1010#define MAX_SMB2_CLOSE_RESPONSE_SIZE 124
1011
999struct smb2_close_rsp { 1012struct smb2_close_rsp {
1000 struct smb2_sync_hdr sync_hdr; 1013 struct smb2_sync_hdr sync_hdr;
1001 __le16 StructureSize; /* 60 */ 1014 __le16 StructureSize; /* 60 */
@@ -1398,8 +1411,6 @@ struct smb2_file_link_info { /* encoding of request for level 11 */
1398 char FileName[0]; /* Name to be assigned to new link */ 1411 char FileName[0]; /* Name to be assigned to new link */
1399} __packed; /* level 11 Set */ 1412} __packed; /* level 11 Set */
1400 1413
1401#define SMB2_MAX_EA_BUF 65536
1402
1403struct smb2_file_full_ea_info { /* encoding of response for level 15 */ 1414struct smb2_file_full_ea_info { /* encoding of response for level 15 */
1404 __le32 next_entry_offset; 1415 __le32 next_entry_offset;
1405 __u8 flags; 1416 __u8 flags;
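
The new smb2pdu.h constants above are plain sums over fixed wire-format sizes. Spelled out as compile-time checks (C11 _Static_assert; the component sizes are taken from the comments in the hunks):

/* 52-byte transform header + 64-byte SMB2 header + 88-byte fixed part of
 * the create response; worst-case create and close responses as per the
 * comments above. */
#define XFORM_HDR_SIZE   52
#define SMB2_HDR_SIZE    64
#define CREATE_RSP_FIXED 88

_Static_assert(XFORM_HDR_SIZE + SMB2_HDR_SIZE + CREATE_RSP_FIXED == 204,
               "MAX_SMB2_HDR_SIZE");
_Static_assert(SMB2_HDR_SIZE + CREATE_RSP_FIXED + 520 + 150 + 2 == 824,
               "MAX_SMB2_CREATE_RESPONSE_SIZE");
_Static_assert(SMB2_HDR_SIZE + 60 == 124,
               "MAX_SMB2_CLOSE_RESPONSE_SIZE");
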
diff --git a/fs/cifs/trace.c b/fs/cifs/trace.c
index bd4a546feec1..465483787193 100644
--- a/fs/cifs/trace.c
+++ b/fs/cifs/trace.c
@@ -3,16 +3,6 @@
3 * Copyright (C) 2018, Microsoft Corporation. 3 * Copyright (C) 2018, Microsoft Corporation.
4 * 4 *
5 * Author(s): Steve French <stfrench@microsoft.com> 5 * Author(s): Steve French <stfrench@microsoft.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
15 * the GNU General Public License for more details.
16 */ 6 */
17#define CREATE_TRACE_POINTS 7#define CREATE_TRACE_POINTS
18#include "trace.h" 8#include "trace.h"
diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h
index fb049809555f..59be48206932 100644
--- a/fs/cifs/trace.h
+++ b/fs/cifs/trace.h
@@ -3,16 +3,6 @@
3 * Copyright (C) 2018, Microsoft Corporation. 3 * Copyright (C) 2018, Microsoft Corporation.
4 * 4 *
5 * Author(s): Steve French <stfrench@microsoft.com> 5 * Author(s): Steve French <stfrench@microsoft.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
15 * the GNU General Public License for more details.
16 */ 6 */
17#undef TRACE_SYSTEM 7#undef TRACE_SYSTEM
18#define TRACE_SYSTEM cifs 8#define TRACE_SYSTEM cifs
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 5be7302853b6..53532bd3f50d 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -387,7 +387,7 @@ smbd_done:
387 if (rc < 0 && rc != -EINTR) 387 if (rc < 0 && rc != -EINTR)
388 cifs_dbg(VFS, "Error %d sending data on socket to server\n", 388 cifs_dbg(VFS, "Error %d sending data on socket to server\n",
389 rc); 389 rc);
390 else 390 else if (rc > 0)
391 rc = 0; 391 rc = 0;
392 392
393 return rc; 393 return rc;
@@ -783,8 +783,25 @@ cifs_setup_request(struct cifs_ses *ses, struct smb_rqst *rqst)
783} 783}
784 784
785static void 785static void
786cifs_noop_callback(struct mid_q_entry *mid) 786cifs_compound_callback(struct mid_q_entry *mid)
787{
788 struct TCP_Server_Info *server = mid->server;
789
790 add_credits(server, server->ops->get_credits(mid), mid->optype);
791}
792
793static void
794cifs_compound_last_callback(struct mid_q_entry *mid)
787{ 795{
796 cifs_compound_callback(mid);
797 cifs_wake_up_task(mid);
798}
799
800static void
801cifs_cancelled_callback(struct mid_q_entry *mid)
802{
803 cifs_compound_callback(mid);
804 DeleteMidQEntry(mid);
788} 805}
789 806
790int 807int
@@ -795,7 +812,8 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
795 int i, j, rc = 0; 812 int i, j, rc = 0;
796 int timeout, optype; 813 int timeout, optype;
797 struct mid_q_entry *midQ[MAX_COMPOUND]; 814 struct mid_q_entry *midQ[MAX_COMPOUND];
798 unsigned int credits = 0; 815 bool cancelled_mid[MAX_COMPOUND] = {false};
816 unsigned int credits[MAX_COMPOUND] = {0};
799 char *buf; 817 char *buf;
800 818
801 timeout = flags & CIFS_TIMEOUT_MASK; 819 timeout = flags & CIFS_TIMEOUT_MASK;
@@ -813,13 +831,31 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
813 return -ENOENT; 831 return -ENOENT;
814 832
815 /* 833 /*
816 * Ensure that we do not send more than 50 overlapping requests 834 * Ensure we obtain 1 credit per request in the compound chain.
817 * to the same server. We may make this configurable later or 835 * It can be optimized further by waiting for all the credits
818 * use ses->maxReq. 836 * at once but this can wait long enough if we don't have enough
837 * credits due to some heavy operations in progress or the server
838 * not granting us much, so a fallback to the current approach is
839 * needed anyway.
819 */ 840 */
820 rc = wait_for_free_request(ses->server, timeout, optype); 841 for (i = 0; i < num_rqst; i++) {
821 if (rc) 842 rc = wait_for_free_request(ses->server, timeout, optype);
822 return rc; 843 if (rc) {
844 /*
845 * We haven't sent an SMB packet to the server yet but
846 * we already obtained credits for i requests in the
847 * compound chain - need to return those credits back
848 * for future use. Note that we need to call add_credits
849 * multiple times to match the way we obtained credits
850 * in the first place and to account for in flight
851 * requests correctly.
852 */
853 for (j = 0; j < i; j++)
854 add_credits(ses->server, 1, optype);
855 return rc;
856 }
857 credits[i] = 1;
858 }
823 859
824 /* 860 /*
825 * Make sure that we sign in the same order that we send on this socket 861 * Make sure that we sign in the same order that we send on this socket
@@ -835,18 +871,24 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
835 for (j = 0; j < i; j++) 871 for (j = 0; j < i; j++)
836 cifs_delete_mid(midQ[j]); 872 cifs_delete_mid(midQ[j]);
837 mutex_unlock(&ses->server->srv_mutex); 873 mutex_unlock(&ses->server->srv_mutex);
874
838 /* Update # of requests on wire to server */ 875 /* Update # of requests on wire to server */
839 add_credits(ses->server, 1, optype); 876 for (j = 0; j < num_rqst; j++)
877 add_credits(ses->server, credits[j], optype);
840 return PTR_ERR(midQ[i]); 878 return PTR_ERR(midQ[i]);
841 } 879 }
842 880
843 midQ[i]->mid_state = MID_REQUEST_SUBMITTED; 881 midQ[i]->mid_state = MID_REQUEST_SUBMITTED;
882 midQ[i]->optype = optype;
844 /* 883 /*
845 * We don't invoke the callback compounds unless it is the last 884 * Invoke callback for every part of the compound chain
846 * request. 885 * to calculate credits properly. Wake up this thread only when
886 * the last element is received.
847 */ 887 */
848 if (i < num_rqst - 1) 888 if (i < num_rqst - 1)
849 midQ[i]->callback = cifs_noop_callback; 889 midQ[i]->callback = cifs_compound_callback;
890 else
891 midQ[i]->callback = cifs_compound_last_callback;
850 } 892 }
851 cifs_in_send_inc(ses->server); 893 cifs_in_send_inc(ses->server);
852 rc = smb_send_rqst(ses->server, num_rqst, rqst, flags); 894 rc = smb_send_rqst(ses->server, num_rqst, rqst, flags);
@@ -860,8 +902,20 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
860 902
861 mutex_unlock(&ses->server->srv_mutex); 903 mutex_unlock(&ses->server->srv_mutex);
862 904
863 if (rc < 0) 905 if (rc < 0) {
906 /* Sending failed for some reason - return credits back */
907 for (i = 0; i < num_rqst; i++)
908 add_credits(ses->server, credits[i], optype);
864 goto out; 909 goto out;
910 }
911
912 /*
913 * At this point the request is passed to the network stack - we assume
914 * that any credits taken from the server structure on the client have
915 * been spent and we can't return them back. Once we receive responses
916 * we will collect credits granted by the server in the mid callbacks
917 * and add those credits to the server structure.
918 */
865 919
866 /* 920 /*
867 * Compounding is never used during session establish. 921 * Compounding is never used during session establish.
@@ -875,36 +929,34 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
875 929
876 for (i = 0; i < num_rqst; i++) { 930 for (i = 0; i < num_rqst; i++) {
877 rc = wait_for_response(ses->server, midQ[i]); 931 rc = wait_for_response(ses->server, midQ[i]);
878 if (rc != 0) { 932 if (rc != 0)
933 break;
934 }
935 if (rc != 0) {
936 for (; i < num_rqst; i++) {
879 cifs_dbg(VFS, "Cancelling wait for mid %llu cmd: %d\n", 937 cifs_dbg(VFS, "Cancelling wait for mid %llu cmd: %d\n",
880 midQ[i]->mid, le16_to_cpu(midQ[i]->command)); 938 midQ[i]->mid, le16_to_cpu(midQ[i]->command));
881 send_cancel(ses->server, &rqst[i], midQ[i]); 939 send_cancel(ses->server, &rqst[i], midQ[i]);
882 spin_lock(&GlobalMid_Lock); 940 spin_lock(&GlobalMid_Lock);
883 if (midQ[i]->mid_state == MID_REQUEST_SUBMITTED) { 941 if (midQ[i]->mid_state == MID_REQUEST_SUBMITTED) {
884 midQ[i]->mid_flags |= MID_WAIT_CANCELLED; 942 midQ[i]->mid_flags |= MID_WAIT_CANCELLED;
885 midQ[i]->callback = DeleteMidQEntry; 943 midQ[i]->callback = cifs_cancelled_callback;
886 spin_unlock(&GlobalMid_Lock); 944 cancelled_mid[i] = true;
887 add_credits(ses->server, 1, optype); 945 credits[i] = 0;
888 return rc;
889 } 946 }
890 spin_unlock(&GlobalMid_Lock); 947 spin_unlock(&GlobalMid_Lock);
891 } 948 }
892 } 949 }
893 950
894 for (i = 0; i < num_rqst; i++)
895 if (midQ[i]->resp_buf)
896 credits += ses->server->ops->get_credits(midQ[i]);
897 if (!credits)
898 credits = 1;
899
900 for (i = 0; i < num_rqst; i++) { 951 for (i = 0; i < num_rqst; i++) {
901 if (rc < 0) 952 if (rc < 0)
902 goto out; 953 goto out;
903 954
904 rc = cifs_sync_mid_result(midQ[i], ses->server); 955 rc = cifs_sync_mid_result(midQ[i], ses->server);
905 if (rc != 0) { 956 if (rc != 0) {
906 add_credits(ses->server, credits, optype); 957 /* mark this mid as cancelled to not free it below */
907 return rc; 958 cancelled_mid[i] = true;
959 goto out;
908 } 960 }
909 961
910 if (!midQ[i]->resp_buf || 962 if (!midQ[i]->resp_buf ||
@@ -951,9 +1003,10 @@ out:
951 * This is prevented above by using a noop callback that will not 1003 * This is prevented above by using a noop callback that will not
952 * wake this thread except for the very last PDU. 1004 * wake this thread except for the very last PDU.
953 */ 1005 */
954 for (i = 0; i < num_rqst; i++) 1006 for (i = 0; i < num_rqst; i++) {
955 cifs_delete_mid(midQ[i]); 1007 if (!cancelled_mid[i])
956 add_credits(ses->server, credits, optype); 1008 cifs_delete_mid(midQ[i]);
1009 }
957 1010
958 return rc; 1011 return rc;
959} 1012}
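
The transport.c rework gives compound_send_recv() exact credit accounting: one credit is reserved per request up front, every mid's callback (not just the last) returns the credits its response granted, and each failure path hands back only what is still held. The reservation and rollback step, as a sketch; take_credit() and give_credit() are hypothetical stand-ins for wait_for_free_request() and add_credits():

#define MAX_COMPOUND 5

/* Hypothetical stand-ins for wait_for_free_request()/add_credits(). */
extern int  take_credit(void);
extern void give_credit(void);

/* Obtain one credit per request up front; on failure, return every credit
 * already taken, one call per credit, so in-flight accounting stays
 * balanced with the way the credits were obtained. */
static int reserve_compound_credits(int num_rqst,
                                    unsigned int credits[MAX_COMPOUND])
{
        int i, j, rc;

        for (i = 0; i < num_rqst; i++) {
                rc = take_credit();
                if (rc) {
                        for (j = 0; j < i; j++)
                                give_credit();
                        return rc;
                }
                credits[i] = 1;
        }
        return 0;
}
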
diff --git a/fs/dcache.c b/fs/dcache.c
index 2593153471cf..aac41adf4743 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -119,6 +119,7 @@ struct dentry_stat_t dentry_stat = {
119 119
120static DEFINE_PER_CPU(long, nr_dentry); 120static DEFINE_PER_CPU(long, nr_dentry);
121static DEFINE_PER_CPU(long, nr_dentry_unused); 121static DEFINE_PER_CPU(long, nr_dentry_unused);
122static DEFINE_PER_CPU(long, nr_dentry_negative);
122 123
123#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) 124#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
124 125
@@ -152,11 +153,22 @@ static long get_nr_dentry_unused(void)
152 return sum < 0 ? 0 : sum; 153 return sum < 0 ? 0 : sum;
153} 154}
154 155
156static long get_nr_dentry_negative(void)
157{
158 int i;
159 long sum = 0;
160
161 for_each_possible_cpu(i)
162 sum += per_cpu(nr_dentry_negative, i);
163 return sum < 0 ? 0 : sum;
164}
165
155int proc_nr_dentry(struct ctl_table *table, int write, void __user *buffer, 166int proc_nr_dentry(struct ctl_table *table, int write, void __user *buffer,
156 size_t *lenp, loff_t *ppos) 167 size_t *lenp, loff_t *ppos)
157{ 168{
158 dentry_stat.nr_dentry = get_nr_dentry(); 169 dentry_stat.nr_dentry = get_nr_dentry();
159 dentry_stat.nr_unused = get_nr_dentry_unused(); 170 dentry_stat.nr_unused = get_nr_dentry_unused();
171 dentry_stat.nr_negative = get_nr_dentry_negative();
160 return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); 172 return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
161} 173}
162#endif 174#endif
@@ -317,6 +329,8 @@ static inline void __d_clear_type_and_inode(struct dentry *dentry)
317 flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); 329 flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
318 WRITE_ONCE(dentry->d_flags, flags); 330 WRITE_ONCE(dentry->d_flags, flags);
319 dentry->d_inode = NULL; 331 dentry->d_inode = NULL;
332 if (dentry->d_flags & DCACHE_LRU_LIST)
333 this_cpu_inc(nr_dentry_negative);
320} 334}
321 335
322static void dentry_free(struct dentry *dentry) 336static void dentry_free(struct dentry *dentry)
@@ -371,6 +385,11 @@ static void dentry_unlink_inode(struct dentry * dentry)
371 * The per-cpu "nr_dentry_unused" counters are updated with 385 * The per-cpu "nr_dentry_unused" counters are updated with
372 * the DCACHE_LRU_LIST bit. 386 * the DCACHE_LRU_LIST bit.
373 * 387 *
388 * The per-cpu "nr_dentry_negative" counters are only updated
389 * when deleted from or added to the per-superblock LRU list, not
390 * from/to the shrink list. That is to avoid an unneeded dec/inc
391 * pair when moving from LRU to shrink list in select_collect().
392 *
374 * These helper functions make sure we always follow the 393 * These helper functions make sure we always follow the
375 * rules. d_lock must be held by the caller. 394 * rules. d_lock must be held by the caller.
376 */ 395 */
@@ -380,6 +399,8 @@ static void d_lru_add(struct dentry *dentry)
380 D_FLAG_VERIFY(dentry, 0); 399 D_FLAG_VERIFY(dentry, 0);
381 dentry->d_flags |= DCACHE_LRU_LIST; 400 dentry->d_flags |= DCACHE_LRU_LIST;
382 this_cpu_inc(nr_dentry_unused); 401 this_cpu_inc(nr_dentry_unused);
402 if (d_is_negative(dentry))
403 this_cpu_inc(nr_dentry_negative);
383 WARN_ON_ONCE(!list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru)); 404 WARN_ON_ONCE(!list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru));
384} 405}
385 406
@@ -388,6 +409,8 @@ static void d_lru_del(struct dentry *dentry)
388 D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); 409 D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
389 dentry->d_flags &= ~DCACHE_LRU_LIST; 410 dentry->d_flags &= ~DCACHE_LRU_LIST;
390 this_cpu_dec(nr_dentry_unused); 411 this_cpu_dec(nr_dentry_unused);
412 if (d_is_negative(dentry))
413 this_cpu_dec(nr_dentry_negative);
391 WARN_ON_ONCE(!list_lru_del(&dentry->d_sb->s_dentry_lru, &dentry->d_lru)); 414 WARN_ON_ONCE(!list_lru_del(&dentry->d_sb->s_dentry_lru, &dentry->d_lru));
392} 415}
393 416
@@ -418,6 +441,8 @@ static void d_lru_isolate(struct list_lru_one *lru, struct dentry *dentry)
418 D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); 441 D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
419 dentry->d_flags &= ~DCACHE_LRU_LIST; 442 dentry->d_flags &= ~DCACHE_LRU_LIST;
420 this_cpu_dec(nr_dentry_unused); 443 this_cpu_dec(nr_dentry_unused);
444 if (d_is_negative(dentry))
445 this_cpu_dec(nr_dentry_negative);
421 list_lru_isolate(lru, &dentry->d_lru); 446 list_lru_isolate(lru, &dentry->d_lru);
422} 447}
423 448
@@ -426,6 +451,8 @@ static void d_lru_shrink_move(struct list_lru_one *lru, struct dentry *dentry,
426{ 451{
427 D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST); 452 D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
428 dentry->d_flags |= DCACHE_SHRINK_LIST; 453 dentry->d_flags |= DCACHE_SHRINK_LIST;
454 if (d_is_negative(dentry))
455 this_cpu_dec(nr_dentry_negative);
429 list_lru_isolate_move(lru, &dentry->d_lru, list); 456 list_lru_isolate_move(lru, &dentry->d_lru, list);
430} 457}
431 458
@@ -1188,15 +1215,11 @@ static enum lru_status dentry_lru_isolate_shrink(struct list_head *item,
1188 */ 1215 */
1189void shrink_dcache_sb(struct super_block *sb) 1216void shrink_dcache_sb(struct super_block *sb)
1190{ 1217{
1191 long freed;
1192
1193 do { 1218 do {
1194 LIST_HEAD(dispose); 1219 LIST_HEAD(dispose);
1195 1220
1196 freed = list_lru_walk(&sb->s_dentry_lru, 1221 list_lru_walk(&sb->s_dentry_lru,
1197 dentry_lru_isolate_shrink, &dispose, 1024); 1222 dentry_lru_isolate_shrink, &dispose, 1024);
1198
1199 this_cpu_sub(nr_dentry_unused, freed);
1200 shrink_dentry_list(&dispose); 1223 shrink_dentry_list(&dispose);
1201 } while (list_lru_count(&sb->s_dentry_lru) > 0); 1224 } while (list_lru_count(&sb->s_dentry_lru) > 0);
1202} 1225}
@@ -1820,6 +1843,11 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1820 WARN_ON(d_in_lookup(dentry)); 1843 WARN_ON(d_in_lookup(dentry));
1821 1844
1822 spin_lock(&dentry->d_lock); 1845 spin_lock(&dentry->d_lock);
1846 /*
1847 * Decrement negative dentry count if it was in the LRU list.
1848 */
1849 if (dentry->d_flags & DCACHE_LRU_LIST)
1850 this_cpu_dec(nr_dentry_negative);
1823 hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); 1851 hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
1824 raw_write_seqcount_begin(&dentry->d_seq); 1852 raw_write_seqcount_begin(&dentry->d_seq);
1825 __d_set_inode_and_type(dentry, inode, add_flags); 1853 __d_set_inode_and_type(dentry, inode, add_flags);
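
The dcache.c hunks above add nr_dentry_negative alongside the existing per-cpu counters: lockless per-cpu increments and decrements, summed on demand and clamped at zero, because a dentry counted up on one CPU may be counted down on another, leaving individual slots transiently negative. The summing side, as a userspace sketch:

#include <stdio.h>

#define NR_CPUS 4

/* Per-cpu style counters: each CPU only touches its own slot, so the
 * increment/decrement path needs no lock. */
static long nr_dentry_negative[NR_CPUS];

/* Sum across CPUs; individual slots may be negative, so clamp the total. */
static long get_nr_dentry_negative(void)
{
        long sum = 0;
        int i;

        for (i = 0; i < NR_CPUS; i++)
                sum += nr_dentry_negative[i];
        return sum < 0 ? 0 : sum;
}

int main(void)
{
        nr_dentry_negative[0] = 10;
        nr_dentry_negative[1] = -3;     /* decremented on another CPU */
        printf("negative dentries: %ld\n", get_nr_dentry_negative());
        return 0;
}
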
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 13b01351dd1c..29c68c5d44d5 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -324,7 +324,7 @@ static struct dentry *failed_creating(struct dentry *dentry)
324 inode_unlock(d_inode(dentry->d_parent)); 324 inode_unlock(d_inode(dentry->d_parent));
325 dput(dentry); 325 dput(dentry);
326 simple_release_fs(&debugfs_mount, &debugfs_mount_count); 326 simple_release_fs(&debugfs_mount, &debugfs_mount_count);
327 return NULL; 327 return ERR_PTR(-ENOMEM);
328} 328}
329 329
330static struct dentry *end_creating(struct dentry *dentry) 330static struct dentry *end_creating(struct dentry *dentry)
@@ -347,7 +347,7 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode,
347 dentry = start_creating(name, parent); 347 dentry = start_creating(name, parent);
348 348
349 if (IS_ERR(dentry)) 349 if (IS_ERR(dentry))
350 return NULL; 350 return dentry;
351 351
352 inode = debugfs_get_inode(dentry->d_sb); 352 inode = debugfs_get_inode(dentry->d_sb);
353 if (unlikely(!inode)) 353 if (unlikely(!inode))
@@ -386,7 +386,8 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode,
386 * This function will return a pointer to a dentry if it succeeds. This 386 * This function will return a pointer to a dentry if it succeeds. This
387 * pointer must be passed to the debugfs_remove() function when the file is 387 * pointer must be passed to the debugfs_remove() function when the file is
388 * to be removed (no automatic cleanup happens if your module is unloaded, 388 * to be removed (no automatic cleanup happens if your module is unloaded,
389 * you are responsible here.) If an error occurs, %NULL will be returned. 389 * you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) will be
390 * returned.
390 * 391 *
391 * If debugfs is not enabled in the kernel, the value -%ENODEV will be 392 * If debugfs is not enabled in the kernel, the value -%ENODEV will be
392 * returned. 393 * returned.
@@ -464,7 +465,8 @@ EXPORT_SYMBOL_GPL(debugfs_create_file_unsafe);
464 * This function will return a pointer to a dentry if it succeeds. This 465 * This function will return a pointer to a dentry if it succeeds. This
465 * pointer must be passed to the debugfs_remove() function when the file is 466 * pointer must be passed to the debugfs_remove() function when the file is
466 * to be removed (no automatic cleanup happens if your module is unloaded, 467 * to be removed (no automatic cleanup happens if your module is unloaded,
467 * you are responsible here.) If an error occurs, %NULL will be returned. 468 * you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) will be
469 * returned.
468 * 470 *
469 * If debugfs is not enabled in the kernel, the value -%ENODEV will be 471 * If debugfs is not enabled in the kernel, the value -%ENODEV will be
470 * returned. 472 * returned.
@@ -495,7 +497,8 @@ EXPORT_SYMBOL_GPL(debugfs_create_file_size);
495 * This function will return a pointer to a dentry if it succeeds. This 497 * This function will return a pointer to a dentry if it succeeds. This
496 * pointer must be passed to the debugfs_remove() function when the file is 498 * pointer must be passed to the debugfs_remove() function when the file is
497 * to be removed (no automatic cleanup happens if your module is unloaded, 499 * to be removed (no automatic cleanup happens if your module is unloaded,
498 * you are responsible here.) If an error occurs, %NULL will be returned. 500 * you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) will be
501 * returned.
499 * 502 *
500 * If debugfs is not enabled in the kernel, the value -%ENODEV will be 503 * If debugfs is not enabled in the kernel, the value -%ENODEV will be
501 * returned. 504 * returned.
@@ -506,7 +509,7 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent)
506 struct inode *inode; 509 struct inode *inode;
507 510
508 if (IS_ERR(dentry)) 511 if (IS_ERR(dentry))
509 return NULL; 512 return dentry;
510 513
511 inode = debugfs_get_inode(dentry->d_sb); 514 inode = debugfs_get_inode(dentry->d_sb);
512 if (unlikely(!inode)) 515 if (unlikely(!inode))
@@ -545,7 +548,7 @@ struct dentry *debugfs_create_automount(const char *name,
545 struct inode *inode; 548 struct inode *inode;
546 549
547 if (IS_ERR(dentry)) 550 if (IS_ERR(dentry))
548 return NULL; 551 return dentry;
549 552
550 inode = debugfs_get_inode(dentry->d_sb); 553 inode = debugfs_get_inode(dentry->d_sb);
551 if (unlikely(!inode)) 554 if (unlikely(!inode))
@@ -581,8 +584,8 @@ EXPORT_SYMBOL(debugfs_create_automount);
581 * This function will return a pointer to a dentry if it succeeds. This 584 * This function will return a pointer to a dentry if it succeeds. This
582 * pointer must be passed to the debugfs_remove() function when the symbolic 585 * pointer must be passed to the debugfs_remove() function when the symbolic
583 * link is to be removed (no automatic cleanup happens if your module is 586 * link is to be removed (no automatic cleanup happens if your module is
584 * unloaded, you are responsible here.) If an error occurs, %NULL will be 587 * unloaded, you are responsible here.) If an error occurs, %ERR_PTR(-ERROR)
585 * returned. 588 * will be returned.
586 * 589 *
587 * If debugfs is not enabled in the kernel, the value -%ENODEV will be 590 * If debugfs is not enabled in the kernel, the value -%ENODEV will be
588 * returned. 591 * returned.
@@ -594,12 +597,12 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
594 struct inode *inode; 597 struct inode *inode;
595 char *link = kstrdup(target, GFP_KERNEL); 598 char *link = kstrdup(target, GFP_KERNEL);
596 if (!link) 599 if (!link)
597 return NULL; 600 return ERR_PTR(-ENOMEM);
598 601
599 dentry = start_creating(name, parent); 602 dentry = start_creating(name, parent);
600 if (IS_ERR(dentry)) { 603 if (IS_ERR(dentry)) {
601 kfree(link); 604 kfree(link);
602 return NULL; 605 return dentry;
603 } 606 }
604 607
605 inode = debugfs_get_inode(dentry->d_sb); 608 inode = debugfs_get_inode(dentry->d_sb);
@@ -787,6 +790,13 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
787 struct dentry *dentry = NULL, *trap; 790 struct dentry *dentry = NULL, *trap;
788 struct name_snapshot old_name; 791 struct name_snapshot old_name;
789 792
793 if (IS_ERR(old_dir))
794 return old_dir;
795 if (IS_ERR(new_dir))
796 return new_dir;
797 if (IS_ERR_OR_NULL(old_dentry))
798 return old_dentry;
799
790 trap = lock_rename(new_dir, old_dir); 800 trap = lock_rename(new_dir, old_dir);
791 /* Source or destination directories don't exist? */ 801 /* Source or destination directories don't exist? */
792 if (d_really_is_negative(old_dir) || d_really_is_negative(new_dir)) 802 if (d_really_is_negative(old_dir) || d_really_is_negative(new_dir))
@@ -820,7 +830,9 @@ exit:
820 if (dentry && !IS_ERR(dentry)) 830 if (dentry && !IS_ERR(dentry))
821 dput(dentry); 831 dput(dentry);
822 unlock_rename(new_dir, old_dir); 832 unlock_rename(new_dir, old_dir);
823 return NULL; 833 if (IS_ERR(dentry))
834 return dentry;
835 return ERR_PTR(-EINVAL);
824} 836}
825EXPORT_SYMBOL_GPL(debugfs_rename); 837EXPORT_SYMBOL_GPL(debugfs_rename);
826 838
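
The debugfs conversion above replaces ambiguous NULL returns with encoded error pointers so callers can see and propagate the real errno. A self-contained userspace sketch of the ERR_PTR/IS_ERR convention follows, assuming only that error codes fit in the top 4095 pointer values (the same assumption the kernel makes); create_file() is a hypothetical stand-in for the debugfs creation helpers.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_ERRNO 4095

static void *ERR_PTR(long err) { return (void *)err; }
static long PTR_ERR(const void *p) { return (long)p; }
static int IS_ERR(const void *p)
{
	return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO;
}

static int object;

static void *create_file(int fail)
{
	if (fail)
		return ERR_PTR(-ENOMEM);	/* was: return NULL */
	return &object;
}

int main(void)
{
	void *d = create_file(1);

	if (IS_ERR(d))	/* caller sees -ENOMEM, not an ambiguous NULL */
		printf("error: %ld\n", PTR_ERR(d));
	return 0;
}
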
diff --git a/fs/direct-io.c b/fs/direct-io.c
index dbc1a1f080ce..ec2fb6fe6d37 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -679,6 +679,7 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
679 unsigned long fs_count; /* Number of filesystem-sized blocks */ 679 unsigned long fs_count; /* Number of filesystem-sized blocks */
680 int create; 680 int create;
681 unsigned int i_blkbits = sdio->blkbits + sdio->blkfactor; 681 unsigned int i_blkbits = sdio->blkbits + sdio->blkfactor;
682 loff_t i_size;
682 683
683 /* 684 /*
684 * If there was a memory error and we've overwritten all the 685 * If there was a memory error and we've overwritten all the
@@ -708,8 +709,8 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
708 */ 709 */
709 create = dio->op == REQ_OP_WRITE; 710 create = dio->op == REQ_OP_WRITE;
710 if (dio->flags & DIO_SKIP_HOLES) { 711 if (dio->flags & DIO_SKIP_HOLES) {
711 if (fs_startblk <= ((i_size_read(dio->inode) - 1) >> 712 i_size = i_size_read(dio->inode);
712 i_blkbits)) 713 if (i_size && fs_startblk <= (i_size - 1) >> i_blkbits)
713 create = 0; 714 create = 0;
714 } 715 }
715 716
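
The one-line guard above matters because of integer promotion: with i_size == 0 the old expression evaluated to -1, and comparing an unsigned block number against it promoted -1 to the maximum unsigned value, so the hole check always fired and direct writes to empty files never allocated blocks. A standalone arithmetic demo of that promotion (typical 64-bit build assumed; this is not kernel code):

#include <stdio.h>

int main(void)
{
	long long i_size = 0;		/* models loff_t from i_size_read() */
	unsigned long fs_startblk = 5;	/* models the unsigned block number */
	int i_blkbits = 9;

	/* old check: -1 >> 9 is -1, promoted to the max unsigned value by
	 * the mixed comparison, so this is true for any fs_startblk */
	if (fs_startblk <= (i_size - 1) >> i_blkbits)
		printf("old check: write treated as overwrite (bug)\n");

	/* new check: empty files fall through and allocate blocks */
	if (i_size && fs_startblk <= (i_size - 1) >> i_blkbits)
		printf("new check: overwrite\n");
	else
		printf("new check: allocate (correct for i_size == 0)\n");
	return 0;
}
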
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 82377017130f..d31b6c72b476 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -21,8 +21,13 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
21 spin_lock(&sb->s_inode_list_lock); 21 spin_lock(&sb->s_inode_list_lock);
22 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 22 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
23 spin_lock(&inode->i_lock); 23 spin_lock(&inode->i_lock);
24 /*
 25 * We must skip inodes in an unusual state. We could also skip
 26 * inodes without pages, but we deliberately don't: falling through
 27 * keeps the cond_resched() below reachable and avoids softlockups.
28 */
24 if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || 29 if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
25 (inode->i_mapping->nrpages == 0)) { 30 (inode->i_mapping->nrpages == 0 && !need_resched())) {
26 spin_unlock(&inode->i_lock); 31 spin_unlock(&inode->i_lock);
27 continue; 32 continue;
28 } 33 }
@@ -30,6 +35,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
30 spin_unlock(&inode->i_lock); 35 spin_unlock(&inode->i_lock);
31 spin_unlock(&sb->s_inode_list_lock); 36 spin_unlock(&sb->s_inode_list_lock);
32 37
38 cond_resched();
33 invalidate_mapping_pages(inode->i_mapping, 0, -1); 39 invalidate_mapping_pages(inode->i_mapping, 0, -1);
34 iput(toput_inode); 40 iput(toput_inode);
35 toput_inode = inode; 41 toput_inode = inode;
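
The drop_caches change above is about keeping the reschedule point reachable: a list made up entirely of page-less inodes used to short-circuit every iteration and never yield. A userspace model of the control flow, with need_resched() faked and sched_yield() standing in for cond_resched() (illustrative only):

#include <sched.h>
#include <stdio.h>

static int need_resched(void) { return 1; }	/* pretend one is always due */

int main(void)
{
	int yields = 0;

	for (int i = 0; i < 1000; i++) {
		int empty = 1;	/* inode->i_mapping->nrpages == 0 */

		if (empty && !need_resched())
			continue;	/* old code took this unconditionally */

		sched_yield();	/* stands in for cond_resched() */
		yields++;	/* invalidate_mapping_pages(...) would go here */
	}
	printf("reached the resched point %d times\n", yields);
	return 0;
}
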
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 712f00995390..5508baa11bb6 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -116,16 +116,8 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
116 goto out; 116 goto out;
117 } 117 }
118 118
119 ret = file_write_and_wait_range(file, start, end);
120 if (ret)
121 return ret;
122
123 if (!journal) { 119 if (!journal) {
124 struct writeback_control wbc = { 120 ret = __generic_file_fsync(file, start, end, datasync);
125 .sync_mode = WB_SYNC_ALL
126 };
127
128 ret = ext4_write_inode(inode, &wbc);
129 if (!ret) 121 if (!ret)
130 ret = ext4_sync_parent(inode); 122 ret = ext4_sync_parent(inode);
131 if (test_opt(inode->i_sb, BARRIER)) 123 if (test_opt(inode->i_sb, BARRIER))
@@ -133,6 +125,9 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
133 goto out; 125 goto out;
134 } 126 }
135 127
128 ret = file_write_and_wait_range(file, start, end);
129 if (ret)
130 return ret;
136 /* 131 /*
137 * data=writeback,ordered: 132 * data=writeback,ordered:
138 * The caller's filemap_fdatawrite()/wait will sync the data. 133 * The caller's filemap_fdatawrite()/wait will sync the data.
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index b40168fcc94a..36855c1f8daf 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -331,11 +331,22 @@ struct inode_switch_wbs_context {
331 struct work_struct work; 331 struct work_struct work;
332}; 332};
333 333
334static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi)
335{
336 down_write(&bdi->wb_switch_rwsem);
337}
338
339static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi)
340{
341 up_write(&bdi->wb_switch_rwsem);
342}
343
334static void inode_switch_wbs_work_fn(struct work_struct *work) 344static void inode_switch_wbs_work_fn(struct work_struct *work)
335{ 345{
336 struct inode_switch_wbs_context *isw = 346 struct inode_switch_wbs_context *isw =
337 container_of(work, struct inode_switch_wbs_context, work); 347 container_of(work, struct inode_switch_wbs_context, work);
338 struct inode *inode = isw->inode; 348 struct inode *inode = isw->inode;
349 struct backing_dev_info *bdi = inode_to_bdi(inode);
339 struct address_space *mapping = inode->i_mapping; 350 struct address_space *mapping = inode->i_mapping;
340 struct bdi_writeback *old_wb = inode->i_wb; 351 struct bdi_writeback *old_wb = inode->i_wb;
341 struct bdi_writeback *new_wb = isw->new_wb; 352 struct bdi_writeback *new_wb = isw->new_wb;
@@ -344,6 +355,12 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
344 bool switched = false; 355 bool switched = false;
345 356
346 /* 357 /*
358 * If @inode switches cgwb membership while sync_inodes_sb() is
359 * being issued, sync_inodes_sb() might miss it. Synchronize.
360 */
361 down_read(&bdi->wb_switch_rwsem);
362
363 /*
347 * By the time control reaches here, RCU grace period has passed 364 * By the time control reaches here, RCU grace period has passed
348 * since I_WB_SWITCH assertion and all wb stat update transactions 365 * since I_WB_SWITCH assertion and all wb stat update transactions
349 * between unlocked_inode_to_wb_begin/end() are guaranteed to be 366 * between unlocked_inode_to_wb_begin/end() are guaranteed to be
@@ -428,6 +445,8 @@ skip_switch:
428 spin_unlock(&new_wb->list_lock); 445 spin_unlock(&new_wb->list_lock);
429 spin_unlock(&old_wb->list_lock); 446 spin_unlock(&old_wb->list_lock);
430 447
448 up_read(&bdi->wb_switch_rwsem);
449
431 if (switched) { 450 if (switched) {
432 wb_wakeup(new_wb); 451 wb_wakeup(new_wb);
433 wb_put(old_wb); 452 wb_put(old_wb);
@@ -468,9 +487,18 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
468 if (inode->i_state & I_WB_SWITCH) 487 if (inode->i_state & I_WB_SWITCH)
469 return; 488 return;
470 489
490 /*
491 * Avoid starting new switches while sync_inodes_sb() is in
492 * progress. Otherwise, if the down_write protected issue path
493 * blocks heavily, we might end up starting a large number of
494 * switches which will block on the rwsem.
495 */
496 if (!down_read_trylock(&bdi->wb_switch_rwsem))
497 return;
498
471 isw = kzalloc(sizeof(*isw), GFP_ATOMIC); 499 isw = kzalloc(sizeof(*isw), GFP_ATOMIC);
472 if (!isw) 500 if (!isw)
473 return; 501 goto out_unlock;
474 502
475 /* find and pin the new wb */ 503 /* find and pin the new wb */
476 rcu_read_lock(); 504 rcu_read_lock();
@@ -504,12 +532,14 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
504 * Let's continue after I_WB_SWITCH is guaranteed to be visible. 532 * Let's continue after I_WB_SWITCH is guaranteed to be visible.
505 */ 533 */
506 call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn); 534 call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
507 return; 535 goto out_unlock;
508 536
509out_free: 537out_free:
510 if (isw->new_wb) 538 if (isw->new_wb)
511 wb_put(isw->new_wb); 539 wb_put(isw->new_wb);
512 kfree(isw); 540 kfree(isw);
541out_unlock:
542 up_read(&bdi->wb_switch_rwsem);
513} 543}
514 544
515/** 545/**
@@ -887,6 +917,9 @@ fs_initcall(cgroup_writeback_init);
887 917
888#else /* CONFIG_CGROUP_WRITEBACK */ 918#else /* CONFIG_CGROUP_WRITEBACK */
889 919
920static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
921static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
922
890static struct bdi_writeback * 923static struct bdi_writeback *
891locked_inode_to_wb_and_lock_list(struct inode *inode) 924locked_inode_to_wb_and_lock_list(struct inode *inode)
892 __releases(&inode->i_lock) 925 __releases(&inode->i_lock)
@@ -2413,8 +2446,11 @@ void sync_inodes_sb(struct super_block *sb)
2413 return; 2446 return;
2414 WARN_ON(!rwsem_is_locked(&sb->s_umount)); 2447 WARN_ON(!rwsem_is_locked(&sb->s_umount));
2415 2448
2449 /* protect against inode wb switch, see inode_switch_wbs_work_fn() */
2450 bdi_down_write_wb_switch_rwsem(bdi);
2416 bdi_split_work_to_wbs(bdi, &work, false); 2451 bdi_split_work_to_wbs(bdi, &work, false);
2417 wb_wait_for_completion(bdi, &done); 2452 wb_wait_for_completion(bdi, &done);
2453 bdi_up_write_wb_switch_rwsem(bdi);
2418 2454
2419 wait_sb_inodes(sb); 2455 wait_sb_inodes(sb);
2420} 2456}
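
The wb_switch_rwsem introduced above follows a classic pattern: switch work holds the lock shared, sync_inodes_sb() holds it exclusive while issuing writeback, and the switch-creation path uses a trylock so it backs off rather than queueing behind a slow sync. A pthread model of that shape (function and lock names mirror the hunks but the bodies are illustrative):

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t wb_switch_rwsem = PTHREAD_RWLOCK_INITIALIZER;

/* inode_switch_wbs(): back off instead of queueing behind a slow sync */
static void inode_switch_wbs(void)
{
	if (pthread_rwlock_tryrdlock(&wb_switch_rwsem) != 0)
		return;		/* sync in progress: skip this switch */
	/* ... allocate and queue the switch work ... */
	pthread_rwlock_unlock(&wb_switch_rwsem);
}

/* (the work fn itself takes the shared lock blocking, as in the hunk) */

/* sync_inodes_sb(): no switch may complete while writeback is issued */
static void sync_inodes_sb(void)
{
	pthread_rwlock_wrlock(&wb_switch_rwsem);
	/* ... bdi_split_work_to_wbs() + wait for completion ... */
	pthread_rwlock_unlock(&wb_switch_rwsem);
}

int main(void)
{
	inode_switch_wbs();
	sync_inodes_sb();
	puts("no switch raced with sync");
	return 0;
}
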
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index a5e516a40e7a..809c0f2f9942 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1742,7 +1742,6 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1742 req->in.h.nodeid = outarg->nodeid; 1742 req->in.h.nodeid = outarg->nodeid;
1743 req->in.numargs = 2; 1743 req->in.numargs = 2;
1744 req->in.argpages = 1; 1744 req->in.argpages = 1;
1745 req->page_descs[0].offset = offset;
1746 req->end = fuse_retrieve_end; 1745 req->end = fuse_retrieve_end;
1747 1746
1748 index = outarg->offset >> PAGE_SHIFT; 1747 index = outarg->offset >> PAGE_SHIFT;
@@ -1757,6 +1756,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1757 1756
1758 this_num = min_t(unsigned, num, PAGE_SIZE - offset); 1757 this_num = min_t(unsigned, num, PAGE_SIZE - offset);
1759 req->pages[req->num_pages] = page; 1758 req->pages[req->num_pages] = page;
1759 req->page_descs[req->num_pages].offset = offset;
1760 req->page_descs[req->num_pages].length = this_num; 1760 req->page_descs[req->num_pages].length = this_num;
1761 req->num_pages++; 1761 req->num_pages++;
1762 1762
@@ -2077,8 +2077,10 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
2077 2077
2078 ret = fuse_dev_do_write(fud, &cs, len); 2078 ret = fuse_dev_do_write(fud, &cs, len);
2079 2079
2080 pipe_lock(pipe);
2080 for (idx = 0; idx < nbuf; idx++) 2081 for (idx = 0; idx < nbuf; idx++)
2081 pipe_buf_release(pipe, &bufs[idx]); 2082 pipe_buf_release(pipe, &bufs[idx]);
2083 pipe_unlock(pipe);
2082 2084
2083out: 2085out:
2084 kvfree(bufs); 2086 kvfree(bufs);
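
The fuse_dev_splice_write() fix above brackets pipe_buf_release() with pipe_lock()/pipe_unlock(), since releasing pipe buffers without the pipe mutex races with concurrent users of the pipe. A toy pthread rendering of that discipline (stand-in types, not the real pipe API):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t pipe_mutex = PTHREAD_MUTEX_INITIALIZER;

struct pipe_buffer { int held; };

static void pipe_buf_release(struct pipe_buffer *buf)
{
	buf->held = 0;	/* must only run while pipe_mutex is held */
}

int main(void)
{
	struct pipe_buffer bufs[4] = { {1}, {1}, {1}, {1} };

	pthread_mutex_lock(&pipe_mutex);	/* pipe_lock(pipe) */
	for (int i = 0; i < 4; i++)
		pipe_buf_release(&bufs[i]);
	pthread_mutex_unlock(&pipe_mutex);	/* pipe_unlock(pipe) */
	puts("buffers released under the pipe lock");
	return 0;
}
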
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index ffaffe18352a..a59c16bd90ac 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1782,7 +1782,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req,
1782 spin_unlock(&fc->lock); 1782 spin_unlock(&fc->lock);
1783 1783
1784 dec_wb_stat(&bdi->wb, WB_WRITEBACK); 1784 dec_wb_stat(&bdi->wb, WB_WRITEBACK);
1785 dec_node_page_state(page, NR_WRITEBACK_TEMP); 1785 dec_node_page_state(new_req->pages[0], NR_WRITEBACK_TEMP);
1786 wb_writeout_inc(&bdi->wb); 1786 wb_writeout_inc(&bdi->wb);
1787 fuse_writepage_free(fc, new_req); 1787 fuse_writepage_free(fc, new_req);
1788 fuse_request_free(new_req); 1788 fuse_request_free(new_req);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 76baaa6be393..c2d4099429be 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -628,6 +628,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns)
628 get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); 628 get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
629 fc->pid_ns = get_pid_ns(task_active_pid_ns(current)); 629 fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
630 fc->user_ns = get_user_ns(user_ns); 630 fc->user_ns = get_user_ns(user_ns);
631 fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
631} 632}
632EXPORT_SYMBOL_GPL(fuse_conn_init); 633EXPORT_SYMBOL_GPL(fuse_conn_init);
633 634
@@ -1162,7 +1163,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
1162 fc->user_id = d.user_id; 1163 fc->user_id = d.user_id;
1163 fc->group_id = d.group_id; 1164 fc->group_id = d.group_id;
1164 fc->max_read = max_t(unsigned, 4096, d.max_read); 1165 fc->max_read = max_t(unsigned, 4096, d.max_read);
1165 fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
1166 1166
1167 /* Used by get_root_inode() */ 1167 /* Used by get_root_inode() */
1168 sb->s_fs_info = fc; 1168 sb->s_fs_info = fc;
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index f15b4c57c4bd..78510ab91835 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -28,7 +28,6 @@
28#include "util.h" 28#include "util.h"
29#include "trans.h" 29#include "trans.h"
30#include "dir.h" 30#include "dir.h"
31#include "lops.h"
32 31
33struct workqueue_struct *gfs2_freeze_wq; 32struct workqueue_struct *gfs2_freeze_wq;
34 33
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 5bfaf381921a..b8830fda51e8 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -733,7 +733,7 @@ void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
733 lh->lh_crc = cpu_to_be32(crc); 733 lh->lh_crc = cpu_to_be32(crc);
734 734
735 gfs2_log_write(sdp, page, sb->s_blocksize, 0, addr); 735 gfs2_log_write(sdp, page, sb->s_blocksize, 0, addr);
736 gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE | op_flags); 736 gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE, op_flags);
737 log_flush_wait(sdp); 737 log_flush_wait(sdp);
738} 738}
739 739
@@ -810,7 +810,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
810 810
811 gfs2_ordered_write(sdp); 811 gfs2_ordered_write(sdp);
812 lops_before_commit(sdp, tr); 812 lops_before_commit(sdp, tr);
813 gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE); 813 gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE, 0);
814 814
815 if (sdp->sd_log_head != sdp->sd_log_flush_head) { 815 if (sdp->sd_log_head != sdp->sd_log_flush_head) {
816 log_flush_wait(sdp); 816 log_flush_wait(sdp);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 94dcab655bc0..2295042bc625 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -17,9 +17,7 @@
17#include <linux/bio.h> 17#include <linux/bio.h>
18#include <linux/fs.h> 18#include <linux/fs.h>
19#include <linux/list_sort.h> 19#include <linux/list_sort.h>
20#include <linux/blkdev.h>
21 20
22#include "bmap.h"
23#include "dir.h" 21#include "dir.h"
24#include "gfs2.h" 22#include "gfs2.h"
25#include "incore.h" 23#include "incore.h"
@@ -195,6 +193,7 @@ static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec,
195/** 193/**
196 * gfs2_end_log_write - end of i/o to the log 194 * gfs2_end_log_write - end of i/o to the log
197 * @bio: The bio 195 * @bio: The bio
196 * @error: Status of i/o request
198 * 197 *
199 * Each bio_vec contains either data from the pagecache or data 198 * Each bio_vec contains either data from the pagecache or data
200 * relating to the log itself. Here we iterate over the bio_vec 199 * relating to the log itself. Here we iterate over the bio_vec
@@ -231,19 +230,20 @@ static void gfs2_end_log_write(struct bio *bio)
231/** 230/**
232 * gfs2_log_submit_bio - Submit any pending log bio 231 * gfs2_log_submit_bio - Submit any pending log bio
233 * @biop: Address of the bio pointer 232 * @biop: Address of the bio pointer
234 * @opf: REQ_OP | op_flags 233 * @op: REQ_OP
234 * @op_flags: req_flag_bits
235 * 235 *
236 * Submit any pending part-built or full bio to the block device. If 236 * Submit any pending part-built or full bio to the block device. If
237 * there is no pending bio, then this is a no-op. 237 * there is no pending bio, then this is a no-op.
238 */ 238 */
239 239
240void gfs2_log_submit_bio(struct bio **biop, int opf) 240void gfs2_log_submit_bio(struct bio **biop, int op, int op_flags)
241{ 241{
242 struct bio *bio = *biop; 242 struct bio *bio = *biop;
243 if (bio) { 243 if (bio) {
244 struct gfs2_sbd *sdp = bio->bi_private; 244 struct gfs2_sbd *sdp = bio->bi_private;
245 atomic_inc(&sdp->sd_log_in_flight); 245 atomic_inc(&sdp->sd_log_in_flight);
246 bio->bi_opf = opf; 246 bio_set_op_attrs(bio, op, op_flags);
247 submit_bio(bio); 247 submit_bio(bio);
248 *biop = NULL; 248 *biop = NULL;
249 } 249 }
@@ -304,7 +304,7 @@ static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno,
304 nblk >>= sdp->sd_fsb2bb_shift; 304 nblk >>= sdp->sd_fsb2bb_shift;
305 if (blkno == nblk && !flush) 305 if (blkno == nblk && !flush)
306 return bio; 306 return bio;
307 gfs2_log_submit_bio(biop, op); 307 gfs2_log_submit_bio(biop, op, 0);
308 } 308 }
309 309
310 *biop = gfs2_log_alloc_bio(sdp, blkno, end_io); 310 *biop = gfs2_log_alloc_bio(sdp, blkno, end_io);
@@ -375,184 +375,6 @@ void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page)
375 gfs2_log_bmap(sdp)); 375 gfs2_log_bmap(sdp));
376} 376}
377 377
378/**
379 * gfs2_end_log_read - end I/O callback for reads from the log
380 * @bio: The bio
381 *
382 * Simply unlock the pages in the bio. The main thread will wait on them and
383 * process them in order as necessary.
384 */
385
386static void gfs2_end_log_read(struct bio *bio)
387{
388 struct page *page;
389 struct bio_vec *bvec;
390 int i;
391
392 bio_for_each_segment_all(bvec, bio, i) {
393 page = bvec->bv_page;
394 if (bio->bi_status) {
395 int err = blk_status_to_errno(bio->bi_status);
396
397 SetPageError(page);
398 mapping_set_error(page->mapping, err);
399 }
400 unlock_page(page);
401 }
402
403 bio_put(bio);
404}
405
406/**
407 * gfs2_jhead_pg_srch - Look for the journal head in a given page.
408 * @jd: The journal descriptor
409 * @page: The page to look in
410 *
411 * Returns: 1 if found, 0 otherwise.
412 */
413
414static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd,
415 struct gfs2_log_header_host *head,
416 struct page *page)
417{
418 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
419 struct gfs2_log_header_host uninitialized_var(lh);
420 void *kaddr = kmap_atomic(page);
421 unsigned int offset;
422 bool ret = false;
423
424 for (offset = 0; offset < PAGE_SIZE; offset += sdp->sd_sb.sb_bsize) {
425 if (!__get_log_header(sdp, kaddr + offset, 0, &lh)) {
426 if (lh.lh_sequence > head->lh_sequence)
427 *head = lh;
428 else {
429 ret = true;
430 break;
431 }
432 }
433 }
434 kunmap_atomic(kaddr);
435 return ret;
436}
437
438/**
439 * gfs2_jhead_process_page - Search/cleanup a page
440 * @jd: The journal descriptor
441 * @index: Index of the page to look into
442 * @done: If set, perform only cleanup, else search and set if found.
443 *
444 * Find the page with 'index' in the journal's mapping. Search the page for
445 * the journal head if requested (cleanup == false). Release refs on the
446 * page so the page cache can reclaim it (put_page() twice). We grabbed a
447 * reference on this page two times, first when we did a find_or_create_page()
448 * to obtain the page to add it to the bio and second when we do a
449 * find_get_page() here to get the page to wait on while I/O on it is being
450 * completed.
451 * This function is also used to free up a page we might've grabbed but not
452 * used. Maybe we added it to a bio, but not submitted it for I/O. Or we
453 * submitted the I/O, but we already found the jhead so we only need to drop
454 * our references to the page.
455 */
456
457static void gfs2_jhead_process_page(struct gfs2_jdesc *jd, unsigned long index,
458 struct gfs2_log_header_host *head,
459 bool *done)
460{
461 struct page *page;
462
463 page = find_get_page(jd->jd_inode->i_mapping, index);
464 wait_on_page_locked(page);
465
466 if (PageError(page))
467 *done = true;
468
469 if (!*done)
470 *done = gfs2_jhead_pg_srch(jd, head, page);
471
472 put_page(page); /* Once for find_get_page */
473 put_page(page); /* Once more for find_or_create_page */
474}
475
476/**
477 * gfs2_find_jhead - find the head of a log
478 * @jd: The journal descriptor
479 * @head: The log descriptor for the head of the log is returned here
480 *
481 * Do a search of a journal by reading it in large chunks using bios and find
482 * the valid log entry with the highest sequence number. (i.e. the log head)
483 *
484 * Returns: 0 on success, errno otherwise
485 */
486
487int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
488{
489 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
490 struct address_space *mapping = jd->jd_inode->i_mapping;
491 struct gfs2_journal_extent *je;
492 u32 block, read_idx = 0, submit_idx = 0, index = 0;
493 int shift = PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift;
494 int blocks_per_page = 1 << shift, sz, ret = 0;
495 struct bio *bio = NULL;
496 struct page *page;
497 bool done = false;
498 errseq_t since;
499
500 memset(head, 0, sizeof(*head));
501 if (list_empty(&jd->extent_list))
502 gfs2_map_journal_extents(sdp, jd);
503
504 since = filemap_sample_wb_err(mapping);
505 list_for_each_entry(je, &jd->extent_list, list) {
506 for (block = 0; block < je->blocks; block += blocks_per_page) {
507 index = (je->lblock + block) >> shift;
508
509 page = find_or_create_page(mapping, index, GFP_NOFS);
510 if (!page) {
511 ret = -ENOMEM;
512 done = true;
513 goto out;
514 }
515
516 if (bio) {
517 sz = bio_add_page(bio, page, PAGE_SIZE, 0);
518 if (sz == PAGE_SIZE)
519 goto page_added;
520 submit_idx = index;
521 submit_bio(bio);
522 bio = NULL;
523 }
524
525 bio = gfs2_log_alloc_bio(sdp,
526 je->dblock + (index << shift),
527 gfs2_end_log_read);
528 bio->bi_opf = REQ_OP_READ;
529 sz = bio_add_page(bio, page, PAGE_SIZE, 0);
530 gfs2_assert_warn(sdp, sz == PAGE_SIZE);
531
532page_added:
533 if (submit_idx <= read_idx + BIO_MAX_PAGES) {
534 /* Keep at least one bio in flight */
535 continue;
536 }
537
538 gfs2_jhead_process_page(jd, read_idx++, head, &done);
539 if (done)
540 goto out; /* found */
541 }
542 }
543
544out:
545 if (bio)
546 submit_bio(bio);
547 while (read_idx <= index)
548 gfs2_jhead_process_page(jd, read_idx++, head, &done);
549
550 if (!ret)
551 ret = filemap_check_wb_err(mapping, since);
552
553 return ret;
554}
555
556static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type, 378static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type,
557 u32 ld_length, u32 ld_data1) 379 u32 ld_length, u32 ld_data1)
558{ 380{
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index 331160fc568b..711c4d89c063 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -30,10 +30,8 @@ extern u64 gfs2_log_bmap(struct gfs2_sbd *sdp);
30extern void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page, 30extern void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
31 unsigned size, unsigned offset, u64 blkno); 31 unsigned size, unsigned offset, u64 blkno);
32extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page); 32extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page);
33extern void gfs2_log_submit_bio(struct bio **biop, int opf); 33extern void gfs2_log_submit_bio(struct bio **biop, int op, int op_flags);
34extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh); 34extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
35extern int gfs2_find_jhead(struct gfs2_jdesc *jd,
36 struct gfs2_log_header_host *head);
37 35
38static inline unsigned int buf_limit(struct gfs2_sbd *sdp) 36static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
39{ 37{
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 1179763f6370..b041cb8ae383 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -41,7 +41,6 @@
41#include "dir.h" 41#include "dir.h"
42#include "meta_io.h" 42#include "meta_io.h"
43#include "trace_gfs2.h" 43#include "trace_gfs2.h"
44#include "lops.h"
45 44
46#define DO 0 45#define DO 0
47#define UNDO 1 46#define UNDO 1
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 7389e445a7a7..2dac43065382 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -182,6 +182,129 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
182} 182}
183 183
184/** 184/**
185 * find_good_lh - find a good log header
186 * @jd: the journal
187 * @blk: the segment to start searching from
 188 * @head: the log header to fill in
 189 * (there is no backward search; the scan wraps past the journal end)
190 *
191 * Call get_log_header() to get a log header for a segment, but if the
 192 * segment is bad, scan forward until we find a good one.
193 *
194 * Returns: errno
195 */
196
197static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
198 struct gfs2_log_header_host *head)
199{
200 unsigned int orig_blk = *blk;
201 int error;
202
203 for (;;) {
204 error = get_log_header(jd, *blk, head);
205 if (error <= 0)
206 return error;
207
208 if (++*blk == jd->jd_blocks)
209 *blk = 0;
210
211 if (*blk == orig_blk) {
212 gfs2_consist_inode(GFS2_I(jd->jd_inode));
213 return -EIO;
214 }
215 }
216}
217
218/**
219 * jhead_scan - make sure we've found the head of the log
220 * @jd: the journal
221 * @head: this is filled in with the log descriptor of the head
222 *
223 * At this point, seg and lh should be either the head of the log or just
224 * before. Scan forward until we find the head.
225 *
226 * Returns: errno
227 */
228
229static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
230{
231 unsigned int blk = head->lh_blkno;
232 struct gfs2_log_header_host lh;
233 int error;
234
235 for (;;) {
236 if (++blk == jd->jd_blocks)
237 blk = 0;
238
239 error = get_log_header(jd, blk, &lh);
240 if (error < 0)
241 return error;
242 if (error == 1)
243 continue;
244
245 if (lh.lh_sequence == head->lh_sequence) {
246 gfs2_consist_inode(GFS2_I(jd->jd_inode));
247 return -EIO;
248 }
249 if (lh.lh_sequence < head->lh_sequence)
250 break;
251
252 *head = lh;
253 }
254
255 return 0;
256}
257
258/**
259 * gfs2_find_jhead - find the head of a log
260 * @jd: the journal
261 * @head: the log descriptor for the head of the log is returned here
262 *
263 * Do a binary search of a journal and find the valid log entry with the
264 * highest sequence number. (i.e. the log head)
265 *
266 * Returns: errno
267 */
268
269int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
270{
271 struct gfs2_log_header_host lh_1, lh_m;
272 u32 blk_1, blk_2, blk_m;
273 int error;
274
275 blk_1 = 0;
276 blk_2 = jd->jd_blocks - 1;
277
278 for (;;) {
279 blk_m = (blk_1 + blk_2) / 2;
280
281 error = find_good_lh(jd, &blk_1, &lh_1);
282 if (error)
283 return error;
284
285 error = find_good_lh(jd, &blk_m, &lh_m);
286 if (error)
287 return error;
288
289 if (blk_1 == blk_m || blk_m == blk_2)
290 break;
291
292 if (lh_1.lh_sequence <= lh_m.lh_sequence)
293 blk_1 = blk_m;
294 else
295 blk_2 = blk_m;
296 }
297
298 error = jhead_scan(jd, &lh_1);
299 if (error)
300 return error;
301
302 *head = lh_1;
303
304 return error;
305}
306
307/**
185 * foreach_descriptor - go through the active part of the log 308 * foreach_descriptor - go through the active part of the log
186 * @jd: the journal 309 * @jd: the journal
187 * @start: the first log header in the active region 310 * @start: the first log header in the active region
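
The restored gfs2_find_jhead() exploits the journal's structure: sequence numbers increase monotonically from some wrap point in the circular log, so probing the midpoint and comparing sequences halves the search space, and jhead_scan() finishes with a short linear walk. A toy version over an int array (the real code reads log headers from disk and skips bad segments via find_good_lh()):

#include <stdio.h>

static unsigned int find_jhead(const unsigned int *seq, unsigned int blocks)
{
	unsigned int blk_1 = 0, blk_2 = blocks - 1, blk_m;

	for (;;) {
		blk_m = (blk_1 + blk_2) / 2;
		if (blk_1 == blk_m || blk_m == blk_2)
			break;
		if (seq[blk_1] <= seq[blk_m])
			blk_1 = blk_m;	/* still ascending: head at or after mid */
		else
			blk_2 = blk_m;	/* wrapped: head lies before mid */
	}
	/* jhead_scan() equivalent: walk forward while sequences keep rising */
	while (blk_1 + 1 < blocks && seq[blk_1 + 1] > seq[blk_1])
		blk_1++;
	return blk_1;
}

int main(void)
{
	/* log wrapped after writing sequence 9 at block 3 */
	unsigned int seq[] = { 6, 7, 8, 9, 4, 5 };

	printf("head at block %u\n", find_jhead(seq, 6));	/* prints 3 */
	return 0;
}
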
diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h
index 99575ab81202..11d81248be85 100644
--- a/fs/gfs2/recovery.h
+++ b/fs/gfs2/recovery.h
@@ -27,6 +27,8 @@ extern int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where)
27extern int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where); 27extern int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where);
28extern void gfs2_revoke_clean(struct gfs2_jdesc *jd); 28extern void gfs2_revoke_clean(struct gfs2_jdesc *jd);
29 29
30extern int gfs2_find_jhead(struct gfs2_jdesc *jd,
31 struct gfs2_log_header_host *head);
30extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd, bool wait); 32extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd, bool wait);
31extern void gfs2_recover_func(struct work_struct *work); 33extern void gfs2_recover_func(struct work_struct *work);
32extern int __get_log_header(struct gfs2_sbd *sdp, 34extern int __get_log_header(struct gfs2_sbd *sdp,
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 831d7cb5a49c..17a8d3b43990 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1780,9 +1780,9 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
1780 goto next_iter; 1780 goto next_iter;
1781 } 1781 }
1782 if (ret == -E2BIG) { 1782 if (ret == -E2BIG) {
1783 n += rbm->bii - initial_bii;
1784 rbm->bii = 0; 1783 rbm->bii = 0;
1785 rbm->offset = 0; 1784 rbm->offset = 0;
1785 n += (rbm->bii - initial_bii);
1786 goto res_covered_end_of_rgrp; 1786 goto res_covered_end_of_rgrp;
1787 } 1787 }
1788 return ret; 1788 return ret;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index d4b11c903971..ca71163ff7cf 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -45,7 +45,6 @@
45#include "util.h" 45#include "util.h"
46#include "sys.h" 46#include "sys.h"
47#include "xattr.h" 47#include "xattr.h"
48#include "lops.h"
49 48
50#define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x) 49#define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x)
51 50
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a2fcea5f8225..32920a10100e 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -383,16 +383,17 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end)
383 * truncation is indicated by end of range being LLONG_MAX 383 * truncation is indicated by end of range being LLONG_MAX
384 * In this case, we first scan the range and release found pages. 384 * In this case, we first scan the range and release found pages.
385 * After releasing pages, hugetlb_unreserve_pages cleans up region/reserv 385 * After releasing pages, hugetlb_unreserve_pages cleans up region/reserv
 386 * maps and global counts. 386 * maps and global counts. Page faults cannot race with truncation
387 * in this routine. hugetlb_no_page() prevents page faults in the
388 * truncated range. It checks i_size before allocation, and again after
389 * with the page table lock for the page held. The same lock must be
390 * acquired to unmap a page.
387 * hole punch is indicated if end is not LLONG_MAX 391 * hole punch is indicated if end is not LLONG_MAX
388 * In the hole punch case we scan the range and release found pages. 392 * In the hole punch case we scan the range and release found pages.
389 * Only when releasing a page is the associated region/reserv map 393 * Only when releasing a page is the associated region/reserv map
390 * deleted. The region/reserv map for ranges without associated 394 * deleted. The region/reserv map for ranges without associated
391 * pages are not modified. 395 * pages are not modified. Page faults can race with hole punch.
 392 * 396 * A racing fault is indicated by finding a mapped page.
393 * Callers of this routine must hold the i_mmap_rwsem in write mode to prevent
394 * races with page faults.
395 *
396 * Note: If the passed end of range value is beyond the end of file, but 397 * Note: If the passed end of range value is beyond the end of file, but
397 * not LLONG_MAX this routine still performs a hole punch operation. 398 * not LLONG_MAX this routine still performs a hole punch operation.
398 */ 399 */
@@ -422,14 +423,32 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
422 423
423 for (i = 0; i < pagevec_count(&pvec); ++i) { 424 for (i = 0; i < pagevec_count(&pvec); ++i) {
424 struct page *page = pvec.pages[i]; 425 struct page *page = pvec.pages[i];
426 u32 hash;
425 427
426 index = page->index; 428 index = page->index;
429 hash = hugetlb_fault_mutex_hash(h, current->mm,
430 &pseudo_vma,
431 mapping, index, 0);
432 mutex_lock(&hugetlb_fault_mutex_table[hash]);
433
427 /* 434 /*
428 * A mapped page is impossible as callers should unmap 435 * If page is mapped, it was faulted in after being
429 * all references before calling. And, i_mmap_rwsem 436 * unmapped in caller. Unmap (again) now after taking
430 * prevents the creation of additional mappings. 437 * the fault mutex. The mutex will prevent faults
438 * until we finish removing the page.
439 *
440 * This race can only happen in the hole punch case.
441 * Getting here in a truncate operation is a bug.
431 */ 442 */
432 VM_BUG_ON(page_mapped(page)); 443 if (unlikely(page_mapped(page))) {
444 BUG_ON(truncate_op);
445
446 i_mmap_lock_write(mapping);
447 hugetlb_vmdelete_list(&mapping->i_mmap,
448 index * pages_per_huge_page(h),
449 (index + 1) * pages_per_huge_page(h));
450 i_mmap_unlock_write(mapping);
451 }
433 452
434 lock_page(page); 453 lock_page(page);
435 /* 454 /*
@@ -451,6 +470,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
451 } 470 }
452 471
453 unlock_page(page); 472 unlock_page(page);
473 mutex_unlock(&hugetlb_fault_mutex_table[hash]);
454 } 474 }
455 huge_pagevec_release(&pvec); 475 huge_pagevec_release(&pvec);
456 cond_resched(); 476 cond_resched();
@@ -462,20 +482,9 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
462 482
463static void hugetlbfs_evict_inode(struct inode *inode) 483static void hugetlbfs_evict_inode(struct inode *inode)
464{ 484{
465 struct address_space *mapping = inode->i_mapping;
466 struct resv_map *resv_map; 485 struct resv_map *resv_map;
467 486
468 /*
469 * The vfs layer guarantees that there are no other users of this
470 * inode. Therefore, it would be safe to call remove_inode_hugepages
471 * without holding i_mmap_rwsem. We acquire and hold here to be
472 * consistent with other callers. Since there will be no contention
473 * on the semaphore, overhead is negligible.
474 */
475 i_mmap_lock_write(mapping);
476 remove_inode_hugepages(inode, 0, LLONG_MAX); 487 remove_inode_hugepages(inode, 0, LLONG_MAX);
477 i_mmap_unlock_write(mapping);
478
479 resv_map = (struct resv_map *)inode->i_mapping->private_data; 488 resv_map = (struct resv_map *)inode->i_mapping->private_data;
480 /* root inode doesn't have the resv_map, so we should check it */ 489 /* root inode doesn't have the resv_map, so we should check it */
481 if (resv_map) 490 if (resv_map)
@@ -496,8 +505,8 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
496 i_mmap_lock_write(mapping); 505 i_mmap_lock_write(mapping);
497 if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)) 506 if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
498 hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0); 507 hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0);
499 remove_inode_hugepages(inode, offset, LLONG_MAX);
500 i_mmap_unlock_write(mapping); 508 i_mmap_unlock_write(mapping);
509 remove_inode_hugepages(inode, offset, LLONG_MAX);
501 return 0; 510 return 0;
502} 511}
503 512
@@ -531,8 +540,8 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
531 hugetlb_vmdelete_list(&mapping->i_mmap, 540 hugetlb_vmdelete_list(&mapping->i_mmap,
532 hole_start >> PAGE_SHIFT, 541 hole_start >> PAGE_SHIFT,
533 hole_end >> PAGE_SHIFT); 542 hole_end >> PAGE_SHIFT);
534 remove_inode_hugepages(inode, hole_start, hole_end);
535 i_mmap_unlock_write(mapping); 543 i_mmap_unlock_write(mapping);
544 remove_inode_hugepages(inode, hole_start, hole_end);
536 inode_unlock(inode); 545 inode_unlock(inode);
537 } 546 }
538 547
@@ -615,11 +624,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
615 /* addr is the offset within the file (zero based) */ 624 /* addr is the offset within the file (zero based) */
616 addr = index * hpage_size; 625 addr = index * hpage_size;
617 626
 618 /* 627 /* fault mutex taken here; serializes against fault path and hole punch */
619 * fault mutex taken here, protects against fault path
620 * and hole punch. inode_lock previously taken protects
621 * against truncation.
622 */
623 hash = hugetlb_fault_mutex_hash(h, mm, &pseudo_vma, mapping, 628 hash = hugetlb_fault_mutex_hash(h, mm, &pseudo_vma, mapping,
624 index, addr); 629 index, addr);
625 mutex_lock(&hugetlb_fault_mutex_table[hash]); 630 mutex_lock(&hugetlb_fault_mutex_table[hash]);
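
The remove_inode_hugepages() rework above replaces a caller-held i_mmap_rwsem with the hashed fault mutex taken per page index; a page found mapped is a fault that raced with hole punch, and it is unmapped again under the mutex before removal. A small pthread model of that per-index serialization (toy hash, illustrative names, not the hugetlb API):

#include <pthread.h>
#include <stdio.h>

#define HASH_SZ 8

static pthread_mutex_t fault_mutex_table[HASH_SZ];

static unsigned int fault_mutex_hash(unsigned long index)
{
	return index % HASH_SZ;
}

static void remove_page(unsigned long index, int *mapped)
{
	unsigned int hash = fault_mutex_hash(index);

	pthread_mutex_lock(&fault_mutex_table[hash]);
	if (*mapped)		/* faulted in again after the caller's unmap */
		*mapped = 0;	/* hugetlb_vmdelete_list() stand-in */
	/* ... remove the page; faults on this index are locked out ... */
	pthread_mutex_unlock(&fault_mutex_table[hash]);
}

int main(void)
{
	int mapped = 1;

	for (int i = 0; i < HASH_SZ; i++)
		pthread_mutex_init(&fault_mutex_table[i], NULL);

	remove_page(42, &mapped);
	printf("mapped after removal: %d\n", mapped);	/* prints 0 */
	return 0;
}
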
diff --git a/fs/inode.c b/fs/inode.c
index 0cd47fe0dbe5..73432e64f874 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -730,11 +730,8 @@ static enum lru_status inode_lru_isolate(struct list_head *item,
730 return LRU_REMOVED; 730 return LRU_REMOVED;
731 } 731 }
732 732
733 /* 733 /* recently referenced inodes get one more pass */
734 * Recently referenced inodes and inodes with many attached pages 734 if (inode->i_state & I_REFERENCED) {
735 * get one more pass.
736 */
737 if (inode->i_state & I_REFERENCED || inode->i_data.nrpages > 1) {
738 inode->i_state &= ~I_REFERENCED; 735 inode->i_state &= ~I_REFERENCED;
739 spin_unlock(&inode->i_lock); 736 spin_unlock(&inode->i_lock);
740 return LRU_ROTATE; 737 return LRU_ROTATE;
diff --git a/fs/iomap.c b/fs/iomap.c
index a3088fae567b..897c60215dd1 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -116,6 +116,12 @@ iomap_page_create(struct inode *inode, struct page *page)
116 atomic_set(&iop->read_count, 0); 116 atomic_set(&iop->read_count, 0);
117 atomic_set(&iop->write_count, 0); 117 atomic_set(&iop->write_count, 0);
118 bitmap_zero(iop->uptodate, PAGE_SIZE / SECTOR_SIZE); 118 bitmap_zero(iop->uptodate, PAGE_SIZE / SECTOR_SIZE);
119
120 /*
121 * migrate_page_move_mapping() assumes that pages with private data have
122 * their count elevated by 1.
123 */
124 get_page(page);
119 set_page_private(page, (unsigned long)iop); 125 set_page_private(page, (unsigned long)iop);
120 SetPagePrivate(page); 126 SetPagePrivate(page);
121 return iop; 127 return iop;
@@ -132,6 +138,7 @@ iomap_page_release(struct page *page)
132 WARN_ON_ONCE(atomic_read(&iop->write_count)); 138 WARN_ON_ONCE(atomic_read(&iop->write_count));
133 ClearPagePrivate(page); 139 ClearPagePrivate(page);
134 set_page_private(page, 0); 140 set_page_private(page, 0);
141 put_page(page);
135 kfree(iop); 142 kfree(iop);
136} 143}
137 144
@@ -569,8 +576,10 @@ iomap_migrate_page(struct address_space *mapping, struct page *newpage,
569 576
570 if (page_has_private(page)) { 577 if (page_has_private(page)) {
571 ClearPagePrivate(page); 578 ClearPagePrivate(page);
579 get_page(newpage);
572 set_page_private(newpage, page_private(page)); 580 set_page_private(newpage, page_private(page));
573 set_page_private(page, 0); 581 set_page_private(page, 0);
582 put_page(page);
574 SetPagePrivate(newpage); 583 SetPagePrivate(newpage);
575 } 584 }
576 585
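
The two iomap hunks above pair reference counting with PagePrivate: attaching private data takes a page reference, detaching drops it, and migration moves both the pointer and the reference to the new page, which is what migrate_page_move_mapping() expects. The pairing in miniature (plain struct, not the kernel's page API):

#include <stdio.h>

struct page { int refcount; void *private; };

static void attach_private(struct page *p, void *data)
{
	p->refcount++;		/* get_page() */
	p->private = data;	/* set_page_private() + SetPagePrivate() */
}

static void detach_private(struct page *p)
{
	p->private = NULL;
	p->refcount--;		/* put_page() */
}

static void migrate_private(struct page *oldp, struct page *newp)
{
	newp->refcount++;	/* get_page(newpage) */
	newp->private = oldp->private;
	oldp->private = NULL;
	oldp->refcount--;	/* put_page(page) */
}

int main(void)
{
	struct page a = { .refcount = 1 }, b = { .refcount = 1 };
	int iop;	/* dummy payload standing in for struct iomap_page */

	attach_private(&a, &iop);
	migrate_private(&a, &b);
	detach_private(&b);
	printf("refcounts: %d %d\n", a.refcount, b.refcount);	/* 1 1 */
	return 0;
}
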
@@ -1804,6 +1813,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
1804 loff_t pos = iocb->ki_pos, start = pos; 1813 loff_t pos = iocb->ki_pos, start = pos;
1805 loff_t end = iocb->ki_pos + count - 1, ret = 0; 1814 loff_t end = iocb->ki_pos + count - 1, ret = 0;
1806 unsigned int flags = IOMAP_DIRECT; 1815 unsigned int flags = IOMAP_DIRECT;
1816 bool wait_for_completion = is_sync_kiocb(iocb);
1807 struct blk_plug plug; 1817 struct blk_plug plug;
1808 struct iomap_dio *dio; 1818 struct iomap_dio *dio;
1809 1819
@@ -1823,7 +1833,6 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
1823 dio->end_io = end_io; 1833 dio->end_io = end_io;
1824 dio->error = 0; 1834 dio->error = 0;
1825 dio->flags = 0; 1835 dio->flags = 0;
1826 dio->wait_for_completion = is_sync_kiocb(iocb);
1827 1836
1828 dio->submit.iter = iter; 1837 dio->submit.iter = iter;
1829 dio->submit.waiter = current; 1838 dio->submit.waiter = current;
@@ -1878,7 +1887,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
1878 dio_warn_stale_pagecache(iocb->ki_filp); 1887 dio_warn_stale_pagecache(iocb->ki_filp);
1879 ret = 0; 1888 ret = 0;
1880 1889
1881 if (iov_iter_rw(iter) == WRITE && !dio->wait_for_completion && 1890 if (iov_iter_rw(iter) == WRITE && !wait_for_completion &&
1882 !inode->i_sb->s_dio_done_wq) { 1891 !inode->i_sb->s_dio_done_wq) {
1883 ret = sb_init_dio_done_wq(inode->i_sb); 1892 ret = sb_init_dio_done_wq(inode->i_sb);
1884 if (ret < 0) 1893 if (ret < 0)
@@ -1894,7 +1903,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
1894 if (ret <= 0) { 1903 if (ret <= 0) {
1895 /* magic error code to fall back to buffered I/O */ 1904 /* magic error code to fall back to buffered I/O */
1896 if (ret == -ENOTBLK) { 1905 if (ret == -ENOTBLK) {
1897 dio->wait_for_completion = true; 1906 wait_for_completion = true;
1898 ret = 0; 1907 ret = 0;
1899 } 1908 }
1900 break; 1909 break;
@@ -1916,8 +1925,24 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
1916 if (dio->flags & IOMAP_DIO_WRITE_FUA) 1925 if (dio->flags & IOMAP_DIO_WRITE_FUA)
1917 dio->flags &= ~IOMAP_DIO_NEED_SYNC; 1926 dio->flags &= ~IOMAP_DIO_NEED_SYNC;
1918 1927
1928 /*
1929 * We are about to drop our additional submission reference, which
 1930 * might be the last reference to the dio. There are three
1931 * different ways we can progress here:
1932 *
1933 * (a) If this is the last reference we will always complete and free
1934 * the dio ourselves.
1935 * (b) If this is not the last reference, and we serve an asynchronous
 1937 * iocb, we must never touch the dio after the decrement; the
1937 * I/O completion handler will complete and free it.
1938 * (c) If this is not the last reference, but we serve a synchronous
1939 * iocb, the I/O completion handler will wake us up on the drop
1940 * of the final reference, and we will complete and free it here
1941 * after we got woken by the I/O completion handler.
1942 */
1943 dio->wait_for_completion = wait_for_completion;
1919 if (!atomic_dec_and_test(&dio->ref)) { 1944 if (!atomic_dec_and_test(&dio->ref)) {
1920 if (!dio->wait_for_completion) 1945 if (!wait_for_completion)
1921 return -EIOCBQUEUED; 1946 return -EIOCBQUEUED;
1922 1947
1923 for (;;) { 1948 for (;;) {
@@ -1934,9 +1959,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
1934 __set_current_state(TASK_RUNNING); 1959 __set_current_state(TASK_RUNNING);
1935 } 1960 }
1936 1961
1937 ret = iomap_dio_complete(dio); 1962 return iomap_dio_complete(dio);
1938
1939 return ret;
1940 1963
1941out_free_dio: 1964out_free_dio:
1942 kfree(dio); 1965 kfree(dio);
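
The iomap_dio_rw() change above fixes a use-after-free by the book: once the submitter drops its extra reference, an async completion may free the dio at any moment, so the wait decision is copied into a stack variable first and the struct is never touched after the decrement unless we know we held the last reference. The pattern, reduced to a standalone model (names mirror the hunk; the refcounting is a plain C11 atomic, not the kernel's):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct dio {
	atomic_int ref;
	bool wait_for_completion;
};

static void dio_put(struct dio *dio)
{
	if (atomic_fetch_sub(&dio->ref, 1) == 1)
		free(dio);	/* last ref: whoever drops it frees it */
}

int main(void)
{
	struct dio *dio = malloc(sizeof(*dio));

	atomic_init(&dio->ref, 2);	/* submitter + in-flight bio */
	dio->wait_for_completion = false;

	bool wait = dio->wait_for_completion;	/* copy BEFORE the drop */
	dio_put(dio);		/* submitter's ref; dio may now vanish */
	if (!wait)		/* safe: reads the local, not the dio */
		puts("-EIOCBQUEUED: completion owns the dio now");
	dio_put(dio);		/* models the completion handler's final put */
	return 0;
}
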
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 46d691ba04bc..45b2322e092d 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -133,15 +133,9 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
133 struct file *file_out, loff_t pos_out, 133 struct file *file_out, loff_t pos_out,
134 size_t count, unsigned int flags) 134 size_t count, unsigned int flags)
135{ 135{
136 ssize_t ret;
137
138 if (file_inode(file_in) == file_inode(file_out)) 136 if (file_inode(file_in) == file_inode(file_out))
139 return -EINVAL; 137 return -EINVAL;
140retry: 138 return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
141 ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
142 if (ret == -EAGAIN)
143 goto retry;
144 return ret;
145} 139}
146 140
147static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence) 141static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence)
diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index 3f23b6840547..bf34ddaa2ad7 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -44,6 +44,7 @@
44#include <linux/keyctl.h> 44#include <linux/keyctl.h>
45#include <linux/key-type.h> 45#include <linux/key-type.h>
46#include <keys/user-type.h> 46#include <keys/user-type.h>
47#include <keys/request_key_auth-type.h>
47#include <linux/module.h> 48#include <linux/module.h>
48 49
49#include "internal.h" 50#include "internal.h"
@@ -59,7 +60,7 @@ static struct key_type key_type_id_resolver_legacy;
59struct idmap_legacy_upcalldata { 60struct idmap_legacy_upcalldata {
60 struct rpc_pipe_msg pipe_msg; 61 struct rpc_pipe_msg pipe_msg;
61 struct idmap_msg idmap_msg; 62 struct idmap_msg idmap_msg;
62 struct key_construction *key_cons; 63 struct key *authkey;
63 struct idmap *idmap; 64 struct idmap *idmap;
64}; 65};
65 66
@@ -384,7 +385,7 @@ static const match_table_t nfs_idmap_tokens = {
384 { Opt_find_err, NULL } 385 { Opt_find_err, NULL }
385}; 386};
386 387
387static int nfs_idmap_legacy_upcall(struct key_construction *, const char *, void *); 388static int nfs_idmap_legacy_upcall(struct key *, void *);
388static ssize_t idmap_pipe_downcall(struct file *, const char __user *, 389static ssize_t idmap_pipe_downcall(struct file *, const char __user *,
389 size_t); 390 size_t);
390static void idmap_release_pipe(struct inode *); 391static void idmap_release_pipe(struct inode *);
@@ -549,11 +550,12 @@ nfs_idmap_prepare_pipe_upcall(struct idmap *idmap,
549static void 550static void
550nfs_idmap_complete_pipe_upcall_locked(struct idmap *idmap, int ret) 551nfs_idmap_complete_pipe_upcall_locked(struct idmap *idmap, int ret)
551{ 552{
552 struct key_construction *cons = idmap->idmap_upcall_data->key_cons; 553 struct key *authkey = idmap->idmap_upcall_data->authkey;
553 554
554 kfree(idmap->idmap_upcall_data); 555 kfree(idmap->idmap_upcall_data);
555 idmap->idmap_upcall_data = NULL; 556 idmap->idmap_upcall_data = NULL;
556 complete_request_key(cons, ret); 557 complete_request_key(authkey, ret);
558 key_put(authkey);
557} 559}
558 560
559static void 561static void
@@ -563,15 +565,14 @@ nfs_idmap_abort_pipe_upcall(struct idmap *idmap, int ret)
563 nfs_idmap_complete_pipe_upcall_locked(idmap, ret); 565 nfs_idmap_complete_pipe_upcall_locked(idmap, ret);
564} 566}
565 567
566static int nfs_idmap_legacy_upcall(struct key_construction *cons, 568static int nfs_idmap_legacy_upcall(struct key *authkey, void *aux)
567 const char *op,
568 void *aux)
569{ 569{
570 struct idmap_legacy_upcalldata *data; 570 struct idmap_legacy_upcalldata *data;
571 struct request_key_auth *rka = get_request_key_auth(authkey);
571 struct rpc_pipe_msg *msg; 572 struct rpc_pipe_msg *msg;
572 struct idmap_msg *im; 573 struct idmap_msg *im;
573 struct idmap *idmap = (struct idmap *)aux; 574 struct idmap *idmap = (struct idmap *)aux;
574 struct key *key = cons->key; 575 struct key *key = rka->target_key;
575 int ret = -ENOKEY; 576 int ret = -ENOKEY;
576 577
577 if (!aux) 578 if (!aux)
@@ -586,7 +587,7 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons,
586 msg = &data->pipe_msg; 587 msg = &data->pipe_msg;
587 im = &data->idmap_msg; 588 im = &data->idmap_msg;
588 data->idmap = idmap; 589 data->idmap = idmap;
589 data->key_cons = cons; 590 data->authkey = key_get(authkey);
590 591
591 ret = nfs_idmap_prepare_message(key->description, idmap, im, msg); 592 ret = nfs_idmap_prepare_message(key->description, idmap, im, msg);
592 if (ret < 0) 593 if (ret < 0)
@@ -604,7 +605,7 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons,
604out2: 605out2:
605 kfree(data); 606 kfree(data);
606out1: 607out1:
607 complete_request_key(cons, ret); 608 complete_request_key(authkey, ret);
608 return ret; 609 return ret;
609} 610}
610 611
@@ -651,9 +652,10 @@ out:
651static ssize_t 652static ssize_t
652idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) 653idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
653{ 654{
655 struct request_key_auth *rka;
654 struct rpc_inode *rpci = RPC_I(file_inode(filp)); 656 struct rpc_inode *rpci = RPC_I(file_inode(filp));
655 struct idmap *idmap = (struct idmap *)rpci->private; 657 struct idmap *idmap = (struct idmap *)rpci->private;
656 struct key_construction *cons; 658 struct key *authkey;
657 struct idmap_msg im; 659 struct idmap_msg im;
658 size_t namelen_in; 660 size_t namelen_in;
659 int ret = -ENOKEY; 661 int ret = -ENOKEY;
@@ -665,7 +667,8 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
665 if (idmap->idmap_upcall_data == NULL) 667 if (idmap->idmap_upcall_data == NULL)
666 goto out_noupcall; 668 goto out_noupcall;
667 669
668 cons = idmap->idmap_upcall_data->key_cons; 670 authkey = idmap->idmap_upcall_data->authkey;
671 rka = get_request_key_auth(authkey);
669 672
670 if (mlen != sizeof(im)) { 673 if (mlen != sizeof(im)) {
671 ret = -ENOSPC; 674 ret = -ENOSPC;
@@ -690,9 +693,9 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
690 693
691 ret = nfs_idmap_read_and_verify_message(&im, 694 ret = nfs_idmap_read_and_verify_message(&im,
692 &idmap->idmap_upcall_data->idmap_msg, 695 &idmap->idmap_upcall_data->idmap_msg,
693 cons->key, cons->authkey); 696 rka->target_key, authkey);
694 if (ret >= 0) { 697 if (ret >= 0) {
695 key_set_timeout(cons->key, nfs_idmap_cache_timeout); 698 key_set_timeout(rka->target_key, nfs_idmap_cache_timeout);
696 ret = mlen; 699 ret = mlen;
697 } 700 }
698 701
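
Beyond the API migration, the idmap conversion above tightens key lifetime handling: the upcall pins the authorization key with key_get() before stashing it for the asynchronous downcall, and the completion path drops that reference. The get/stash/put discipline in a minimal model (stand-in struct key and helpers, not the kernel keyring API):

#include <stdio.h>

struct key { int refcount; };

static struct key *key_get(struct key *k) { k->refcount++; return k; }
static void key_put(struct key *k) { k->refcount--; }

struct upcall_data { struct key *authkey; };

static void upcall(struct upcall_data *d, struct key *authkey)
{
	d->authkey = key_get(authkey);	/* pin for the async downcall */
}

static void complete(struct upcall_data *d)
{
	/* ... complete_request_key(d->authkey, ret) equivalent ... */
	key_put(d->authkey);	/* balance the key_get() in upcall() */
	d->authkey = NULL;
}

int main(void)
{
	struct key k = { .refcount = 1 };
	struct upcall_data d;

	upcall(&d, &k);
	complete(&d);
	printf("refcount back to %d\n", k.refcount);	/* prints 1 */
	return 0;
}
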
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 22ce3c8a2f46..0570391eaa16 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1895,6 +1895,11 @@ static int nfs_parse_devname(const char *dev_name,
1895 size_t len; 1895 size_t len;
1896 char *end; 1896 char *end;
1897 1897
1898 if (unlikely(!dev_name || !*dev_name)) {
1899 dfprintk(MOUNT, "NFS: device name not specified\n");
1900 return -EINVAL;
1901 }
1902
 1898 /* Is the host name protected with square brackets? */ 1903 /* Is the host name protected with square brackets? */
1899 if (*dev_name == '[') { 1904 if (*dev_name == '[') {
1900 end = strchr(++dev_name, ']'); 1905 end = strchr(++dev_name, ']');
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 5a0bbf917a32..d09c9f878141 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -238,9 +238,9 @@ out:
 }
 
 /* A writeback failed: mark the page as bad, and invalidate the page cache */
-static void nfs_set_pageerror(struct page *page)
+static void nfs_set_pageerror(struct address_space *mapping)
 {
-	nfs_zap_mapping(page_file_mapping(page)->host, page_file_mapping(page));
+	nfs_zap_mapping(mapping->host, mapping);
 }
 
 /*
@@ -621,11 +621,12 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
 	nfs_set_page_writeback(page);
 	WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags));
 
-	ret = 0;
+	ret = req->wb_context->error;
 	/* If there is a fatal error that covers this write, just exit */
-	if (nfs_error_is_fatal_on_server(req->wb_context->error))
+	if (nfs_error_is_fatal_on_server(ret))
 		goto out_launder;
 
+	ret = 0;
 	if (!nfs_pageio_add_request(pgio, req)) {
 		ret = pgio->pg_error;
 		/*
@@ -635,9 +636,9 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
 			nfs_context_set_write_error(req->wb_context, ret);
 			if (nfs_error_is_fatal_on_server(ret))
 				goto out_launder;
-		}
+		} else
+			ret = -EAGAIN;
 		nfs_redirty_request(req);
-		ret = -EAGAIN;
 	} else
 		nfs_add_stats(page_file_mapping(page)->host,
 			      NFSIOS_WRITEPAGES, 1);
@@ -993,7 +994,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
 		nfs_list_remove_request(req);
 		if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
 		    (hdr->good_bytes < bytes)) {
-			nfs_set_pageerror(req->wb_page);
+			nfs_set_pageerror(page_file_mapping(req->wb_page));
 			nfs_context_set_write_error(req->wb_context, hdr->error);
 			goto remove_req;
 		}
@@ -1347,7 +1348,8 @@ int nfs_updatepage(struct file *file, struct page *page,
 		unsigned int offset, unsigned int count)
 {
 	struct nfs_open_context *ctx = nfs_file_open_context(file);
-	struct inode *inode = page_file_mapping(page)->host;
+	struct address_space *mapping = page_file_mapping(page);
+	struct inode *inode = mapping->host;
 	int status = 0;
 
 	nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
@@ -1365,7 +1367,7 @@ int nfs_updatepage(struct file *file, struct page *page,
 
 	status = nfs_writepage_setup(ctx, page, offset, count);
 	if (status < 0)
-		nfs_set_pageerror(page);
+		nfs_set_pageerror(mapping);
 	else
 		__set_page_dirty_nobuffers(page);
 out:
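The nfs_page_async_flush() reordering is easiest to read as a decision table: the open context's error is consulted first, and -EAGAIN is produced only when the request was actually redirtied. A compilable userspace model of the fixed control flow; helper names and the "fatal" error subset are stand-ins, not NFS code:

/* Compilable model of the reordered error path in nfs_page_async_flush(). */
#include <stdio.h>
#include <errno.h>

static int fatal_on_server(int err)
{
	return err == -EIO || err == -ESTALE;	/* toy subset */
}

static int flush_one(int ctx_error, int add_ok, int pg_error)
{
	int ret = ctx_error;			/* consult the context first */

	if (fatal_on_server(ret))
		return ret;			/* out_launder: no retry */

	ret = 0;
	if (!add_ok) {
		ret = pg_error;
		if (fatal_on_server(ret))
			return ret;		/* out_launder */
		ret = -EAGAIN;			/* redirtied: retry later */
	}
	return ret;
}

int main(void)
{
	printf("%d\n", flush_one(-EIO, 1, 0));		/* -5: fatal, surfaced */
	printf("%d\n", flush_one(0, 0, -ENOMEM));	/* -11: retry via -EAGAIN */
	return 0;
}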
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index b33f9785b756..72a7681f4046 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1239,8 +1239,8 @@ static __net_init int nfsd_init_net(struct net *net)
 	retval = nfsd_idmap_init(net);
 	if (retval)
 		goto out_idmap_error;
-	nn->nfsd4_lease = 45;	/* default lease time */
-	nn->nfsd4_grace = 45;
+	nn->nfsd4_lease = 90;	/* default lease time */
+	nn->nfsd4_grace = 90;
 	nn->somebody_reclaimed = false;
 	nn->clverifier_counter = prandom_u32();
 	nn->clientid_counter = prandom_u32();
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 9824e32b2f23..7dc98e14655d 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -557,9 +557,11 @@ __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst,
 	loff_t cloned;
 
 	cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0);
+	if (cloned < 0)
+		return nfserrno(cloned);
 	if (count && cloned != count)
-		cloned = -EINVAL;
-	return nfserrno(cloned < 0 ? cloned : 0);
+		return nfserrno(-EINVAL);
+	return 0;
 }
 
 ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
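Before this change a negative return from vfs_clone_file_range() could be overwritten by the short-clone test (count && cloned != count), so the client saw EINVAL instead of the real error such as EXDEV. A small compilable model of the fixed ordering; plain integers stand in for __be32 status and nfserrno():

/* Compilable model of the reordered clone-result checks. */
#include <stdio.h>
#include <errno.h>

static long long check_clone(long long cloned, unsigned long long count)
{
	if (cloned < 0)
		return cloned;		/* propagate the real error first */
	if (count && (unsigned long long)cloned != count)
		return -EINVAL;		/* short clone of an explicit length */
	return 0;
}

int main(void)
{
	printf("%lld\n", check_clone(-EXDEV, 4096));	/* -18, no longer -22 */
	printf("%lld\n", check_clone(512, 4096));	/* -22 */
	printf("%lld\n", check_clone(4096, 4096));	/* 0 */
	return 0;
}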
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 105576daca4a..798f1253141a 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -724,8 +724,10 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
 		return -EBADF;
 
 	/* IN_MASK_ADD and IN_MASK_CREATE don't make sense together */
-	if (unlikely((mask & IN_MASK_ADD) && (mask & IN_MASK_CREATE)))
-		return -EINVAL;
+	if (unlikely((mask & IN_MASK_ADD) && (mask & IN_MASK_CREATE))) {
+		ret = -EINVAL;
+		goto fput_and_out;
+	}
 
 	/* verify that this is indeed an inotify instance */
 	if (unlikely(f.file->f_op != &inotify_fops)) {
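The point of the goto: the mask check sits after fdget(), so the old direct return leaked a file reference on every rejected call. A compilable model of the fixed exit discipline (all names are stand-ins):

/* Compilable model of the fd-leak fix: every error exit after fdget()
 * must drop the file reference. */
#include <stdio.h>

static int refs;
static void fdget_model(void) { refs++; }
static void fdput_model(void) { refs--; }

static int add_watch_model(unsigned int mask_add, unsigned int mask_create)
{
	int ret = 0;

	fdget_model();
	if (mask_add && mask_create) {
		ret = -22;		/* -EINVAL */
		goto fput_and_out;	/* old code returned here, leaking */
	}
	/* ... validate the inotify instance, install the watch ... */
fput_and_out:
	fdput_model();
	return ret;
}

int main(void)
{
	add_watch_model(1, 1);
	printf("leaked refs: %d\n", refs);	/* 0 with the goto in place */
	return 0;
}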
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 633a63462573..f5ed9512d193 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1086,10 +1086,6 @@ static int __set_oom_adj(struct file *file, int oom_adj, bool legacy)
 
 		task_lock(p);
 		if (!p->vfork_done && process_shares_mm(p, mm)) {
-			pr_info("updating oom_score_adj for %d (%s) from %d to %d because it shares mm with %d (%s). Report if this is unexpected.\n",
-				task_pid_nr(p), p->comm,
-				p->signal->oom_score_adj, oom_adj,
-				task_pid_nr(task), task->comm);
 			p->signal->oom_score_adj = oom_adj;
 			if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
 				p->signal->oom_score_adj_min = (short)oom_adj;
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 8ae109429a88..e39bac94dead 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -256,7 +256,7 @@ struct dentry *proc_lookup_de(struct inode *dir, struct dentry *dentry,
 		inode = proc_get_inode(dir->i_sb, de);
 		if (!inode)
 			return ERR_PTR(-ENOMEM);
-		d_set_d_op(dentry, &proc_misc_dentry_ops);
+		d_set_d_op(dentry, de->proc_dops);
 		return d_splice_alias(inode, dentry);
 	}
 	read_unlock(&proc_subdir_lock);
@@ -429,6 +429,8 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
 	INIT_LIST_HEAD(&ent->pde_openers);
 	proc_set_user(ent, (*parent)->uid, (*parent)->gid);
 
+	ent->proc_dops = &proc_misc_dentry_ops;
+
 out:
 	return ent;
 }
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 5185d7f6a51e..95b14196f284 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -44,6 +44,7 @@ struct proc_dir_entry {
 	struct completion *pde_unload_completion;
 	const struct inode_operations *proc_iops;
 	const struct file_operations *proc_fops;
+	const struct dentry_operations *proc_dops;
 	union {
 		const struct seq_operations *seq_ops;
 		int (*single_show)(struct seq_file *, void *);
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index d5e0fcb3439e..a7b12435519e 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -38,6 +38,22 @@ static struct net *get_proc_net(const struct inode *inode)
 	return maybe_get_net(PDE_NET(PDE(inode)));
 }
 
+static int proc_net_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+	return 0;
+}
+
+static const struct dentry_operations proc_net_dentry_ops = {
+	.d_revalidate	= proc_net_d_revalidate,
+	.d_delete	= always_delete_dentry,
+};
+
+static void pde_force_lookup(struct proc_dir_entry *pde)
+{
+	/* /proc/net/ entries can be changed under us by setns(CLONE_NEWNET) */
+	pde->proc_dops = &proc_net_dentry_ops;
+}
+
 static int seq_open_net(struct inode *inode, struct file *file)
 {
 	unsigned int state_size = PDE(inode)->state_size;
@@ -90,6 +106,7 @@ struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode,
 	p = proc_create_reg(name, mode, &parent, data);
 	if (!p)
 		return NULL;
+	pde_force_lookup(p);
 	p->proc_fops = &proc_net_seq_fops;
 	p->seq_ops = ops;
 	p->state_size = state_size;
@@ -133,6 +150,7 @@ struct proc_dir_entry *proc_create_net_data_write(const char *name, umode_t mode
 	p = proc_create_reg(name, mode, &parent, data);
 	if (!p)
 		return NULL;
+	pde_force_lookup(p);
 	p->proc_fops = &proc_net_seq_fops;
 	p->seq_ops = ops;
 	p->state_size = state_size;
@@ -181,6 +199,7 @@ struct proc_dir_entry *proc_create_net_single(const char *name, umode_t mode,
 	p = proc_create_reg(name, mode, &parent, data);
 	if (!p)
 		return NULL;
+	pde_force_lookup(p);
 	p->proc_fops = &proc_net_single_fops;
 	p->single_show = show;
 	return proc_register(parent, p);
@@ -223,6 +242,7 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo
 	p = proc_create_reg(name, mode, &parent, data);
 	if (!p)
 		return NULL;
+	pde_force_lookup(p);
 	p->proc_fops = &proc_net_single_fops;
 	p->single_show = show;
 	p->write = write;
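pde_force_lookup() works because a d_revalidate that always returns 0 tells the VFS the cached dentry is never valid, so every path walk repeats ->lookup(), and the lookup resolves against the walker's current network namespace. A small userspace model of that control flow; all names here are hypothetical:

/* Userspace model: returning 0 from revalidate forces a fresh lookup. */
#include <stdio.h>

struct dentry_model { int filled_from_ns; };

static int proc_net_d_revalidate_model(struct dentry_model *d)
{
	(void)d;
	return 0;			/* never trust the dcache entry */
}

static const char *lookup_model(int current_ns)
{
	return current_ns == 1 ? "view of netns 1" : "view of netns 2";
}

static const char *walk_model(struct dentry_model *cached, int current_ns)
{
	if (!proc_net_d_revalidate_model(cached))
		return lookup_model(current_ns);	/* forced fresh lookup */
	return "stale cached view";
}

int main(void)
{
	struct dentry_model d = { .filled_from_ns = 1 };

	printf("%s\n", walk_model(&d, 1));
	printf("%s\n", walk_model(&d, 2));	/* after setns(CLONE_NEWNET) */
	return 0;
}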
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index f0ec9edab2f3..85b0ef890b28 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -423,7 +423,7 @@ struct mem_size_stats {
 };
 
 static void smaps_account(struct mem_size_stats *mss, struct page *page,
-		bool compound, bool young, bool dirty)
+		bool compound, bool young, bool dirty, bool locked)
 {
 	int i, nr = compound ? 1 << compound_order(page) : 1;
 	unsigned long size = nr * PAGE_SIZE;
@@ -450,24 +450,31 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
 		else
 			mss->private_clean += size;
 		mss->pss += (u64)size << PSS_SHIFT;
+		if (locked)
+			mss->pss_locked += (u64)size << PSS_SHIFT;
 		return;
 	}
 
 	for (i = 0; i < nr; i++, page++) {
 		int mapcount = page_mapcount(page);
+		unsigned long pss = (PAGE_SIZE << PSS_SHIFT);
 
 		if (mapcount >= 2) {
 			if (dirty || PageDirty(page))
 				mss->shared_dirty += PAGE_SIZE;
 			else
 				mss->shared_clean += PAGE_SIZE;
-			mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;
+			mss->pss += pss / mapcount;
+			if (locked)
+				mss->pss_locked += pss / mapcount;
 		} else {
 			if (dirty || PageDirty(page))
 				mss->private_dirty += PAGE_SIZE;
 			else
 				mss->private_clean += PAGE_SIZE;
-			mss->pss += PAGE_SIZE << PSS_SHIFT;
+			mss->pss += pss;
+			if (locked)
+				mss->pss_locked += pss;
 		}
 	}
 }
@@ -490,6 +497,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
 {
 	struct mem_size_stats *mss = walk->private;
 	struct vm_area_struct *vma = walk->vma;
+	bool locked = !!(vma->vm_flags & VM_LOCKED);
 	struct page *page = NULL;
 
 	if (pte_present(*pte)) {
@@ -532,7 +540,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
 	if (!page)
 		return;
 
-	smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte));
+	smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), locked);
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -541,6 +549,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
 {
 	struct mem_size_stats *mss = walk->private;
 	struct vm_area_struct *vma = walk->vma;
+	bool locked = !!(vma->vm_flags & VM_LOCKED);
 	struct page *page;
 
 	/* FOLL_DUMP will return -EFAULT on huge zero page */
@@ -555,7 +564,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
 		/* pass */;
 	else
 		VM_BUG_ON_PAGE(1, page);
-	smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd));
+	smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked);
 }
 #else
 static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
@@ -737,11 +746,8 @@ static void smap_gather_stats(struct vm_area_struct *vma,
 		}
 	}
 #endif
-
 	/* mmap_sem is held in m_start */
 	walk_page_vma(vma, &smaps_walk);
-	if (vma->vm_flags & VM_LOCKED)
-		mss->pss_locked += mss->pss;
 }
 
 #define SEQ_PUT_DEC(str, val) \
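The per-page arithmetic is the point of the pss_locked fix: previously a VM_LOCKED vma added the whole accumulated pss, including pages from vmas visited earlier in the walk, to pss_locked; now each page adds the same proportional share to both counters. A compilable worked example of the fixed-point math; PSS_SHIFT matches the kernel's value of 12, everything else is a standalone toy:

/* Worked model of the smaps PSS accounting above. */
#include <stdio.h>

#define PSS_SHIFT 12
#define PAGE_SIZE 4096ULL

int main(void)
{
	unsigned long long pss = 0, pss_locked = 0;
	int locked = 1;				/* vma has VM_LOCKED set */

	/* one private page: full page charged */
	pss += PAGE_SIZE << PSS_SHIFT;
	if (locked)
		pss_locked += PAGE_SIZE << PSS_SHIFT;

	/* one page shared by three processes: one third of a page charged */
	unsigned long long share = (PAGE_SIZE << PSS_SHIFT) / 3;
	pss += share;
	if (locked)
		pss_locked += share;

	/* 4096 + 1365 bytes, reported as 5 kB after the shift */
	printf("Pss:    %llu kB\n", (pss >> PSS_SHIFT) / 1024);
	printf("Locked: %llu kB\n", (pss_locked >> PSS_SHIFT) / 1024);
	return 0;
}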
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 96f7d32cd184..898c8321b343 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -128,7 +128,6 @@ ramoops_get_next_prz(struct persistent_ram_zone *przs[], int id,
 		     struct pstore_record *record)
 {
 	struct persistent_ram_zone *prz;
-	bool update = (record->type == PSTORE_TYPE_DMESG);
 
 	/* Give up if we never existed or have hit the end. */
 	if (!przs)
@@ -139,7 +138,7 @@ ramoops_get_next_prz(struct persistent_ram_zone *przs[], int id,
 		return NULL;
 
 	/* Update old/shadowed buffer. */
-	if (update)
+	if (prz->type == PSTORE_TYPE_DMESG)
 		persistent_ram_save_old(prz);
 
 	if (!persistent_ram_old_size(prz))
@@ -711,18 +710,15 @@ static int ramoops_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct ramoops_platform_data *pdata = dev->platform_data;
+	struct ramoops_platform_data pdata_local;
 	struct ramoops_context *cxt = &oops_cxt;
 	size_t dump_mem_sz;
 	phys_addr_t paddr;
 	int err = -EINVAL;
 
 	if (dev_of_node(dev) && !pdata) {
-		pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
-		if (!pdata) {
-			pr_err("cannot allocate platform data buffer\n");
-			err = -ENOMEM;
-			goto fail_out;
-		}
+		pdata = &pdata_local;
+		memset(pdata, 0, sizeof(*pdata));
 
 		err = ramoops_parse_dt(pdev, pdata);
 		if (err < 0)
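A sketch of the allocation change: when the parsed platform data is only consumed during probe, a zeroed stack copy replaces a device-managed allocation that had no reason to outlive the call. Compilable userspace model with hypothetical names:

/* Model of the probe-local platform data pattern. */
#include <stdio.h>
#include <string.h>

struct pdata_model {
	unsigned long mem_size;
	unsigned long record_size;
};

static int parse_dt_model(struct pdata_model *p)
{
	p->mem_size = 0x40000;		/* made-up DT values */
	p->record_size = 0x1000;
	return 0;
}

static int probe_model(const struct pdata_model *pdata)
{
	struct pdata_model pdata_local;

	if (!pdata) {			/* OF probe: no platform data passed in */
		memset(&pdata_local, 0, sizeof(pdata_local));
		if (parse_dt_model(&pdata_local) < 0)
			return -1;
		pdata = &pdata_local;
	}
	printf("mem_size=%#lx record_size=%#lx\n",
	       pdata->mem_size, pdata->record_size);
	return 0;
}

int main(void)
{
	return probe_model(NULL);
}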
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index feeae8081c22..aa85f2874a9f 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -43,7 +43,8 @@ int sysfs_create_dir_ns(struct kobject *kobj, const void *ns)
 	kuid_t uid;
 	kgid_t gid;
 
-	BUG_ON(!kobj);
+	if (WARN_ON(!kobj))
+		return -EINVAL;
 
 	if (kobj->parent)
 		parent = kobj->parent->sd;
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index bb71db63c99c..51398457fe00 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -325,7 +325,8 @@ int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr,
 	kuid_t uid;
 	kgid_t gid;
 
-	BUG_ON(!kobj || !kobj->sd || !attr);
+	if (WARN_ON(!kobj || !kobj->sd || !attr))
+		return -EINVAL;
 
 	kobject_get_ownership(kobj, &uid, &gid);
 	return sysfs_add_file_mode_ns(kobj->sd, attr, false, attr->mode,
@@ -537,7 +538,8 @@ int sysfs_create_bin_file(struct kobject *kobj,
 	kuid_t uid;
 	kgid_t gid;
 
-	BUG_ON(!kobj || !kobj->sd || !attr);
+	if (WARN_ON(!kobj || !kobj->sd || !attr))
+		return -EINVAL;
 
 	kobject_get_ownership(kobj, &uid, &gid);
 	return sysfs_add_file_mode_ns(kobj->sd, &attr->attr, true,
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 1eb2d6307663..57038604d4a8 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -112,7 +112,8 @@ static int internal_create_group(struct kobject *kobj, int update,
 	kgid_t gid;
 	int error;
 
-	BUG_ON(!kobj || (!update && !kobj->sd));
+	if (WARN_ON(!kobj || (!update && !kobj->sd)))
+		return -EINVAL;
 
 	/* Updates may happen before the object has been instantiated */
 	if (unlikely(update && !kobj->sd))
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 215c225b2ca1..c4deecc80f67 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -23,7 +23,8 @@ static int sysfs_do_create_link_sd(struct kernfs_node *parent,
 {
 	struct kernfs_node *kn, *target = NULL;
 
-	BUG_ON(!name || !parent);
+	if (WARN_ON(!name || !parent))
+		return -EINVAL;
 
 	/*
 	 * We don't own @target_kobj and it may be removed at any time.
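The four sysfs changes above share one pattern: a NULL argument now produces a backtrace and a recoverable -EINVAL instead of halting the machine. A compilable userspace model with a mocked WARN_ON (the real macro also returns the condition's truth value, which is what makes the if-return idiom work):

/* Userspace model of the BUG_ON -> WARN_ON conversion. */
#include <stdio.h>

#define WARN_ON(cond) \
	((cond) ? (fprintf(stderr, "WARNING at %s:%d\n", __FILE__, __LINE__), 1) : 0)

struct kobj_model { const char *name; };

static int create_dir_model(const struct kobj_model *kobj)
{
	if (WARN_ON(!kobj))
		return -22;		/* -EINVAL back to the caller */
	printf("created %s\n", kobj->name);
	return 0;
}

int main(void)
{
	struct kobj_model k = { "demo" };

	create_dir_model(NULL);		/* warns, returns, keeps running */
	return create_dir_model(&k);
}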
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 1c8eecfe52b8..6acf1bfa0bfe 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -768,18 +768,23 @@ xrep_findroot_block(
 		if (!uuid_equal(&btblock->bb_u.s.bb_uuid,
 				&mp->m_sb.sb_meta_uuid))
 			goto out;
+		/*
+		 * Read verifiers can reference b_ops, so we set the pointer
+		 * here. If the verifier fails we'll reset the buffer state
+		 * to what it was before we touched the buffer.
+		 */
+		bp->b_ops = fab->buf_ops;
 		fab->buf_ops->verify_read(bp);
 		if (bp->b_error) {
+			bp->b_ops = NULL;
 			bp->b_error = 0;
 			goto out;
 		}
 
 		/*
 		 * Some read verifiers will (re)set b_ops, so we must be
-		 * careful not to blow away any such assignment.
+		 * careful not to change b_ops after running the verifier.
 		 */
-		if (!bp->b_ops)
-			bp->b_ops = fab->buf_ops;
 	}
 
 	/*
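The repair probe has to assign b_ops before invoking the candidate verifier, because verifiers may reach back through b_ops, and it must fully unwind the buffer state when the guess is wrong. A compilable model of that probe-and-unwind pattern; all types are stand-ins, and -117 mimics EFSCORRUPTED:

/* Model of the set-ops-then-verify probe in xrep_findroot_block(). */
#include <stdio.h>

struct buf_model;

struct ops_model {
	void (*verify_read)(struct buf_model *bp);
};

struct buf_model {
	const struct ops_model *ops;
	int error;
};

/* stand-in verifier: real XFS verifiers may consult the buffer's ops
 * via helpers, which is why ops must be assigned before the call */
static void verify_btree(struct buf_model *bp)
{
	bp->error = bp->ops ? 0 : -117;
}

static int findroot_try(struct buf_model *bp, const struct ops_model *fab_ops)
{
	bp->ops = fab_ops;		/* visible while the verifier runs */
	fab_ops->verify_read(bp);
	if (bp->error) {
		bp->ops = NULL;		/* unwind to the pre-probe state */
		bp->error = 0;
		return 0;		/* not this btree type, keep searching */
	}
	return 1;			/* match: leave ops in place */
}

int main(void)
{
	const struct ops_model ops = { verify_btree };
	struct buf_model bp = { 0 };

	printf("match=%d ops_set=%d\n", findroot_try(&bp, &ops), bp.ops != NULL);
	return 0;
}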
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 338b9d9984e0..d9048bcea49c 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -449,6 +449,7 @@ xfs_map_blocks(
 	}
 
 	wpc->imap = imap;
+	xfs_trim_extent_eof(&wpc->imap, ip);
 	trace_xfs_map_blocks_found(ip, offset, count, wpc->io_type, &imap);
 	return 0;
 allocate_blocks:
@@ -459,6 +460,7 @@ allocate_blocks:
 	ASSERT(whichfork == XFS_COW_FORK || cow_fsb == NULLFILEOFF ||
 	       imap.br_startoff + imap.br_blockcount <= cow_fsb);
 	wpc->imap = imap;
+	xfs_trim_extent_eof(&wpc->imap, ip);
 	trace_xfs_map_blocks_alloc(ip, offset, count, wpc->io_type, &imap);
 	return 0;
 }
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index eedc5e0156ff..4f5f2ff3f70f 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -776,10 +776,26 @@ _xfs_buf_read(
 }
 
 /*
+ * Set buffer ops on an unchecked buffer and validate it, if possible.
+ *
  * If the caller passed in an ops structure and the buffer doesn't have ops
  * assigned, set the ops and use them to verify the contents. If the contents
  * cannot be verified, we'll clear XBF_DONE. We assume the buffer has no
  * recorded errors and is already in XBF_DONE state.
+ *
+ * Under normal operations, every in-core buffer must have buffer ops assigned
+ * to them when the buffer is read in from disk so that we can validate the
+ * metadata.
+ *
+ * However, there are two scenarios where one can encounter in-core buffers
+ * that don't have buffer ops. The first is during log recovery of buffers on
+ * a V4 filesystem, though these buffers are purged at the end of recovery.
+ *
+ * The other is online repair, which tries to match arbitrary metadata blocks
+ * with btree types in order to find the root. If online repair doesn't match
+ * the buffer with /any/ btree type, the buffer remains in memory in DONE state
+ * with no ops, and a subsequent read_buf call from elsewhere will not set the
+ * ops. This function helps us fix this situation.
  */
 int
 xfs_buf_ensure_ops(
@@ -1536,8 +1552,7 @@ __xfs_buf_submit(
 		xfs_buf_ioerror(bp, -EIO);
 		bp->b_flags &= ~XBF_DONE;
 		xfs_buf_stale(bp);
-		if (bp->b_flags & XBF_ASYNC)
-			xfs_buf_ioend(bp);
+		xfs_buf_ioend(bp);
 		return -EIO;
 	}
 
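The submit change matters most for synchronous buffers: on a shut-down filesystem the early-failure path used to call xfs_buf_ioend() only for XBF_ASYNC buffers, leaving synchronous submitters without completion processing. A toy model of the fixed flow, with stand-in types and values:

/* Model of the early-failure path in __xfs_buf_submit(). */
#include <stdio.h>

#define XBF_ASYNC	0x1
#define XBF_DONE	0x2

struct buf_model {
	unsigned int flags;
	int error;
	int ioend_ran;
};

static void ioend_model(struct buf_model *bp)
{
	bp->ioend_ran = 1;		/* completes waiters, runs callbacks */
}

static int submit_model(struct buf_model *bp, int fs_shut_down)
{
	if (fs_shut_down) {
		bp->error = -5;		/* -EIO */
		bp->flags &= ~XBF_DONE;
		ioend_model(bp);	/* previously skipped unless XBF_ASYNC */
		return -5;
	}
	/* ... queue the I/O ... */
	return 0;
}

int main(void)
{
	struct buf_model sync_buf = { XBF_DONE, 0, 0 };

	submit_model(&sync_buf, 1);
	printf("sync submitter saw ioend: %d\n", sync_buf.ioend_ran);	/* 1 */
	return 0;
}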