Merge tag v4.15 of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git

To resolve conflicts in:
  drivers/infiniband/hw/mlx5/main.c
  drivers/infiniband/hw/mlx5/qp.c

From patches merged into the -rc cycle. The conflict resolution matches
what linux-next has been carrying.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
author: Jason Gunthorpe <jgg@mellanox.com> 2018-01-29 15:26:40 -0500
committer: Jason Gunthorpe <jgg@mellanox.com> 2018-01-30 11:30:00 -0500
commit: e7996a9a77fc669387da43ff4823b91cc4872bd0 (patch)
tree: 617f0a128e222539d67e8cccc359f1bc4b984900 /fs
parent: b5fa635aab8f0d39a824c01991266a6d06f007fb (diff)
parent: d8a5b80568a9cb66810e75b182018e9edb68e8ff (diff)
73 files changed, 678 insertions, 342 deletions
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index ff8d5bf4354f..23c7f395d718 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -895,20 +895,38 @@ error:
 * However, if we didn't have a callback promise outstanding, or it was
 * outstanding on a different server, then it won't break it either...
 */
-static int afs_dir_remove_link(struct dentry *dentry, struct key *key)
+static int afs_dir_remove_link(struct dentry *dentry, struct key *key,
+                               unsigned long d_version_before,
+                               unsigned long d_version_after)
 {
+        bool dir_valid;
        int ret = 0;
+        /* There were no intervening changes on the server if the version
+         * number we got back was incremented by exactly 1.
+         */
+        dir_valid = (d_version_after == d_version_before + 1);
        if (d_really_is_positive(dentry)) {
                struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
-                if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+                if (dir_valid) {
-                        kdebug("AFS_VNODE_DELETED");
+                        drop_nlink(&vnode->vfs_inode);
-                clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+                        if (vnode->vfs_inode.i_nlink == 0) {
+                                set_bit(AFS_VNODE_DELETED, &vnode->flags);
-                ret = afs_validate(vnode, key);
+                                clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
-                if (ret == -ESTALE)
+                        }
                        ret = 0;
+                } else {
+                        clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+                        if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+                                kdebug("AFS_VNODE_DELETED");
+                        ret = afs_validate(vnode, key);
+                        if (ret == -ESTALE)
+                                ret = 0;
+                }
                _debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, ret);
        }
@@ -923,6 +941,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
        struct afs_fs_cursor fc;
        struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
        struct key *key;
+        unsigned long d_version = (unsigned long)dentry->d_fsdata;
        int ret;
        _enter("{%x:%u},{%pd}",
@@ -955,7 +974,9 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
                afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
                ret = afs_end_vnode_operation(&fc);
                if (ret == 0)
-                        ret = afs_dir_remove_link(dentry, key);
+                        ret = afs_dir_remove_link(
+                                dentry, key, d_version,
+                                (unsigned long)dvnode->status.data_version);
        }
 error_key:
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 3415eb7484f6..1e81864ef0b2 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -377,6 +377,10 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
        }
        read_sequnlock_excl(&vnode->cb_lock);
+        if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+                clear_nlink(&vnode->vfs_inode);
        if (valid)
                goto valid;
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index ea1460b9b71a..e1126659f043 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -885,7 +885,7 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count,
 {
        struct afs_net *net = call->net;
        enum afs_call_state state;
-        u32 remote_abort;
+        u32 remote_abort = 0;
        int ret;
        _enter("{%s,%zu},,%zu,%d",
diff --git a/fs/afs/write.c b/fs/afs/write.c
index cb5f8a3df577..9370e2feb999 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -198,7 +198,7 @@ int afs_write_end(struct file *file, struct address_space *mapping,
                        ret = afs_fill_page(vnode, key, pos + copied,
                                            len - copied, page);
                        if (ret < 0)
-                                return ret;
+                                goto out;
                }
                SetPageUptodate(page);
        }
@@ -206,10 +206,12 @@ int afs_write_end(struct file *file, struct address_space *mapping,
        set_page_dirty(page);
        if (PageDirty(page))
                _debug("dirtied");
+        ret = copied;
+out:
        unlock_page(page);
        put_page(page);
+        return ret;
-        return copied;
 }
 /*
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 8fc41705c7cd..961a12dc6dc8 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -170,7 +170,6 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
        mutex_unlock(&sbi->wq_mutex);
-        if (autofs4_write(sbi, pipe, &pkt, pktsz))
        switch (ret = autofs4_write(sbi, pipe, &pkt, pktsz)) {
        case 0:
                break;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 531e0a8645b0..1e74cf826532 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1032,14 +1032,17 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
                     root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
                    !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
                        ret = btrfs_inc_ref(trans, root, buf, 1);
-                        BUG_ON(ret); /* -ENOMEM */
+                        if (ret)
+                                return ret;
                        if (root->root_key.objectid ==
                            BTRFS_TREE_RELOC_OBJECTID) {
                                ret = btrfs_dec_ref(trans, root, buf, 0);
-                                BUG_ON(ret); /* -ENOMEM */
+                                if (ret)
+                                        return ret;
                                ret = btrfs_inc_ref(trans, root, cow, 1);
-                                BUG_ON(ret); /* -ENOMEM */
+                                if (ret)
+                                        return ret;
                        }
                        new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
                } else {
@@ -1049,7 +1052,8 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
                                ret = btrfs_inc_ref(trans, root, cow, 1);
                        else
                                ret = btrfs_inc_ref(trans, root, cow, 0);
-                        BUG_ON(ret); /* -ENOMEM */
+                        if (ret)
+                                return ret;
                }
                if (new_flags != 0) {
                        int level = btrfs_header_level(buf);
@@ -1068,9 +1072,11 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
                                ret = btrfs_inc_ref(trans, root, cow, 1);
                        else
                                ret = btrfs_inc_ref(trans, root, cow, 0);
-                        BUG_ON(ret); /* -ENOMEM */
+                        if (ret)
+                                return ret;
                        ret = btrfs_dec_ref(trans, root, buf, 1);
-                        BUG_ON(ret); /* -ENOMEM */
+                        if (ret)
+                                return ret;
                }
                clean_tree_block(fs_info, buf);
                *last_ref = 1;
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 5d73f79ded8b..a6226cd6063c 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -87,6 +87,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
        spin_lock(&root->inode_lock);
        node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
        if (node) {
                if (btrfs_inode->delayed_node) {
                        refcount_inc(&node->refs);      /* can be accessed */
@@ -94,9 +95,30 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
                        spin_unlock(&root->inode_lock);
                        return node;
                }
-                btrfs_inode->delayed_node = node;
-                /* can be accessed and cached in the inode */
+                /*
-                refcount_add(2, &node->refs);
+                 * It's possible that we're racing into the middle of removing
+                 * this node from the radix tree.  In this case, the refcount
+                 * was zero and it should never go back to one.  Just return
+                 * NULL like it was never in the radix at all; our release
+                 * function is in the process of removing it.
+                 *
+                 * Some implementations of refcount_inc refuse to bump the
+                 * refcount once it has hit zero.  If we don't do this dance
+                 * here, refcount_inc() may decide to just WARN_ONCE() instead
+                 * of actually bumping the refcount.
+                 *
+                 * If this node is properly in the radix, we want to bump the
+                 * refcount twice, once for the inode and once for this get
+                 * operation.
+                 */
+                if (refcount_inc_not_zero(&node->refs)) {
+                        refcount_inc(&node->refs);
+                        btrfs_inode->delayed_node = node;
+                } else {
+                        node = NULL;
+                }
                spin_unlock(&root->inode_lock);
                return node;
        }
@@ -254,17 +276,18 @@ static void __btrfs_release_delayed_node(
        mutex_unlock(&delayed_node->mutex);
        if (refcount_dec_and_test(&delayed_node->refs)) {
-                bool free = false;
                struct btrfs_root *root = delayed_node->root;
                spin_lock(&root->inode_lock);
-                if (refcount_read(&delayed_node->refs) == 0) {
+                /*
-                        radix_tree_delete(&root->delayed_nodes_tree,
+                 * Once our refcount goes to zero, nobody is allowed to bump it
-                                          delayed_node->inode_id);
+                 * back up.  We can delete it now.
-                        free = true;
+                 */
-                }
+                ASSERT(refcount_read(&delayed_node->refs) == 0);
+                radix_tree_delete(&root->delayed_nodes_tree,
+                                  delayed_node->inode_id);
                spin_unlock(&root->inode_lock);
-                if (free)
+                kmem_cache_free(delayed_node_cache, delayed_node);
-                        kmem_cache_free(delayed_node_cache, delayed_node);
        }
 }
@@ -1610,28 +1633,18 @@ void btrfs_readdir_put_delayed_items(struct inode *inode,
 int btrfs_should_delete_dir_index(struct list_head *del_list,
                                  u64 index)
 {
-        struct btrfs_delayed_item *curr, *next;
+        struct btrfs_delayed_item *curr;
-        int ret;
+        int ret = 0;
-        if (list_empty(del_list))
-                return 0;
-        list_for_each_entry_safe(curr, next, del_list, readdir_list) {
+        list_for_each_entry(curr, del_list, readdir_list) {
                if (curr->key.offset > index)
                        break;
+                if (curr->key.offset == index) {
-                list_del(&curr->readdir_list);
+                        ret = 1;
-                ret = (curr->key.offset == index);
+                        break;
+                }
-                if (refcount_dec_and_test(&curr->refs))
-                        kfree(curr);
-                if (ret)
-                        return 1;
-                else
-                        continue;
        }
-        return 0;
+        return ret;
 }
 /*
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 10a2a579cc7f..a8ecccfc36de 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3231,6 +3231,7 @@ static int write_dev_supers(struct btrfs_device *device,
        int errors = 0;
        u32 crc;
        u64 bytenr;
+        int op_flags;
        if (max_mirrors == 0)
                max_mirrors = BTRFS_SUPER_MIRROR_MAX;
@@ -3273,13 +3274,10 @@ static int write_dev_supers(struct btrfs_device *device,
                 * we fua the first super.  The others we allow
                 * to go down lazy.
                 */
-                if (i == 0) {
+                op_flags = REQ_SYNC | REQ_META | REQ_PRIO;
-                        ret = btrfsic_submit_bh(REQ_OP_WRITE,
+                if (i == 0 && !btrfs_test_opt(device->fs_info, NOBARRIER))
-                                REQ_SYNC | REQ_FUA | REQ_META | REQ_PRIO, bh);
+                        op_flags |= REQ_FUA;
-                } else {
+                ret = btrfsic_submit_bh(REQ_OP_WRITE, op_flags, bh);
-                        ret = btrfsic_submit_bh(REQ_OP_WRITE,
-                                REQ_SYNC | REQ_META | REQ_PRIO, bh);
-                }
                if (ret)
                        errors++;
        }
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4497f937e8fb..2f4328511ac8 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -9206,6 +9206,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
        ret = btrfs_del_root(trans, fs_info, &root->root_key);
        if (ret) {
                btrfs_abort_transaction(trans, ret);
+                err = ret;
                goto out_end_trans;
        }
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 993061f83067..e1a7f3cb5be9 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3005,6 +3005,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
                compress_type = ordered_extent->compress_type;
        if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
                BUG_ON(compress_type);
+                btrfs_qgroup_free_data(inode, NULL, ordered_extent->file_offset,
+                                       ordered_extent->len);
                ret = btrfs_mark_extent_written(trans, BTRFS_I(inode),
                                                ordered_extent->file_offset,
                                                ordered_extent->file_offset +
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d748ad1c3620..2ef8acaac688 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2206,7 +2206,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
        if (!path)
                return -ENOMEM;
-        ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX];
+        ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX - 1];
        key.objectid = tree_id;
        key.type = BTRFS_ROOT_ITEM_KEY;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 49810b70afd3..a25684287501 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -237,7 +237,6 @@ static struct btrfs_device *__alloc_device(void)
                kfree(dev);
                return ERR_PTR(-ENOMEM);
        }
-        bio_get(dev->flush_bio);
        INIT_LIST_HEAD(&dev->dev_list);
        INIT_LIST_HEAD(&dev->dev_alloc_list);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index ab69dcb70e8a..1b468250e947 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1440,6 +1440,29 @@ static int __close_session(struct ceph_mds_client *mdsc,
        return request_close_session(mdsc, session);
 }
+static bool drop_negative_children(struct dentry *dentry)
+{
+        struct dentry *child;
+        bool all_negative = true;
+        if (!d_is_dir(dentry))
+                goto out;
+        spin_lock(&dentry->d_lock);
+        list_for_each_entry(child, &dentry->d_subdirs, d_child) {
+                if (d_really_is_positive(child)) {
+                        all_negative = false;
+                        break;
+                }
+        }
+        spin_unlock(&dentry->d_lock);
+        if (all_negative)
+                shrink_dcache_parent(dentry);
+out:
+        return all_negative;
+}
 /*
 * Trim old(er) caps.
 *
@@ -1490,16 +1513,27 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
        if ((used | wanted) & ~oissued & mine)
                goto out;   /* we need these caps */
-        session->s_trim_caps--;
        if (oissued) {
                /* we aren't the only cap.. just remove us */
                __ceph_remove_cap(cap, true);
+                session->s_trim_caps--;
        } else {
+                struct dentry *dentry;
                /* try dropping referring dentries */
                spin_unlock(&ci->i_ceph_lock);
-                d_prune_aliases(inode);
+                dentry = d_find_any_alias(inode);
-                dout("trim_caps_cb %p cap %p  pruned, count now %d\n",
+                if (dentry && drop_negative_children(dentry)) {
-                     inode, cap, atomic_read(&inode->i_count));
+                        int count;
+                        dput(dentry);
+                        d_prune_aliases(inode);
+                        count = atomic_read(&inode->i_count);
+                        if (count == 1)
+                                session->s_trim_caps--;
+                        dout("trim_caps_cb %p cap %p pruned, count now %d\n",
+                             inode, cap, count);
+                } else {
+                        dput(dentry);
+                }
                return 0;
        }
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index e06740436b92..ed88ab8a4774 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -1406,7 +1406,8 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,
        } while (rc == -EAGAIN);
        if (rc) {
-                cifs_dbg(VFS, "ioctl error in smb2_get_dfs_refer rc=%d\n", rc);
+                if (rc != -ENOENT)
+                        cifs_dbg(VFS, "ioctl error in smb2_get_dfs_refer rc=%d\n", rc);
                goto out;
        }
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 5331631386a2..01346b8b6edb 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -2678,27 +2678,27 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
        cifs_small_buf_release(req);
        rsp = (struct smb2_read_rsp *)rsp_iov.iov_base;
-        shdr = get_sync_hdr(rsp);
-        if (shdr->Status == STATUS_END_OF_FILE) {
+        if (rc) {
+                if (rc != -ENODATA) {
+                        cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE);
+                        cifs_dbg(VFS, "Send error in read = %d\n", rc);
+                }
                free_rsp_buf(resp_buftype, rsp_iov.iov_base);
-                return 0;
+                return rc == -ENODATA ? 0 : rc;
        }
-        if (rc) {
+        *nbytes = le32_to_cpu(rsp->DataLength);
-                cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE);
+        if ((*nbytes > CIFS_MAX_MSGSIZE) ||
-                cifs_dbg(VFS, "Send error in read = %d\n", rc);
+            (*nbytes > io_parms->length)) {
-        } else {
+                cifs_dbg(FYI, "bad length %d for count %d\n",
-                *nbytes = le32_to_cpu(rsp->DataLength);
+                         *nbytes, io_parms->length);
-                if ((*nbytes > CIFS_MAX_MSGSIZE) ||
+                rc = -EIO;
-                    (*nbytes > io_parms->length)) {
+                *nbytes = 0;
-                        cifs_dbg(FYI, "bad length %d for count %d\n",
-                                 *nbytes, io_parms->length);
-                        rc = -EIO;
-                        *nbytes = 0;
-                }
        }
+        shdr = get_sync_hdr(rsp);
        if (*buf) {
                memcpy(*buf, (char *)shdr + rsp->DataOffset, *nbytes);
                free_rsp_buf(resp_buftype, rsp_iov.iov_base);
diff --git a/fs/cramfs/Kconfig b/fs/cramfs/Kconfig
index f937082f3244..58e2fe40b2a0 100644
--- a/fs/cramfs/Kconfig
+++ b/fs/cramfs/Kconfig
@@ -34,6 +34,7 @@ config CRAMFS_BLOCKDEV
 config CRAMFS_MTD
        bool "Support CramFs image directly mapped in physical memory"
        depends on CRAMFS && MTD
+        depends on CRAMFS=m || MTD=y
        default y if !CRAMFS_BLOCKDEV
        help
          This option allows the CramFs driver to load data directly from
diff --git a/fs/dax.c b/fs/dax.c
index 78b72c48374e..95981591977a 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -627,8 +627,7 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
                        if (pfn != pmd_pfn(*pmdp))
                                goto unlock_pmd;
-                        if (!pmd_dirty(*pmdp)
+                        if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp))
-                                        && !pmd_access_permitted(*pmdp, WRITE))
                                goto unlock_pmd;
                        flush_cache_page(vma, address, pfn);
diff --git a/fs/exec.c b/fs/exec.c
index 6be2aa0ab26f..7eb8d21bcab9 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1216,15 +1216,14 @@ killed:
        return -EAGAIN;
 }
-char *get_task_comm(char *buf, struct task_struct *tsk)
+char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk)
 {
-        /* buf must be at least sizeof(tsk->comm) in size */
        task_lock(tsk);
-        strncpy(buf, tsk->comm, sizeof(tsk->comm));
+        strncpy(buf, tsk->comm, buf_size);
        task_unlock(tsk);
        return buf;
 }
-EXPORT_SYMBOL_GPL(get_task_comm);
+EXPORT_SYMBOL_GPL(__get_task_comm);
 /*
 * These functions flushes out all traces of the currently running executable
@@ -1340,24 +1339,24 @@ void setup_new_exec(struct linux_binprm * bprm)
                 * avoid bad behavior from the prior rlimits. This has to
                 * happen before arch_pick_mmap_layout(), which examines
                 * RLIMIT_STACK, but after the point of no return to avoid
-                 * races from other threads changing the limits. This also
+                 * needing to clean up the change on failure.
-                 * must be protected from races with prlimit() calls.
                 */
-                task_lock(current->group_leader);
                if (current->signal->rlim[RLIMIT_STACK].rlim_cur > _STK_LIM)
                        current->signal->rlim[RLIMIT_STACK].rlim_cur = _STK_LIM;
-                if (current->signal->rlim[RLIMIT_STACK].rlim_max > _STK_LIM)
-                        current->signal->rlim[RLIMIT_STACK].rlim_max = _STK_LIM;
-                task_unlock(current->group_leader);
        }
        arch_pick_mmap_layout(current->mm);
        current->sas_ss_sp = current->sas_ss_size = 0;
-        /* Figure out dumpability. */
+        /*
+         * Figure out dumpability. Note that this checking only of current
+         * is wrong, but userspace depends on it. This should be testing
+         * bprm->secureexec instead.
+         */
        if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP ||
-            bprm->secureexec)
+            !(uid_eq(current_euid(), current_uid()) &&
+              gid_eq(current_egid(), current_gid())))
                set_dumpable(current->mm, suid_dumpable);
        else
                set_dumpable(current->mm, SUID_DUMP_USER);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 07bca11749d4..c941251ac0c0 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4722,6 +4722,7 @@ retry:
                                                    EXT4_INODE_EOFBLOCKS);
                }
                ext4_mark_inode_dirty(handle, inode);
+                ext4_update_inode_fsync_trans(handle, inode, 1);
                ret2 = ext4_journal_stop(handle);
                if (ret2)
                        break;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index b4267d72f249..b32cf263750d 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -816,6 +816,8 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
 #ifdef CONFIG_EXT4_FS_POSIX_ACL
                struct posix_acl *p = get_acl(dir, ACL_TYPE_DEFAULT);
+                if (IS_ERR(p))
+                        return ERR_CAST(p);
                if (p) {
                        int acl_size = p->a_count * sizeof(ext4_acl_entry);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7df2c5644e59..534a9130f625 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -149,6 +149,15 @@ static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
 */
 int ext4_inode_is_fast_symlink(struct inode *inode)
 {
+        if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) {
+                int ea_blocks = EXT4_I(inode)->i_file_acl ?
+                                EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0;
+                if (ext4_has_inline_data(inode))
+                        return 0;
+                return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
+        }
        return S_ISLNK(inode->i_mode) && inode->i_size &&
               (inode->i_size < EXT4_N_BLOCKS * 4);
 }
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 798b3ac680db..e750d68fbcb5 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1399,6 +1399,10 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
                               "falling back\n"));
        }
        nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
+        if (!nblocks) {
+                ret = NULL;
+                goto cleanup_and_exit;
+        }
        start = EXT4_I(dir)->i_dir_start_lookup;
        if (start >= nblocks)
                start = 0;
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index 8d6b7e35faf9..c83ece7facc5 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -150,7 +150,6 @@ static int hpfs_readdir(struct file *file, struct dir_context *ctx)
                        if (unlikely(ret < 0))
                                goto out;
                        ctx->pos = ((loff_t) hpfs_de_as_down_as_possible(inode->i_sb, hpfs_inode->i_dno) << 4) + 1;
-                        file->f_version = inode->i_version;
                }
                next_pos = ctx->pos;
                if (!(de = map_pos_dirent(inode, &next_pos, &qbh))) {
diff --git a/fs/hpfs/dnode.c b/fs/hpfs/dnode.c
index 3b834563b1f1..a4ad18afbdec 100644
--- a/fs/hpfs/dnode.c
+++ b/fs/hpfs/dnode.c
@@ -419,7 +419,6 @@ int hpfs_add_dirent(struct inode *i,
                c = 1;
                goto ret;
        }       
-        i->i_version++;
        c = hpfs_add_to_dnode(i, dno, name, namelen, new_de, 0);
        ret:
        return c;
@@ -726,7 +725,6 @@ int hpfs_remove_dirent(struct inode *i, dnode_secno dno, struct hpfs_dirent *de,
                        return 2;
                }
        }
-        i->i_version++;
        for_all_poss(i, hpfs_pos_del, (t = get_pos(dnode, de)) + 1, 1);
        hpfs_delete_de(i->i_sb, dnode, de);
        hpfs_mark_4buffers_dirty(qbh);
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index c45a3b9b9ac7..f2c3ebcd309c 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -235,7 +235,6 @@ static struct inode *hpfs_alloc_inode(struct super_block *sb)
        ei = kmem_cache_alloc(hpfs_inode_cachep, GFP_NOFS);
        if (!ei)
                return NULL;
-        ei->vfs_inode.i_version = 1;
        return &ei->vfs_inode;
 }
diff --git a/fs/namespace.c b/fs/namespace.c
index e158ec6b527b..9d1374ab6e06 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2826,6 +2826,7 @@ long do_mount(const char *dev_name, const char __user *dir_name,
                            SB_DIRSYNC |
                            SB_SILENT |
                            SB_POSIXACL |
+                            SB_LAZYTIME |
                            SB_I_VERSION);
        if (flags & MS_REMOUNT)
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 0ac2fb1c6b63..b9129e2befea 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -291,12 +291,23 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
        const struct sockaddr *sap = data->addr;
        struct nfs_net *nn = net_generic(data->net, nfs_net_id);
+again:
        list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
                const struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
                /* Don't match clients that failed to initialise properly */
                if (clp->cl_cons_state < 0)
                        continue;
+                /* If a client is still initializing then we need to wait */
+                if (clp->cl_cons_state > NFS_CS_READY) {
+                        refcount_inc(&clp->cl_count);
+                        spin_unlock(&nn->nfs_client_lock);
+                        nfs_wait_client_init_complete(clp);
+                        nfs_put_client(clp);
+                        spin_lock(&nn->nfs_client_lock);
+                        goto again;
+                }
                /* Different NFS versions cannot share the same nfs_client */
                if (clp->rpc_ops != data->nfs_mod->rpc_ops)
                        continue;
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 12bbab0becb4..65a7e5da508c 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -404,15 +404,19 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
        if (error < 0)
                goto error;
-        if (!nfs4_has_session(clp))
-                nfs_mark_client_ready(clp, NFS_CS_READY);
        error = nfs4_discover_server_trunking(clp, &old);
        if (error < 0)
                goto error;
-        if (clp != old)
+        if (clp != old) {
                clp->cl_preserve_clid = true;
+                /*
+                 * Mark the client as having failed initialization so other
+                 * processes walking the nfs_client_list in nfs_match_client()
+                 * won't try to use it.
+                 */
+                nfs_mark_client_ready(clp, -EPERM);
+        }
        nfs_put_client(clp);
        clear_bit(NFS_CS_TSM_POSSIBLE, &clp->cl_flags);
        return old;
@@ -539,6 +543,9 @@ int nfs40_walk_client_list(struct nfs_client *new,
        spin_lock(&nn->nfs_client_lock);
        list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) {
+                if (pos == new)
+                        goto found;
                status = nfs4_match_client(pos, new, &prev, nn);
                if (status < 0)
                        goto out_unlock;
@@ -559,6 +566,7 @@ int nfs40_walk_client_list(struct nfs_client *new,
                 * way that a SETCLIENTID_CONFIRM to pos can succeed is
                 * if new and pos point to the same server:
                 */
+found:
                refcount_inc(&pos->cl_count);
                spin_unlock(&nn->nfs_client_lock);
@@ -572,6 +580,7 @@ int nfs40_walk_client_list(struct nfs_client *new,
                case 0:
                        nfs4_swap_callback_idents(pos, new);
                        pos->cl_confirm = new->cl_confirm;
+                        nfs_mark_client_ready(pos, NFS_CS_READY);
                        prev = NULL;
                        *result = pos;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 5b5f464f6f2a..4a379d7918f2 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1890,6 +1890,8 @@ int nfs_commit_inode(struct inode *inode, int how)
        if (res)
                error = nfs_generic_commit_list(inode, &head, how, &cinfo);
        nfs_commit_end(cinfo.mds);
+        if (res == 0)
+                return res;
        if (error < 0)
                goto out_error;
        if (!may_wait)
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 697f8ae7792d..fdf2aad73470 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -61,6 +61,9 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
                        else
                                gi->gid[i] = rqgi->gid[i];
                }
+                /* Each thread allocates its own gi, no race */
+                groups_sort(gi);
        } else {
                gi = get_group_info(rqgi);
        }
diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c
index ded456f17de6..c584ad8d023c 100644
--- a/fs/orangefs/devorangefs-req.c
+++ b/fs/orangefs/devorangefs-req.c
@@ -162,7 +162,7 @@ static ssize_t orangefs_devreq_read(struct file *file,
        struct orangefs_kernel_op_s *op, *temp;
        __s32 proto_ver = ORANGEFS_KERNEL_PROTO_VERSION;
        static __s32 magic = ORANGEFS_DEVREQ_MAGIC;
-        struct orangefs_kernel_op_s *cur_op = NULL;
+        struct orangefs_kernel_op_s *cur_op;
        unsigned long ret;
        /* We do not support blocking IO. */
@@ -186,6 +186,7 @@ static ssize_t orangefs_devreq_read(struct file *file,
                return -EAGAIN;
 restart:
+        cur_op = NULL;
        /* Get next op (if any) from top of list. */
        spin_lock(&orangefs_request_list_lock);
        list_for_each_entry_safe(op, temp, &orangefs_request_list, list) {
diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c
index 1668fd645c45..0d228cd087e6 100644
--- a/fs/orangefs/file.c
+++ b/fs/orangefs/file.c
@@ -452,7 +452,7 @@ ssize_t orangefs_inode_read(struct inode *inode,
 static ssize_t orangefs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 {
        struct file *file = iocb->ki_filp;
-        loff_t pos = *(&iocb->ki_pos);
+        loff_t pos = iocb->ki_pos;
        ssize_t rc = 0;
        BUG_ON(iocb->private);
@@ -492,9 +492,6 @@ static ssize_t orangefs_file_write_iter(struct kiocb *iocb, struct iov_iter *ite
                }
        }
-        if (file->f_pos > i_size_read(file->f_mapping->host))
-                orangefs_i_size_write(file->f_mapping->host, file->f_pos);
        rc = generic_write_checks(iocb, iter);
        if (rc <= 0) {
@@ -508,7 +505,7 @@ static ssize_t orangefs_file_write_iter(struct kiocb *iocb, struct iov_iter *ite
         * pos to the end of the file, so we will wait till now to set
         * pos...
         */
-        pos = *(&iocb->ki_pos);
+        pos = iocb->ki_pos;
        rc = do_readv_writev(ORANGEFS_IO_WRITE,
                             file,
diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h
index 97adf7d100b5..2595453fe737 100644
--- a/fs/orangefs/orangefs-kernel.h
+++ b/fs/orangefs/orangefs-kernel.h
@@ -533,17 +533,6 @@ do {									\
        sys_attr.mask = ORANGEFS_ATTR_SYS_ALL_SETABLE;                  \
 } while (0)
-static inline void orangefs_i_size_write(struct inode *inode, loff_t i_size)
-{
-#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
-        inode_lock(inode);
-#endif
-        i_size_write(inode, i_size);
-#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
-        inode_unlock(inode);
-#endif
-}
 static inline void orangefs_set_timeout(struct dentry *dentry)
 {
        unsigned long time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c
index 835c6e148afc..0577d6dba8c8 100644
--- a/fs/orangefs/waitqueue.c
+++ b/fs/orangefs/waitqueue.c
@@ -29,10 +29,10 @@ static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s
 */
 void purge_waiting_ops(void)
 {
-        struct orangefs_kernel_op_s *op;
+        struct orangefs_kernel_op_s *op, *tmp;
        spin_lock(&orangefs_request_list_lock);
-        list_for_each_entry(op, &orangefs_request_list, list) {
+        list_for_each_entry_safe(op, tmp, &orangefs_request_list, list) {
                gossip_debug(GOSSIP_WAIT_DEBUG,
                             "pvfs2-client-core: purging op tag %llu %s\n",
                             llu(op->tag),
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index cbfc196e5dc5..5ac415466861 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -24,6 +24,16 @@ config OVERLAY_FS_REDIRECT_DIR
          an overlay which has redirects on a kernel that doesn't support this
          feature will have unexpected results.
+config OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW
+        bool "Overlayfs: follow redirects even if redirects are turned off"
+        default y
+        depends on OVERLAY_FS
+        help
+          Disable this to get a possibly more secure configuration, but that
+          might not be backward compatible with previous kernels.
+          For more information, see Documentation/filesystems/overlayfs.txt
 config OVERLAY_FS_INDEX
        bool "Overlayfs: turn on inodes index feature by default"
        depends on OVERLAY_FS
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index e13921824c70..f9788bc116a8 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -887,7 +887,8 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir)
                spin_unlock(&dentry->d_lock);
        } else {
                kfree(redirect);
-                pr_warn_ratelimited("overlay: failed to set redirect (%i)\n", err);
+                pr_warn_ratelimited("overlayfs: failed to set redirect (%i)\n",
+                                    err);
                /* Fall back to userspace copy-up */
                err = -EXDEV;
        }
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 625ed8066570..beb945e1963c 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -435,7 +435,7 @@ int ovl_verify_index(struct dentry *index, struct ovl_path *lower,
        /* Check if index is orphan and don't warn before cleaning it */
        if (d_inode(index)->i_nlink == 1 &&
-            ovl_get_nlink(index, origin.dentry, 0) == 0)
+            ovl_get_nlink(origin.dentry, index, 0) == 0)
                err = -ENOENT;
        dput(origin.dentry);
@@ -681,6 +681,22 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
                if (d.stop)
                        break;
+                /*
+                 * Following redirects can have security consequences: it's like
+                 * a symlink into the lower layer without the permission checks.
+                 * This is only a problem if the upper layer is untrusted (e.g
+                 * comes from an USB drive).  This can allow a non-readable file
+                 * or directory to become readable.
+                 *
+                 * Only following redirects when redirects are enabled disables
+                 * this attack vector when not necessary.
+                 */
+                err = -EPERM;
+                if (d.redirect && !ofs->config.redirect_follow) {
+                        pr_warn_ratelimited("overlay: refusing to follow redirect for (%pd2)\n", dentry);
+                        goto out_put;
+                }
                if (d.redirect && d.redirect[0] == '/' && poe != roe) {
                        poe = roe;
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 13eab09a6b6f..b489099ccd49 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -180,7 +180,7 @@ static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry)
 static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode)
 {
        struct dentry *ret = vfs_tmpfile(dentry, mode, 0);
-        int err = IS_ERR(ret) ? PTR_ERR(ret) : 0;
+        int err = PTR_ERR_OR_ZERO(ret);
        pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err);
        return ret;
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 752bab645879..9d0bc03bf6e4 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -14,6 +14,8 @@ struct ovl_config {
        char *workdir;
        bool default_permissions;
        bool redirect_dir;
+        bool redirect_follow;
+        const char *redirect_mode;
        bool index;
 };
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 0daa4354fec4..8c98578d27a1 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -499,7 +499,7 @@ out:
        return err;
 fail:
-        pr_warn_ratelimited("overlay: failed to look up (%s) for ino (%i)\n",
+        pr_warn_ratelimited("overlayfs: failed to look up (%s) for ino (%i)\n",
                            p->name, err);
        goto out;
 }
@@ -663,7 +663,10 @@ static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
                        return PTR_ERR(rdt.cache);
        }
-        return iterate_dir(od->realfile, &rdt.ctx);
+        err = iterate_dir(od->realfile, &rdt.ctx);
+        ctx->pos = rdt.ctx.pos;
+        return err;
 }
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 288d20f9a55a..76440feb79f6 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -33,6 +33,13 @@ module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
 MODULE_PARM_DESC(ovl_redirect_dir_def,
                 "Default to on or off for the redirect_dir feature");
+static bool ovl_redirect_always_follow =
+        IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW);
+module_param_named(redirect_always_follow, ovl_redirect_always_follow,
+                   bool, 0644);
+MODULE_PARM_DESC(ovl_redirect_always_follow,
+                 "Follow redirects even if redirect_dir feature is turned off");
 static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
 module_param_named(index, ovl_index_def, bool, 0644);
 MODULE_PARM_DESC(ovl_index_def,
@@ -232,6 +239,7 @@ static void ovl_free_fs(struct ovl_fs *ofs)
        kfree(ofs->config.lowerdir);
        kfree(ofs->config.upperdir);
        kfree(ofs->config.workdir);
+        kfree(ofs->config.redirect_mode);
        if (ofs->creator_cred)
                put_cred(ofs->creator_cred);
        kfree(ofs);
@@ -244,6 +252,7 @@ static void ovl_put_super(struct super_block *sb)
        ovl_free_fs(ofs);
 }
+/* Sync real dirty inodes in upper filesystem (if it exists) */
 static int ovl_sync_fs(struct super_block *sb, int wait)
 {
        struct ovl_fs *ofs = sb->s_fs_info;
@@ -252,14 +261,24 @@ static int ovl_sync_fs(struct super_block *sb, int wait)
        if (!ofs->upper_mnt)
                return 0;
-        upper_sb = ofs->upper_mnt->mnt_sb;
-        if (!upper_sb->s_op->sync_fs)
+        /*
+         * If this is a sync(2) call or an emergency sync, all the super blocks
+         * will be iterated, including upper_sb, so no need to do anything.
+         *
+         * If this is a syncfs(2) call, then we do need to call
+         * sync_filesystem() on upper_sb, but enough if we do it when being
+         * called with wait == 1.
+         */
+        if (!wait)
                return 0;
-        /* real inodes have already been synced by sync_filesystem(ovl_sb) */
+        upper_sb = ofs->upper_mnt->mnt_sb;
        down_read(&upper_sb->s_umount);
-        ret = upper_sb->s_op->sync_fs(upper_sb, wait);
+        ret = sync_filesystem(upper_sb);
        up_read(&upper_sb->s_umount);
        return ret;
 }
@@ -295,6 +314,11 @@ static bool ovl_force_readonly(struct ovl_fs *ofs)
        return (!ofs->upper_mnt || !ofs->workdir);
 }
+static const char *ovl_redirect_mode_def(void)
+{
+        return ovl_redirect_dir_def ? "on" : "off";
+}
 /**
 * ovl_show_options
 *
@@ -313,12 +337,10 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
        }
        if (ofs->config.default_permissions)
                seq_puts(m, ",default_permissions");
-        if (ofs->config.redirect_dir != ovl_redirect_dir_def)
+        if (strcmp(ofs->config.redirect_mode, ovl_redirect_mode_def()) != 0)
-                seq_printf(m, ",redirect_dir=%s",
+                seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode);
-                           ofs->config.redirect_dir ? "on" : "off");
        if (ofs->config.index != ovl_index_def)
-                seq_printf(m, ",index=%s",
+                seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
-                           ofs->config.index ? "on" : "off");
        return 0;
 }
@@ -348,8 +370,7 @@ enum {
        OPT_UPPERDIR,
        OPT_WORKDIR,
        OPT_DEFAULT_PERMISSIONS,
-        OPT_REDIRECT_DIR_ON,
+        OPT_REDIRECT_DIR,
-        OPT_REDIRECT_DIR_OFF,
        OPT_INDEX_ON,
        OPT_INDEX_OFF,
        OPT_ERR,
@@ -360,8 +381,7 @@ static const match_table_t ovl_tokens = {
        {OPT_UPPERDIR,                  "upperdir=%s"},
        {OPT_WORKDIR,                   "workdir=%s"},
        {OPT_DEFAULT_PERMISSIONS,       "default_permissions"},
-        {OPT_REDIRECT_DIR_ON,           "redirect_dir=on"},
+        {OPT_REDIRECT_DIR,              "redirect_dir=%s"},
-        {OPT_REDIRECT_DIR_OFF,          "redirect_dir=off"},
        {OPT_INDEX_ON,                  "index=on"},
        {OPT_INDEX_OFF,                 "index=off"},
        {OPT_ERR,                       NULL}
@@ -390,10 +410,37 @@ static char *ovl_next_opt(char **s)
        return sbegin;
 }
+static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode)
+{
+        if (strcmp(mode, "on") == 0) {
+                config->redirect_dir = true;
+                /*
+                 * Does not make sense to have redirect creation without
+                 * redirect following.
+                 */
+                config->redirect_follow = true;
+        } else if (strcmp(mode, "follow") == 0) {
+                config->redirect_follow = true;
+        } else if (strcmp(mode, "off") == 0) {
+                if (ovl_redirect_always_follow)
+                        config->redirect_follow = true;
+        } else if (strcmp(mode, "nofollow") != 0) {
+                pr_err("overlayfs: bad mount option \"redirect_dir=%s\"\n",
+                       mode);
+                return -EINVAL;
+        }
+        return 0;
+}
 static int ovl_parse_opt(char *opt, struct ovl_config *config)
 {
        char *p;
+        config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
+        if (!config->redirect_mode)
+                return -ENOMEM;
        while ((p = ovl_next_opt(&opt)) != NULL) {
                int token;
                substring_t args[MAX_OPT_ARGS];
@@ -428,12 +475,11 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
                        config->default_permissions = true;
                        break;
-                case OPT_REDIRECT_DIR_ON:
+                case OPT_REDIRECT_DIR:
-                        config->redirect_dir = true;
+                        kfree(config->redirect_mode);
-                        break;
+                        config->redirect_mode = match_strdup(&args[0]);
+                        if (!config->redirect_mode)
-                case OPT_REDIRECT_DIR_OFF:
+                                return -ENOMEM;
-                        config->redirect_dir = false;
                        break;
                case OPT_INDEX_ON:
@@ -458,7 +504,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
                config->workdir = NULL;
        }
-        return 0;
+        return ovl_parse_redirect_mode(config, config->redirect_mode);
 }
 #define OVL_WORKDIR_NAME "work"
@@ -1160,7 +1206,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
        if (!cred)
                goto out_err;
-        ofs->config.redirect_dir = ovl_redirect_dir_def;
        ofs->config.index = ovl_index_def;
        err = ovl_parse_opt((char *) data, &ofs->config);
        if (err)
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 79375fc115d2..d67a72dcb92c 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -430,8 +430,11 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
                 * safe because the task has stopped executing permanently.
                 */
                if (permitted && (task->flags & PF_DUMPCORE)) {
-                        eip = KSTK_EIP(task);
+                        if (try_get_task_stack(task)) {
-                        esp = KSTK_ESP(task);
+                                eip = KSTK_EIP(task);
+                                esp = KSTK_ESP(task);
+                                put_task_stack(task);
+                        }
                }
        }
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 28fa85276eec..60316b52d659 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2268,7 +2268,7 @@ static int show_timer(struct seq_file *m, void *v)
        notify = timer->it_sigev_notify;
        seq_printf(m, "ID: %d\n", timer->it_id);
-        seq_printf(m, "signal: %d/%p\n",
+        seq_printf(m, "signal: %d/%px\n",
                   timer->sigq->info.si_signo,
                   timer->sigq->info.si_value.sival_ptr);
        seq_printf(m, "notify: %s/%s.%d\n",
diff --git a/fs/super.c b/fs/super.c
index d4e33e8f1e6f..06bd25d90ba5 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -191,6 +191,24 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
        INIT_LIST_HEAD(&s->s_mounts);
        s->s_user_ns = get_user_ns(user_ns);
+        init_rwsem(&s->s_umount);
+        lockdep_set_class(&s->s_umount, &type->s_umount_key);
+        /*
+         * sget() can have s_umount recursion.
+         *
+         * When it cannot find a suitable sb, it allocates a new
+         * one (this one), and tries again to find a suitable old
+         * one.
+         *
+         * In case that succeeds, it will acquire the s_umount
+         * lock of the old one. Since these are clearly distrinct
+         * locks, and this object isn't exposed yet, there's no
+         * risk of deadlocks.
+         *
+         * Annotate this by putting this lock in a different
+         * subclass.
+         */
+        down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
        if (security_sb_alloc(s))
                goto fail;
@@ -218,25 +236,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
                goto fail;
        if (list_lru_init_memcg(&s->s_inode_lru))
                goto fail;
-        init_rwsem(&s->s_umount);
-        lockdep_set_class(&s->s_umount, &type->s_umount_key);
-        /*
-         * sget() can have s_umount recursion.
-         *
-         * When it cannot find a suitable sb, it allocates a new
-         * one (this one), and tries again to find a suitable old
-         * one.
-         *
-         * In case that succeeds, it will acquire the s_umount
-         * lock of the old one. Since these are clearly distrinct
-         * locks, and this object isn't exposed yet, there's no
-         * risk of deadlocks.
-         *
-         * Annotate this by putting this lock in a different
-         * subclass.
-         */
-        down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
        s->s_count = 1;
        atomic_set(&s->s_active, 1);
        mutex_init(&s->s_vfs_rename_mutex);
@@ -518,7 +517,11 @@ retry:
        hlist_add_head(&s->s_instances, &type->fs_supers);
        spin_unlock(&sb_lock);
        get_filesystem(type);
-        register_shrinker(&s->s_shrink);
+        err = register_shrinker(&s->s_shrink);
+        if (err) {
+                deactivate_locked_super(s);
+                s = ERR_PTR(err);
+        }
        return s;
 }
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index ac9a4e65ca49..41a75f9f23fd 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -570,11 +570,14 @@ out:
 static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
                                              struct userfaultfd_wait_queue *ewq)
 {
+        struct userfaultfd_ctx *release_new_ctx;
        if (WARN_ON_ONCE(current->flags & PF_EXITING))
                goto out;
        ewq->ctx = ctx;
        init_waitqueue_entry(&ewq->wq, current);
+        release_new_ctx = NULL;
        spin_lock(&ctx->event_wqh.lock);
        /*
@@ -601,8 +604,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
                                new = (struct userfaultfd_ctx *)
                                        (unsigned long)
                                        ewq->msg.arg.reserved.reserved1;
+                                release_new_ctx = new;
-                                userfaultfd_ctx_put(new);
                        }
                        break;
                }
@@ -617,6 +619,20 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
        __set_current_state(TASK_RUNNING);
        spin_unlock(&ctx->event_wqh.lock);
+        if (release_new_ctx) {
+                struct vm_area_struct *vma;
+                struct mm_struct *mm = release_new_ctx->mm;
+                /* the various vma->vm_userfaultfd_ctx still points to it */
+                down_write(&mm->mmap_sem);
+                for (vma = mm->mmap; vma; vma = vma->vm_next)
+                        if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx)
+                                vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
+                up_write(&mm->mmap_sem);
+                userfaultfd_ctx_put(release_new_ctx);
+        }
        /*
         * ctx may go away after this if the userfault pseudo fd is
         * already released.
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 0da80019a917..83ed7715f856 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -702,7 +702,7 @@ xfs_alloc_ag_vextent(
        ASSERT(args->agbno % args->alignment == 0);
        /* if not file data, insert new block into the reverse map btree */
-        if (args->oinfo.oi_owner != XFS_RMAP_OWN_UNKNOWN) {
+        if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
                error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
                                       args->agbno, args->len, &args->oinfo);
                if (error)
@@ -1682,7 +1682,7 @@ xfs_free_ag_extent(
        bno_cur = cnt_cur = NULL;
        mp = tp->t_mountp;
-        if (oinfo->oi_owner != XFS_RMAP_OWN_UNKNOWN) {
+        if (!xfs_rmap_should_skip_owner_update(oinfo)) {
                error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
                if (error)
                        goto error0;
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 6249c92671de..a76914db72ef 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -212,6 +212,7 @@ xfs_attr_set(
        int                     flags)
 {
        struct xfs_mount        *mp = dp->i_mount;
+        struct xfs_buf          *leaf_bp = NULL;
        struct xfs_da_args      args;
        struct xfs_defer_ops    dfops;
        struct xfs_trans_res    tres;
@@ -327,9 +328,16 @@ xfs_attr_set(
                 * GROT: another possible req'mt for a double-split btree op.
                 */
                xfs_defer_init(args.dfops, args.firstblock);
-                error = xfs_attr_shortform_to_leaf(&args);
+                error = xfs_attr_shortform_to_leaf(&args, &leaf_bp);
                if (error)
                        goto out_defer_cancel;
+                /*
+                 * Prevent the leaf buffer from being unlocked so that a
+                 * concurrent AIL push cannot grab the half-baked leaf
+                 * buffer and run into problems with the write verifier.
+                 */
+                xfs_trans_bhold(args.trans, leaf_bp);
+                xfs_defer_bjoin(args.dfops, leaf_bp);
                xfs_defer_ijoin(args.dfops, dp);
                error = xfs_defer_finish(&args.trans, args.dfops);
                if (error)
@@ -337,13 +345,14 @@ xfs_attr_set(
                /*
                 * Commit the leaf transformation.  We'll need another (linked)
-                 * transaction to add the new attribute to the leaf.
+                 * transaction to add the new attribute to the leaf, which
+                 * means that we have to hold & join the leaf buffer here too.
                 */
                error = xfs_trans_roll_inode(&args.trans, dp);
                if (error)
                        goto out;
+                xfs_trans_bjoin(args.trans, leaf_bp);
+                leaf_bp = NULL;
        }
        if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
@@ -374,8 +383,9 @@ xfs_attr_set(
 out_defer_cancel:
        xfs_defer_cancel(&dfops);
-        args.trans = NULL;
 out:
+        if (leaf_bp)
+                xfs_trans_brelse(args.trans, leaf_bp);
        if (args.trans)
                xfs_trans_cancel(args.trans);
        xfs_iunlock(dp, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 53cc8b986eac..601eaa36f1ad 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -735,10 +735,13 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args)
 }
 /*
- * Convert from using the shortform to the leaf.
+ * Convert from using the shortform to the leaf.  On success, return the
+ * buffer so that we can keep it locked until we're totally done with it.
 */
 int
-xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
+xfs_attr_shortform_to_leaf(
+        struct xfs_da_args      *args,
+        struct xfs_buf          **leaf_bp)
 {
        xfs_inode_t *dp;
        xfs_attr_shortform_t *sf;
@@ -818,7 +821,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
                sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
        }
        error = 0;
+        *leaf_bp = bp;
 out:
        kmem_free(tmpbuffer);
        return error;
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h
index f7dda0c237b0..894124efb421 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.h
+++ b/fs/xfs/libxfs/xfs_attr_leaf.h
@@ -48,7 +48,8 @@ void	xfs_attr_shortform_create(struct xfs_da_args *args);
 void    xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff);
 int     xfs_attr_shortform_lookup(struct xfs_da_args *args);
 int     xfs_attr_shortform_getvalue(struct xfs_da_args *args);
-int     xfs_attr_shortform_to_leaf(struct xfs_da_args *args);
+int     xfs_attr_shortform_to_leaf(struct xfs_da_args *args,
+                        struct xfs_buf **leaf_bp);
 int     xfs_attr_shortform_remove(struct xfs_da_args *args);
 int     xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
 int     xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 1210f684d3c2..1bddbba6b80c 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5136,7 +5136,7 @@ __xfs_bunmapi(
         * blowing out the transaction with a mix of EFIs and reflink
         * adjustments.
         */
-        if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
+        if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
                max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
        else
                max_len = len;
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 072ebfe1d6ae..087fea02c389 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -249,6 +249,10 @@ xfs_defer_trans_roll(
        for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
                xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE);
+        /* Hold the (previously bjoin'd) buffer locked across the roll. */
+        for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++)
+                xfs_trans_dirty_buf(*tp, dop->dop_bufs[i]);
        trace_xfs_defer_trans_roll((*tp)->t_mountp, dop);
        /* Roll the transaction. */
@@ -264,6 +268,12 @@ xfs_defer_trans_roll(
        for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
                xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0);
+        /* Rejoin the buffers and dirty them so the log moves forward. */
+        for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) {
+                xfs_trans_bjoin(*tp, dop->dop_bufs[i]);
+                xfs_trans_bhold(*tp, dop->dop_bufs[i]);
+        }
        return error;
 }
@@ -295,6 +305,31 @@ xfs_defer_ijoin(
                }
        }
+        ASSERT(0);
+        return -EFSCORRUPTED;
+}
+/*
+ * Add this buffer to the deferred op.  Each joined buffer is relogged
+ * each time we roll the transaction.
+ */
+int
+xfs_defer_bjoin(
+        struct xfs_defer_ops            *dop,
+        struct xfs_buf                  *bp)
+{
+        int                             i;
+        for (i = 0; i < XFS_DEFER_OPS_NR_BUFS; i++) {
+                if (dop->dop_bufs[i] == bp)
+                        return 0;
+                else if (dop->dop_bufs[i] == NULL) {
+                        dop->dop_bufs[i] = bp;
+                        return 0;
+                }
+        }
+        ASSERT(0);
        return -EFSCORRUPTED;
 }
@@ -493,9 +528,7 @@ xfs_defer_init(
        struct xfs_defer_ops            *dop,
        xfs_fsblock_t                   *fbp)
 {
-        dop->dop_committed = false;
+        memset(dop, 0, sizeof(struct xfs_defer_ops));
-        dop->dop_low = false;
-        memset(&dop->dop_inodes, 0, sizeof(dop->dop_inodes));
        *fbp = NULLFSBLOCK;
        INIT_LIST_HEAD(&dop->dop_intake);
        INIT_LIST_HEAD(&dop->dop_pending);
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index d4f046dd44bd..045beacdd37d 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -59,6 +59,7 @@ enum xfs_defer_ops_type {
 };
 #define XFS_DEFER_OPS_NR_INODES 2       /* join up to two inodes */
+#define XFS_DEFER_OPS_NR_BUFS   2       /* join up to two buffers */
 struct xfs_defer_ops {
        bool                    dop_committed;  /* did any trans commit? */
@@ -66,8 +67,9 @@ struct xfs_defer_ops {
        struct list_head        dop_intake;     /* unlogged pending work */
        struct list_head        dop_pending;    /* logged pending work */
-        /* relog these inodes with each roll */
+        /* relog these with each roll */
        struct xfs_inode        *dop_inodes[XFS_DEFER_OPS_NR_INODES];
+        struct xfs_buf          *dop_bufs[XFS_DEFER_OPS_NR_BUFS];
 };
 void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type,
@@ -77,6 +79,7 @@ void xfs_defer_cancel(struct xfs_defer_ops *dop);
 void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp);
 bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop);
 int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip);
+int xfs_defer_bjoin(struct xfs_defer_ops *dop, struct xfs_buf *bp);
 /* Description of a deferred type. */
 struct xfs_defer_op_type {
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index de3f04a98656..3b57ef0f2f76 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -920,8 +920,7 @@ STATIC xfs_agnumber_t
 xfs_ialloc_ag_select(
        xfs_trans_t     *tp,            /* transaction pointer */
        xfs_ino_t       parent,         /* parent directory inode number */
-        umode_t         mode,           /* bits set to indicate file type */
+        umode_t         mode)           /* bits set to indicate file type */
-        int             okalloc)        /* ok to allocate more space */
 {
        xfs_agnumber_t  agcount;        /* number of ag's in the filesystem */
        xfs_agnumber_t  agno;           /* current ag number */
@@ -978,9 +977,6 @@ xfs_ialloc_ag_select(
                        return agno;
                }
-                if (!okalloc)
-                        goto nextag;
                if (!pag->pagf_init) {
                        error = xfs_alloc_pagf_init(mp, tp, agno, flags);
                        if (error)
@@ -1680,7 +1676,6 @@ xfs_dialloc(
        struct xfs_trans        *tp,
        xfs_ino_t               parent,
        umode_t                 mode,
-        int                     okalloc,
        struct xfs_buf          **IO_agbp,
        xfs_ino_t               *inop)
 {
@@ -1692,6 +1687,7 @@ xfs_dialloc(
        int                     noroom = 0;
        xfs_agnumber_t          start_agno;
        struct xfs_perag        *pag;
+        int                     okalloc = 1;
        if (*IO_agbp) {
                /*
@@ -1707,7 +1703,7 @@ xfs_dialloc(
         * We do not have an agbp, so select an initial allocation
         * group for inode allocation.
         */
-        start_agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
+        start_agno = xfs_ialloc_ag_select(tp, parent, mode);
        if (start_agno == NULLAGNUMBER) {
                *inop = NULLFSINO;
                return 0;
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index d2bdcd5e7312..66a8de0b1caa 100644
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -81,7 +81,6 @@ xfs_dialloc(
        struct xfs_trans *tp,           /* transaction pointer */
        xfs_ino_t       parent,         /* parent inode (directory) */
        umode_t         mode,           /* mode bits for new inode */
-        int             okalloc,        /* ok to allocate more space */
        struct xfs_buf  **agbp,         /* buf for a.g. inode header */
        xfs_ino_t       *inop);         /* inode number allocated */
diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c
index 89bf16b4d937..b0f31791c7e6 100644
--- a/fs/xfs/libxfs/xfs_iext_tree.c
+++ b/fs/xfs/libxfs/xfs_iext_tree.c
@@ -632,8 +632,6 @@ xfs_iext_insert(
        struct xfs_iext_leaf    *new = NULL;
        int                     nr_entries, i;
-        trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
        if (ifp->if_height == 0)
                xfs_iext_alloc_root(ifp, cur);
        else if (ifp->if_height == 1)
@@ -661,6 +659,8 @@ xfs_iext_insert(
        xfs_iext_set(cur_rec(cur), irec);
        ifp->if_bytes += sizeof(struct xfs_iext_rec);
+        trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
        if (new)
                xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2);
 }
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 585b35d34142..c40d26763075 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1488,27 +1488,12 @@ __xfs_refcount_cow_alloc(
        xfs_extlen_t            aglen,
        struct xfs_defer_ops    *dfops)
 {
-        int                     error;
        trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno,
                        agbno, aglen);
        /* Add refcount btree reservation */
-        error = xfs_refcount_adjust_cow(rcur, agbno, aglen,
+        return xfs_refcount_adjust_cow(rcur, agbno, aglen,
                        XFS_REFCOUNT_ADJUST_COW_ALLOC, dfops);
-        if (error)
-                return error;
-        /* Add rmap entry */
-        if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) {
-                error = xfs_rmap_alloc_extent(rcur->bc_mp, dfops,
-                                rcur->bc_private.a.agno,
-                                agbno, aglen, XFS_RMAP_OWN_COW);
-                if (error)
-                        return error;
-        }
-        return error;
 }
 /*
@@ -1521,27 +1506,12 @@ __xfs_refcount_cow_free(
        xfs_extlen_t            aglen,
        struct xfs_defer_ops    *dfops)
 {
-        int                     error;
        trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno,
                        agbno, aglen);
        /* Remove refcount btree reservation */
-        error = xfs_refcount_adjust_cow(rcur, agbno, aglen,
+        return xfs_refcount_adjust_cow(rcur, agbno, aglen,
                        XFS_REFCOUNT_ADJUST_COW_FREE, dfops);
-        if (error)
-                return error;
-        /* Remove rmap entry */
-        if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) {
-                error = xfs_rmap_free_extent(rcur->bc_mp, dfops,
-                                rcur->bc_private.a.agno,
-                                agbno, aglen, XFS_RMAP_OWN_COW);
-                if (error)
-                        return error;
-        }
-        return error;
 }
 /* Record a CoW staging extent in the refcount btree. */
@@ -1552,11 +1522,19 @@ xfs_refcount_alloc_cow_extent(
        xfs_fsblock_t                   fsb,
        xfs_extlen_t                    len)
 {
+        int                             error;
        if (!xfs_sb_version_hasreflink(&mp->m_sb))
                return 0;
-        return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW,
+        error = __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW,
                        fsb, len);
+        if (error)
+                return error;
+        /* Add rmap entry */
+        return xfs_rmap_alloc_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
+                        XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
 }
 /* Forget a CoW staging event in the refcount btree. */
@@ -1567,9 +1545,17 @@ xfs_refcount_free_cow_extent(
        xfs_fsblock_t                   fsb,
        xfs_extlen_t                    len)
 {
+        int                             error;
        if (!xfs_sb_version_hasreflink(&mp->m_sb))
                return 0;
+        /* Remove rmap entry */
+        error = xfs_rmap_free_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
+                        XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
+        if (error)
+                return error;
        return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW,
                        fsb, len);
 }
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index dd019cee1b3b..50db920ceeeb 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -368,6 +368,51 @@ xfs_rmap_lookup_le_range(
 }
 /*
+ * Perform all the relevant owner checks for a removal op.  If we're doing an
+ * unknown-owner removal then we have no owner information to check.
+ *
+ * The checks run in this order, stopping at the first one that fails:
+ *  - the XFS_RMAP_UNWRITTEN bit in @flags must equal the one in rec->rm_flags;
+ *  - @owner must equal rec->rm_owner;
+ *  - if XFS_RMAP_NON_INODE_OWNER(owner) is true, nothing further is checked;
+ *  - if @flags has XFS_RMAP_BMBT_BLOCK, rec->rm_flags must have it as well;
+ *  - otherwise rec->rm_offset <= @offset and
+ *    @ltoff + rec->rm_blockcount >= @offset + @len must both hold.
+ *
+ * Returns the value of "error" on reaching the "out" label.  It starts at 0
+ * and is presumably set by XFS_WANT_CORRUPTED_GOTO() when a check fails (the
+ * macro is not visible here -- confirm).  @bno is not referenced by the body.
+ */
+static int
+xfs_rmap_free_check_owner(
+        struct xfs_mount        *mp,
+        uint64_t                ltoff,
+        struct xfs_rmap_irec    *rec,
+        xfs_fsblock_t           bno,
+        xfs_filblks_t           len,
+        uint64_t                owner,
+        uint64_t                offset,
+        unsigned int            flags)
+{
+        int                     error = 0;
+        if (owner == XFS_RMAP_OWN_UNKNOWN)
+                return 0;
+        /* Make sure the unwritten flag matches. */
+        XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) ==
+                        (rec->rm_flags & XFS_RMAP_UNWRITTEN), out);
+        /* Make sure the owner matches what we expect to find in the tree. */
+        XFS_WANT_CORRUPTED_GOTO(mp, owner == rec->rm_owner, out);
+        /* Check the offset, if necessary. */
+        if (XFS_RMAP_NON_INODE_OWNER(owner))
+                goto out;
+        if (flags & XFS_RMAP_BMBT_BLOCK) {
+                XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_flags & XFS_RMAP_BMBT_BLOCK,
+                                out);
+        } else {
+                XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_offset <= offset, out);
+                XFS_WANT_CORRUPTED_GOTO(mp,
+                                ltoff + rec->rm_blockcount >= offset + len,
+                                out);
+        }
+out:
+        return error;
+}
+/*
 * Find the extent in the rmap btree and remove it.
 *
 * The record we find should always be an exact match for the extent that we're
@@ -444,33 +489,40 @@ xfs_rmap_unmap(
                goto out_done;
        }
-        /* Make sure the unwritten flag matches. */
+        /*
-        XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) ==
+         * If we're doing an unknown-owner removal for EFI recovery, we expect
-                        (ltrec.rm_flags & XFS_RMAP_UNWRITTEN), out_error);
+         * to find the full range in the rmapbt or nothing at all.  If we
+         * don't find any rmaps overlapping either end of the range, we're
+         * done.  Hopefully this means that the EFI creator already queued
+         * (and finished) a RUI to remove the rmap.
+         */
+        if (owner == XFS_RMAP_OWN_UNKNOWN &&
+            ltrec.rm_startblock + ltrec.rm_blockcount <= bno) {
+                struct xfs_rmap_irec    rtrec;
+                error = xfs_btree_increment(cur, 0, &i);
+                if (error)
+                        goto out_error;
+                if (i == 0)
+                        goto out_done;
+                error = xfs_rmap_get_rec(cur, &rtrec, &i);
+                if (error)
+                        goto out_error;
+                XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+                if (rtrec.rm_startblock >= bno + len)
+                        goto out_done;
+        }
        /* Make sure the extent we found covers the entire freeing range. */
        XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno &&
-                ltrec.rm_startblock + ltrec.rm_blockcount >=
+                        ltrec.rm_startblock + ltrec.rm_blockcount >=
-                bno + len, out_error);
+                        bno + len, out_error);
-        /* Make sure the owner matches what we expect to find in the tree. */
+        /* Check owner information. */
-        XFS_WANT_CORRUPTED_GOTO(mp, owner == ltrec.rm_owner ||
+        error = xfs_rmap_free_check_owner(mp, ltoff, &ltrec, bno, len, owner,
-                                    XFS_RMAP_NON_INODE_OWNER(owner), out_error);
+                        offset, flags);
+        if (error)
-        /* Check the offset, if necessary. */
+                goto out_error;
-        if (!XFS_RMAP_NON_INODE_OWNER(owner)) {
-                if (flags & XFS_RMAP_BMBT_BLOCK) {
-                        XFS_WANT_CORRUPTED_GOTO(mp,
-                                        ltrec.rm_flags & XFS_RMAP_BMBT_BLOCK,
-                                        out_error);
-                } else {
-                        XFS_WANT_CORRUPTED_GOTO(mp,
-                                        ltrec.rm_offset <= offset, out_error);
-                        XFS_WANT_CORRUPTED_GOTO(mp,
-                                        ltoff + ltrec.rm_blockcount >= offset + len,
-                                        out_error);
-                }
-        }
        if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) {
                /* exact match, simply remove the record from rmap tree */
@@ -664,6 +716,7 @@ xfs_rmap_map(
                flags |= XFS_RMAP_UNWRITTEN;
        trace_xfs_rmap_map(mp, cur->bc_private.a.agno, bno, len,
                        unwritten, oinfo);
+        ASSERT(!xfs_rmap_should_skip_owner_update(oinfo));
        /*
         * For the initial lookup, look for an exact match or the left-adjacent
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index 466ede637080..0fcd5b1ba729 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -61,7 +61,21 @@ static inline void
 xfs_rmap_skip_owner_update(
        struct xfs_owner_info   *oi)
 {
-        oi->oi_owner = XFS_RMAP_OWN_UNKNOWN;
+        xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_NULL);
+}
+static inline bool                     /* true iff oi_owner is OWN_NULL */
+xfs_rmap_should_skip_owner_update(
+        struct xfs_owner_info   *oi)    /* owner info to test */
+{
+        return oi->oi_owner == XFS_RMAP_OWN_NULL;
+}
+static inline void
+xfs_rmap_any_owner_update(
+        struct xfs_owner_info   *oi)    /* owner info to fill in */
+{
+        /* hands XFS_RMAP_OWN_UNKNOWN to xfs_rmap_ag_owner(); presumably that stores it in oi->oi_owner -- confirm */
+        xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_UNKNOWN);
 }
 /* Reverse mapping functions. */
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 9c42c4efd01e..ab3aef2ae823 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -46,7 +46,6 @@
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/trace.h"
-#include "scrub/scrub.h"
 #include "scrub/btree.h"
 /*
diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c
index 472080e75788..86daed0e3a45 100644
--- a/fs/xfs/scrub/trace.c
+++ b/fs/xfs/scrub/trace.c
@@ -26,7 +26,6 @@
 #include "xfs_mount.h"
 #include "xfs_defer.h"
 #include "xfs_da_format.h"
-#include "xfs_defer.h"
 #include "xfs_inode.h"
 #include "xfs_btree.h"
 #include "xfs_trans.h"
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 21e2d70884e1..4fc526a27a94 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -399,7 +399,7 @@ xfs_map_blocks(
               (ip->i_df.if_flags & XFS_IFEXTENTS));
        ASSERT(offset <= mp->m_super->s_maxbytes);
-        if ((xfs_ufsize_t)offset + count > mp->m_super->s_maxbytes)
+        if (offset > mp->m_super->s_maxbytes - count)
                count = mp->m_super->s_maxbytes - offset;
        end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
        offset_fsb = XFS_B_TO_FSBT(mp, offset);
@@ -1312,7 +1312,7 @@ xfs_get_blocks(
        lockmode = xfs_ilock_data_map_shared(ip);
        ASSERT(offset <= mp->m_super->s_maxbytes);
-        if ((xfs_ufsize_t)offset + size > mp->m_super->s_maxbytes)
+        if (offset > mp->m_super->s_maxbytes - size)
                size = mp->m_super->s_maxbytes - offset;
        end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
        offset_fsb = XFS_B_TO_FSBT(mp, offset);
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 44f8c5451210..64da90655e95 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -538,7 +538,7 @@ xfs_efi_recover(
                return error;
        efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
-        xfs_rmap_skip_owner_update(&oinfo);
+        xfs_rmap_any_owner_update(&oinfo);
        for (i = 0; i < efip->efi_format.efi_nextents; i++) {
                extp = &efip->efi_format.efi_extents[i];
                error = xfs_trans_free_extent(tp, efdp, extp->ext_start,
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 8f22fc579dbb..60a2e128cb6a 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -571,6 +571,11 @@ xfs_growfs_data_private(
                 * this doesn't actually exist in the rmap btree.
                 */
                xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL);
+                error = xfs_rmap_free(tp, bp, agno,
+                                be32_to_cpu(agf->agf_length) - new,
+                                new, &oinfo);
+                if (error)
+                        goto error0;
                error = xfs_free_extent(tp,
                                XFS_AGB_TO_FSB(mp, agno,
                                        be32_to_cpu(agf->agf_length) - new),
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 43005fbe8b1e..3861d61fb265 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -870,7 +870,7 @@ xfs_eofblocks_worker(
 * based on the 'speculative_cow_prealloc_lifetime' tunable (5m by default).
 * (We'll just piggyback on the post-EOF prealloc space workqueue.)
 */
-STATIC void
+void
 xfs_queue_cowblocks(
        struct xfs_mount *mp)
 {
@@ -1536,8 +1536,23 @@ xfs_inode_free_quota_eofblocks(
        return __xfs_inode_free_quota_eofblocks(ip, xfs_icache_free_eofblocks);
 }
+static inline unsigned long            /* i_flags bit for tag, else 0 */
+xfs_iflag_for_tag(
+        int             tag)            /* an XFS_ICI_*_TAG value */
+{
+        switch (tag) {
+        case XFS_ICI_EOFBLOCKS_TAG:
+                return XFS_IEOFBLOCKS;
+        case XFS_ICI_COWBLOCKS_TAG:
+                return XFS_ICOWBLOCKS;
+        default:
+                ASSERT(0);              /* only the two tags above map to a flag */
+                return 0;
+        }
+}
 static void
-__xfs_inode_set_eofblocks_tag(
+__xfs_inode_set_blocks_tag(
        xfs_inode_t     *ip,
        void            (*execute)(struct xfs_mount *mp),
        void            (*set_tp)(struct xfs_mount *mp, xfs_agnumber_t agno,
@@ -1552,10 +1567,10 @@ __xfs_inode_set_eofblocks_tag(
         * Don't bother locking the AG and looking up in the radix trees
         * if we already know that we have the tag set.
         */
-        if (ip->i_flags & XFS_IEOFBLOCKS)
+        if (ip->i_flags & xfs_iflag_for_tag(tag))
                return;
        spin_lock(&ip->i_flags_lock);
-        ip->i_flags |= XFS_IEOFBLOCKS;
+        ip->i_flags |= xfs_iflag_for_tag(tag);
        spin_unlock(&ip->i_flags_lock);
        pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
@@ -1587,13 +1602,13 @@ xfs_inode_set_eofblocks_tag(
        xfs_inode_t     *ip)
 {
        trace_xfs_inode_set_eofblocks_tag(ip);
-        return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_eofblocks,
+        return __xfs_inode_set_blocks_tag(ip, xfs_queue_eofblocks,
                        trace_xfs_perag_set_eofblocks,
                        XFS_ICI_EOFBLOCKS_TAG);
 }
 static void
-__xfs_inode_clear_eofblocks_tag(
+__xfs_inode_clear_blocks_tag(
        xfs_inode_t     *ip,
        void            (*clear_tp)(struct xfs_mount *mp, xfs_agnumber_t agno,
                                    int error, unsigned long caller_ip),
@@ -1603,7 +1618,7 @@ __xfs_inode_clear_eofblocks_tag(
        struct xfs_perag *pag;
        spin_lock(&ip->i_flags_lock);
-        ip->i_flags &= ~XFS_IEOFBLOCKS;
+        ip->i_flags &= ~xfs_iflag_for_tag(tag);
        spin_unlock(&ip->i_flags_lock);
        pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
@@ -1630,7 +1645,7 @@ xfs_inode_clear_eofblocks_tag(
        xfs_inode_t     *ip)
 {
        trace_xfs_inode_clear_eofblocks_tag(ip);
-        return __xfs_inode_clear_eofblocks_tag(ip,
+        return __xfs_inode_clear_blocks_tag(ip,
                        trace_xfs_perag_clear_eofblocks, XFS_ICI_EOFBLOCKS_TAG);
 }
@@ -1724,7 +1739,7 @@ xfs_inode_set_cowblocks_tag(
        xfs_inode_t     *ip)
 {
        trace_xfs_inode_set_cowblocks_tag(ip);
-        return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_cowblocks,
+        return __xfs_inode_set_blocks_tag(ip, xfs_queue_cowblocks,
                        trace_xfs_perag_set_cowblocks,
                        XFS_ICI_COWBLOCKS_TAG);
 }
@@ -1734,6 +1749,6 @@ xfs_inode_clear_cowblocks_tag(
        xfs_inode_t     *ip)
 {
        trace_xfs_inode_clear_cowblocks_tag(ip);
-        return __xfs_inode_clear_eofblocks_tag(ip,
+        return __xfs_inode_clear_blocks_tag(ip,
                        trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG);
 }
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index bff4d85e5498..d4a77588eca1 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -81,6 +81,7 @@ void xfs_inode_clear_cowblocks_tag(struct xfs_inode *ip);
 int xfs_icache_free_cowblocks(struct xfs_mount *, struct xfs_eofblocks *);
 int xfs_inode_free_quota_cowblocks(struct xfs_inode *ip);
 void xfs_cowblocks_worker(struct work_struct *);
+void xfs_queue_cowblocks(struct xfs_mount *);
 int xfs_inode_ag_iterator(struct xfs_mount *mp,
        int (*execute)(struct xfs_inode *ip, int flags, void *args),
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 801274126648..6f95bdb408ce 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -749,7 +749,6 @@ xfs_ialloc(
        xfs_nlink_t     nlink,
        dev_t           rdev,
        prid_t          prid,
-        int             okalloc,
        xfs_buf_t       **ialloc_context,
        xfs_inode_t     **ipp)
 {
@@ -765,7 +764,7 @@ xfs_ialloc(
         * Call the space management code to pick
         * the on-disk inode to be allocated.
         */
-        error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc,
+        error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode,
                            ialloc_context, &ino);
        if (error)
                return error;
@@ -957,7 +956,6 @@ xfs_dir_ialloc(
        xfs_nlink_t     nlink,
        dev_t           rdev,
        prid_t          prid,           /* project id */
-        int             okalloc,        /* ok to allocate new space */
        xfs_inode_t     **ipp,          /* pointer to inode; it will be
                                           locked. */
        int             *committed)
@@ -988,8 +986,8 @@ xfs_dir_ialloc(
         * transaction commit so that no other process can steal
         * the inode(s) that we've just allocated.
         */
-        code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc,
+        code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, &ialloc_context,
-                          &ialloc_context, &ip);
+                        &ip);
        /*
         * Return an error if we were unable to allocate a new inode.
@@ -1061,7 +1059,7 @@ xfs_dir_ialloc(
                 * this call should always succeed.
                 */
                code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid,
-                                  okalloc, &ialloc_context, &ip);
+                                  &ialloc_context, &ip);
                /*
                 * If we get an error at this point, return to the caller
@@ -1182,11 +1180,6 @@ xfs_create(
                xfs_flush_inodes(mp);
                error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);
        }
-        if (error == -ENOSPC) {
-                /* No space at all so try a "no-allocation" reservation */
-                resblks = 0;
-                error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp);
-        }
        if (error)
                goto out_release_inode;
@@ -1203,19 +1196,13 @@ xfs_create(
        if (error)
                goto out_trans_cancel;
-        if (!resblks) {
-                error = xfs_dir_canenter(tp, dp, name);
-                if (error)
-                        goto out_trans_cancel;
-        }
        /*
         * A newly created regular or special file just has one directory
         * entry pointing to them, but a directory also the "." entry
         * pointing to itself.
         */
-        error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev,
+        error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, prid, &ip,
-                               prid, resblks > 0, &ip, NULL);
+                        NULL);
        if (error)
                goto out_trans_cancel;
@@ -1340,11 +1327,6 @@ xfs_create_tmpfile(
        tres = &M_RES(mp)->tr_create_tmpfile;
        error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);
-        if (error == -ENOSPC) {
-                /* No space at all so try a "no-allocation" reservation */
-                resblks = 0;
-                error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp);
-        }
        if (error)
                goto out_release_inode;
@@ -1353,8 +1335,7 @@ xfs_create_tmpfile(
        if (error)
                goto out_trans_cancel;
-        error = xfs_dir_ialloc(&tp, dp, mode, 1, 0,
+        error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, prid, &ip, NULL);
-                                prid, resblks > 0, &ip, NULL);
        if (error)
                goto out_trans_cancel;
@@ -1506,6 +1487,24 @@ xfs_link(
        return error;
 }
+/*
+ * Clear the reflink flag and the cowblocks tag if possible.
+ *
+ * Does nothing unless xfs_is_reflink_inode(ip).  XFS_DIFLAG2_REFLINK is
+ * dropped from di_flags2 only when both the data fork and the CoW fork have
+ * if_bytes == 0; the cowblocks tag is cleared whenever the CoW fork has
+ * if_bytes == 0, whether or not the data fork is empty.
+ */
+static void
+xfs_itruncate_clear_reflink_flags(
+        struct xfs_inode        *ip)
+{
+        struct xfs_ifork        *dfork;
+        struct xfs_ifork        *cfork;
+        if (!xfs_is_reflink_inode(ip))
+                return;
+        dfork = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+        cfork = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+        if (dfork->if_bytes == 0 && cfork->if_bytes == 0)
+                ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
+        if (cfork->if_bytes == 0)
+                xfs_inode_clear_cowblocks_tag(ip);
+}
 /*
 * Free up the underlying blocks past new_size.  The new size must be smaller
 * than the current size.  This routine can be used both for the attribute and
@@ -1602,15 +1601,7 @@ xfs_itruncate_extents(
        if (error)
                goto out;
-        /*
+        xfs_itruncate_clear_reflink_flags(ip);
-         * Clear the reflink flag if there are no data fork blocks and
-         * there are no extents staged in the cow fork.
-         */
-        if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) {
-                if (ip->i_d.di_nblocks == 0)
-                        ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
-                xfs_inode_clear_cowblocks_tag(ip);
-        }
        /*
         * Always re-log the inode so that our permanent transaction can keep
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index cc13c3763721..d383e392ec9d 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -232,6 +232,7 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip)
 * log recovery to replay a bmap operation on the inode.
 */
 #define XFS_IRECOVERY           (1 << 11)
+#define XFS_ICOWBLOCKS          (1 << 12)/* has the cowblocks tag set */
 /*
 * Per-lifetime flags need to be reset when re-using a reclaimable inode during
@@ -428,7 +429,7 @@ xfs_extlen_t	xfs_get_extsz_hint(struct xfs_inode *ip);
 xfs_extlen_t    xfs_get_cowextsz_hint(struct xfs_inode *ip);
 int             xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t,
-                               xfs_nlink_t, dev_t, prid_t, int,
+                               xfs_nlink_t, dev_t, prid_t,
                               struct xfs_inode **, int *);
 /* from xfs_file.c */
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 33eb4fb2e3fd..66e1edbfb2b2 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1006,7 +1006,7 @@ xfs_file_iomap_begin(
        }
        ASSERT(offset <= mp->m_super->s_maxbytes);
-        if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
+        if (offset > mp->m_super->s_maxbytes - length)
                length = mp->m_super->s_maxbytes - offset;
        offset_fsb = XFS_B_TO_FSBT(mp, offset);
        end_fsb = XFS_B_TO_FSB(mp, offset + length);
@@ -1213,7 +1213,7 @@ xfs_xattr_iomap_begin(
        ASSERT(ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL);
        error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
-                               &nimaps, XFS_BMAPI_ENTIRE | XFS_BMAPI_ATTRFORK);
+                               &nimaps, XFS_BMAPI_ATTRFORK);
 out_unlock:
        xfs_iunlock(ip, lockmode);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 010a13a201aa..b897b11afb2c 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -48,7 +48,7 @@
 STATIC int      xfs_qm_init_quotainos(xfs_mount_t *);
 STATIC int      xfs_qm_init_quotainfo(xfs_mount_t *);
+STATIC void     xfs_qm_destroy_quotainos(xfs_quotainfo_t *qi);
 STATIC void     xfs_qm_dqfree_one(struct xfs_dquot *dqp);
 /*
 * We use the batch lookup interface to iterate over the dquots as it
@@ -695,9 +695,17 @@ xfs_qm_init_quotainfo(
        qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan;
        qinf->qi_shrinker.seeks = DEFAULT_SEEKS;
        qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE;
-        register_shrinker(&qinf->qi_shrinker);
+        error = register_shrinker(&qinf->qi_shrinker);
+        if (error)
+                goto out_free_inos;
        return 0;
+out_free_inos:
+        mutex_destroy(&qinf->qi_quotaofflock);
+        mutex_destroy(&qinf->qi_tree_lock);
+        xfs_qm_destroy_quotainos(qinf);
 out_free_lru:
        list_lru_destroy(&qinf->qi_lru);
 out_free_qinf:
@@ -706,7 +714,6 @@ out_free_qinf:
        return error;
 }
 /*
 * Gets called when unmounting a filesystem or when all quotas get
 * turned off.
@@ -723,19 +730,8 @@ xfs_qm_destroy_quotainfo(
        unregister_shrinker(&qi->qi_shrinker);
        list_lru_destroy(&qi->qi_lru);
+        xfs_qm_destroy_quotainos(qi);
-        if (qi->qi_uquotaip) {
+        mutex_destroy(&qi->qi_tree_lock);
-                IRELE(qi->qi_uquotaip);
-                qi->qi_uquotaip = NULL; /* paranoia */
-        }
-        if (qi->qi_gquotaip) {
-                IRELE(qi->qi_gquotaip);
-                qi->qi_gquotaip = NULL;
-        }
-        if (qi->qi_pquotaip) {
-                IRELE(qi->qi_pquotaip);
-                qi->qi_pquotaip = NULL;
-        }
        mutex_destroy(&qi->qi_quotaofflock);
        kmem_free(qi);
        mp->m_quotainfo = NULL;
@@ -793,8 +789,8 @@ xfs_qm_qino_alloc(
                return error;
        if (need_alloc) {
-                error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip,
+                error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, ip,
-                                                                &committed);
+                                &committed);
                if (error) {
                        xfs_trans_cancel(tp);
                        return error;
@@ -1600,6 +1596,24 @@ error_rele:
 }
 STATIC void
+xfs_qm_destroy_quotainos(       /* used by quotainfo init error path and teardown */
+        xfs_quotainfo_t *qi)    /* quotainfo holding the three quota inode pointers */
+{
+        if (qi->qi_uquotaip) {  /* each pointer is released only if set */
+                IRELE(qi->qi_uquotaip); /* presumably drops the inode reference -- confirm */
+                qi->qi_uquotaip = NULL; /* paranoia */
+        }
+        if (qi->qi_gquotaip) {
+                IRELE(qi->qi_gquotaip);
+                qi->qi_gquotaip = NULL;
+        }
+        if (qi->qi_pquotaip) {
+                IRELE(qi->qi_pquotaip);
+                qi->qi_pquotaip = NULL;
+        }
+}
+STATIC void
 xfs_qm_dqfree_one(
        struct xfs_dquot        *dqp)
 {
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index cc041a29eb70..47aea2e82c26 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -49,8 +49,6 @@
 #include "xfs_alloc.h"
 #include "xfs_quota_defs.h"
 #include "xfs_quota.h"
-#include "xfs_btree.h"
-#include "xfs_bmap_btree.h"
 #include "xfs_reflink.h"
 #include "xfs_iomap.h"
 #include "xfs_rmap_btree.h"
@@ -456,6 +454,8 @@ retry:
        if (error)
                goto out_bmap_cancel;
+        xfs_inode_set_cowblocks_tag(ip);
        /* Finish up. */
        error = xfs_defer_finish(&tp, &dfops);
        if (error)
@@ -492,8 +492,9 @@ xfs_reflink_find_cow_mapping(
        struct xfs_iext_cursor          icur;
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
-        ASSERT(xfs_is_reflink_inode(ip));
+        if (!xfs_is_reflink_inode(ip))
+                return false;
        offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
        if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got))
                return false;
@@ -612,6 +613,9 @@ xfs_reflink_cancel_cow_blocks(
                        /* Remove the mapping from the CoW fork. */
                        xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
+                } else {
+                        /* Didn't do anything, push cursor back. */
+                        xfs_iext_prev(ifp, &icur);
                }
 next_extent:
                if (!xfs_iext_get_extent(ifp, &icur, &got))
@@ -727,7 +731,7 @@ xfs_reflink_end_cow(
                        (unsigned int)(end_fsb - offset_fsb),
                        XFS_DATA_FORK);
        error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
-                        resblks, 0, 0, &tp);
+                        resblks, 0, XFS_TRANS_RESERVE, &tp);
        if (error)
                goto out;
@@ -1293,6 +1297,17 @@ xfs_reflink_remap_range(
        trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
+        /*
+         * Clear out post-eof preallocations because we don't have page cache
+         * backing the delayed allocations and they'll never get freed on
+         * their own.
+         */
+        if (xfs_can_free_eofblocks(dest, true)) {
+                ret = xfs_free_eofblocks(dest);
+                if (ret)
+                        goto out_unlock;
+        }
        /* Set flags and remap blocks. */
        ret = xfs_reflink_set_inode_flag(src, dest);
        if (ret)
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 5122d3021117..1dacccc367f8 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1360,6 +1360,7 @@ xfs_fs_remount(
                        xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
                        return error;
                }
+                xfs_queue_cowblocks(mp);
                /* Create the per-AG metadata reservation pool .*/
                error = xfs_fs_reserve_ag_blocks(mp);
@@ -1369,6 +1370,14 @@ xfs_fs_remount(
        /* rw -> ro */
        if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) {
+                /* Get rid of any leftover CoW reservations... */
+                cancel_delayed_work_sync(&mp->m_cowblocks_work);
+                error = xfs_icache_free_cowblocks(mp, NULL);
+                if (error) {
+                        xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+                        return error;
+                }
                /* Free the per-AG metadata reservation pool. */
                error = xfs_fs_unreserve_ag_blocks(mp);
                if (error) {
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 68d3ca2c4968..2e9e793a8f9d 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -232,11 +232,6 @@ xfs_symlink(
        resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
        error = xfs_trans_alloc(mp, &M_RES(mp)->tr_symlink, resblks, 0, 0, &tp);
-        if (error == -ENOSPC && fs_blocks == 0) {
-                resblks = 0;
-                error = xfs_trans_alloc(mp, &M_RES(mp)->tr_symlink, 0, 0, 0,
-                                &tp);
-        }
        if (error)
                goto out_release_inode;
@@ -260,14 +255,6 @@ xfs_symlink(
                goto out_trans_cancel;
        /*
-         * Check for ability to enter directory entry, if no space reserved.
-         */
-        if (!resblks) {
-                error = xfs_dir_canenter(tp, dp, link_name);
-                if (error)
-                        goto out_trans_cancel;
-        }
-        /*
         * Initialize the bmap freelist prior to calling either
         * bmapi or the directory create code.
         */
@@ -277,7 +264,7 @@ xfs_symlink(
         * Allocate an inode for the symlink.
         */
        error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
-                               prid, resblks > 0, &ip, NULL);
+                               prid, &ip, NULL);
        if (error)
                goto out_trans_cancel;
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
index 5d95fe348294..35f3546b6af5 100644
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -24,7 +24,6 @@
 #include "xfs_mount.h"
 #include "xfs_defer.h"
 #include "xfs_da_format.h"
-#include "xfs_defer.h"
 #include "xfs_inode.h"
 #include "xfs_btree.h"
 #include "xfs_da_btree.h"
author	Jason Gunthorpe <jgg@mellanox.com>	2018-01-29 15:26:40 -0500
committer	Jason Gunthorpe <jgg@mellanox.com>	2018-01-30 11:30:00 -0500
commit	e7996a9a77fc669387da43ff4823b91cc4872bd0 (patch)
tree	617f0a128e222539d67e8cccc359f1bc4b984900 /fs
parent	b5fa635aab8f0d39a824c01991266a6d06f007fb (diff)
parent	d8a5b80568a9cb66810e75b182018e9edb68e8ff (diff)