Merge 4.14-rc4 into staging-next

We want the staging/iio fixes in here as well to handle merge issues.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
author: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2017-10-09 03:02:35 -0400
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2017-10-09 03:02:35 -0400
commit: 1236d6bb6e19fc72ffc6bbcdeb1bfefe450e54ee (patch)
tree: 47da3feee8e263e8c9352c85cf518e624be3c211 /fs
parent: 750b1a6894ecc9b178c6e3d0a1170122971b2036 (diff)
parent: 8a5776a5f49812d29fe4b2d0a2d71675c3facf3f (diff)
62 files changed, 657 insertions, 272 deletions
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index ce7181ea60fa..2a46762def31 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -54,7 +54,7 @@ typedef struct {
        int size;                       /* size of magic/mask */
        char *magic;                    /* magic or filename extension */
        char *mask;                     /* mask, NULL for exact match */
-        char *interpreter;              /* filename of interpreter */
+        const char *interpreter;        /* filename of interpreter */
        char *name;
        struct dentry *dentry;
        struct file *interp_file;
@@ -131,27 +131,26 @@ static int load_misc_binary(struct linux_binprm *bprm)
 {
        Node *fmt;
        struct file *interp_file = NULL;
-        char iname[BINPRM_BUF_SIZE];
-        const char *iname_addr = iname;
        int retval;
        int fd_binary = -1;
        retval = -ENOEXEC;
        if (!enabled)
-                goto ret;
+                return retval;
        /* to keep locking time low, we copy the interpreter string */
        read_lock(&entries_lock);
        fmt = check_file(bprm);
        if (fmt)
-                strlcpy(iname, fmt->interpreter, BINPRM_BUF_SIZE);
+                dget(fmt->dentry);
        read_unlock(&entries_lock);
        if (!fmt)
-                goto ret;
+                return retval;
        /* Need to be able to load the file after exec */
+        retval = -ENOENT;
        if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE)
-                return -ENOENT;
+                goto ret;
        if (!(fmt->flags & MISC_FMT_PRESERVE_ARGV0)) {
                retval = remove_arg_zero(bprm);
@@ -195,22 +194,22 @@ static int load_misc_binary(struct linux_binprm *bprm)
        bprm->argc++;
        /* add the interp as argv[0] */
-        retval = copy_strings_kernel(1, &iname_addr, bprm);
+        retval = copy_strings_kernel(1, &fmt->interpreter, bprm);
        if (retval < 0)
                goto error;
        bprm->argc++;
        /* Update interp in case binfmt_script needs it. */
-        retval = bprm_change_interp(iname, bprm);
+        retval = bprm_change_interp(fmt->interpreter, bprm);
        if (retval < 0)
                goto error;
-        if (fmt->flags & MISC_FMT_OPEN_FILE && fmt->interp_file) {
+        if (fmt->flags & MISC_FMT_OPEN_FILE) {
                interp_file = filp_clone_open(fmt->interp_file);
                if (!IS_ERR(interp_file))
                        deny_write_access(interp_file);
        } else {
-                interp_file = open_exec(iname);
+                interp_file = open_exec(fmt->interpreter);
        }
        retval = PTR_ERR(interp_file);
        if (IS_ERR(interp_file))
@@ -238,6 +237,7 @@ static int load_misc_binary(struct linux_binprm *bprm)
                goto error;
 ret:
+        dput(fmt->dentry);
        return retval;
 error:
        if (fd_binary > 0)
@@ -594,8 +594,13 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode)
 static void bm_evict_inode(struct inode *inode)
 {
+        Node *e = inode->i_private;
+        if (e->flags & MISC_FMT_OPEN_FILE)
+                filp_close(e->interp_file, NULL);
        clear_inode(inode);
-        kfree(inode->i_private);
+        kfree(e);
 }
 static void kill_node(Node *e)
@@ -603,24 +608,14 @@ static void kill_node(Node *e)
        struct dentry *dentry;
        write_lock(&entries_lock);
-        dentry = e->dentry;
+        list_del_init(&e->list);
-        if (dentry) {
-                list_del_init(&e->list);
-                e->dentry = NULL;
-        }
        write_unlock(&entries_lock);
-        if ((e->flags & MISC_FMT_OPEN_FILE) && e->interp_file) {
+        dentry = e->dentry;
-                filp_close(e->interp_file, NULL);
+        drop_nlink(d_inode(dentry));
-                e->interp_file = NULL;
+        d_drop(dentry);
-        }
+        dput(dentry);
+        simple_release_fs(&bm_mnt, &entry_count);
-        if (dentry) {
-                drop_nlink(d_inode(dentry));
-                d_drop(dentry);
-                dput(dentry);
-                simple_release_fs(&bm_mnt, &entry_count);
-        }
 }
 /* /<entry> */
@@ -665,7 +660,8 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
                root = file_inode(file)->i_sb->s_root;
                inode_lock(d_inode(root));
-                kill_node(e);
+                if (!list_empty(&e->list))
+                        kill_node(e);
                inode_unlock(d_inode(root));
                break;
@@ -794,7 +790,7 @@ static ssize_t bm_status_write(struct file *file, const char __user *buffer,
                inode_lock(d_inode(root));
                while (!list_empty(&entries))
-                        kill_node(list_entry(entries.next, Node, list));
+                        kill_node(list_first_entry(&entries, Node, list));
                inode_unlock(d_inode(root));
                break;
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index afdf4e3cafc2..7cde3f46ad26 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -19,7 +19,6 @@ static int load_script(struct linux_binprm *bprm)
        const char *i_arg, *i_name;
        char *cp;
        struct file *file;
-        char interp[BINPRM_BUF_SIZE];
        int retval;
        if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!'))
@@ -55,7 +54,7 @@ static int load_script(struct linux_binprm *bprm)
                        break;
        }
        for (cp = bprm->buf+2; (*cp == ' ') || (*cp == '\t'); cp++);
-        if (*cp == '\0') 
+        if (*cp == '\0')
                return -ENOEXEC; /* No interpreter name found */
        i_name = cp;
        i_arg = NULL;
@@ -65,7 +64,6 @@ static int load_script(struct linux_binprm *bprm)
                *cp++ = '\0';
        if (*cp)
                i_arg = cp;
-        strcpy (interp, i_name);
        /*
         * OK, we've parsed out the interpreter name and
         * (optional) argument.
@@ -80,24 +78,27 @@ static int load_script(struct linux_binprm *bprm)
        if (retval)
                return retval;
        retval = copy_strings_kernel(1, &bprm->interp, bprm);
-        if (retval < 0) return retval; 
+        if (retval < 0)
+                return retval;
        bprm->argc++;
        if (i_arg) {
                retval = copy_strings_kernel(1, &i_arg, bprm);
-                if (retval < 0) return retval; 
+                if (retval < 0)
+                        return retval;
                bprm->argc++;
        }
        retval = copy_strings_kernel(1, &i_name, bprm);
-        if (retval) return retval; 
+        if (retval)
+                return retval;
        bprm->argc++;
-        retval = bprm_change_interp(interp, bprm);
+        retval = bprm_change_interp(i_name, bprm);
        if (retval < 0)
                return retval;
        /*
         * OK, now restart the process with the interpreter's dentry.
         */
-        file = open_exec(interp);
+        file = open_exec(i_name);
        if (IS_ERR(file))
                return PTR_ERR(file);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index b51d23f5cafa..280384bf34f1 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -107,7 +107,8 @@ static void end_compressed_bio_read(struct bio *bio)
        struct inode *inode;
        struct page *page;
        unsigned long index;
-        int ret;
+        unsigned int mirror = btrfs_io_bio(bio)->mirror_num;
+        int ret = 0;
        if (bio->bi_status)
                cb->errors = 1;
@@ -118,6 +119,21 @@ static void end_compressed_bio_read(struct bio *bio)
        if (!refcount_dec_and_test(&cb->pending_bios))
                goto out;
+        /*
+         * Record the correct mirror_num in cb->orig_bio so that
+         * read-repair can work properly.
+         */
+        ASSERT(btrfs_io_bio(cb->orig_bio));
+        btrfs_io_bio(cb->orig_bio)->mirror_num = mirror;
+        cb->mirror_num = mirror;
+        /*
+         * Some IO in this cb have failed, just skip checksum as there
+         * is no way it could be correct.
+         */
+        if (cb->errors == 1)
+                goto csum_failed;
        inode = cb->inode;
        ret = check_compressed_csum(BTRFS_I(inode), cb,
                                    (u64)bio->bi_iter.bi_sector << 9);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 5a8933da39a7..8fc690384c58 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -709,7 +709,6 @@ struct btrfs_delayed_root;
 #define BTRFS_FS_OPEN                           5
 #define BTRFS_FS_QUOTA_ENABLED                  6
 #define BTRFS_FS_QUOTA_ENABLING                 7
-#define BTRFS_FS_QUOTA_DISABLING                8
 #define BTRFS_FS_UPDATE_UUID_TREE_GEN           9
 #define BTRFS_FS_CREATING_FREE_SPACE_TREE       10
 #define BTRFS_FS_BTREE_ERR                      11
@@ -723,7 +722,7 @@ struct btrfs_delayed_root;
 * Indicate that a whole-filesystem exclusive operation is running
 * (device replace, resize, device add/delete, balance)
 */
-#define BTRFS_FS_EXCL_OP                        14
+#define BTRFS_FS_EXCL_OP                        16
 struct btrfs_fs_info {
        u8 fsid[BTRFS_FSID_SIZE];
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 487bbe4fb3c6..dfdab849037b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3643,7 +3643,14 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
        u64 flags;
        do_barriers = !btrfs_test_opt(fs_info, NOBARRIER);
-        backup_super_roots(fs_info);
+        /*
+         * max_mirrors == 0 indicates we're from commit_transaction,
+         * not from fsync where the tree roots in fs_info have not
+         * been consistent on disk.
+         */
+        if (max_mirrors == 0)
+                backup_super_roots(fs_info);
        sb = fs_info->super_for_commit;
        dev_item = &sb->dev_item;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3e5bb0cdd3cd..970190cd347e 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2801,7 +2801,7 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
                }
        }
-        bio = btrfs_bio_alloc(bdev, sector << 9);
+        bio = btrfs_bio_alloc(bdev, (u64)sector << 9);
        bio_add_page(bio, page, page_size, offset);
        bio->bi_end_io = end_io_func;
        bio->bi_private = tree;
@@ -3471,8 +3471,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
        unsigned int write_flags = 0;
        unsigned long nr_written = 0;
-        if (wbc->sync_mode == WB_SYNC_ALL)
+        write_flags = wbc_to_write_flags(wbc);
-                write_flags = REQ_SYNC;
        trace___extent_writepage(page, inode, wbc);
@@ -3718,7 +3717,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
        unsigned long i, num_pages;
        unsigned long bio_flags = 0;
        unsigned long start, end;
-        unsigned int write_flags = (epd->sync_io ? REQ_SYNC : 0) | REQ_META;
+        unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
        int ret = 0;
        clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
@@ -4063,9 +4062,6 @@ static void flush_epd_write_bio(struct extent_page_data *epd)
        if (epd->bio) {
                int ret;
-                bio_set_op_attrs(epd->bio, REQ_OP_WRITE,
-                                 epd->sync_io ? REQ_SYNC : 0);
                ret = submit_one_bio(epd->bio, 0, epd->bio_flags);
                BUG_ON(ret < 0); /* -ENOMEM */
                epd->bio = NULL;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 128f3e58634f..d94e3f68b9b1 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -135,6 +135,18 @@ static inline void btrfs_cleanup_ordered_extents(struct inode *inode,
                                                 const u64 offset,
                                                 const u64 bytes)
 {
+        unsigned long index = offset >> PAGE_SHIFT;
+        unsigned long end_index = (offset + bytes - 1) >> PAGE_SHIFT;
+        struct page *page;
+        while (index <= end_index) {
+                page = find_get_page(inode->i_mapping, index);
+                index++;
+                if (!page)
+                        continue;
+                ClearPagePrivate2(page);
+                put_page(page);
+        }
        return __endio_write_update_ordered(inode, offset + PAGE_SIZE,
                                            bytes - PAGE_SIZE, false);
 }
@@ -8357,11 +8369,8 @@ static void btrfs_endio_direct_read(struct bio *bio)
        struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
        blk_status_t err = bio->bi_status;
-        if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED) {
+        if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED)
                err = btrfs_subio_endio_read(inode, io_bio, err);
-                if (!err)
-                        bio->bi_status = 0;
-        }
        unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
                      dip->logical_offset + dip->bytes - 1);
@@ -8369,7 +8378,7 @@ static void btrfs_endio_direct_read(struct bio *bio)
        kfree(dip);
-        dio_bio->bi_status = bio->bi_status;
+        dio_bio->bi_status = err;
        dio_end_io(dio_bio);
        if (io_bio->end_io)
@@ -8387,6 +8396,7 @@ static void __endio_write_update_ordered(struct inode *inode,
        btrfs_work_func_t func;
        u64 ordered_offset = offset;
        u64 ordered_bytes = bytes;
+        u64 last_offset;
        int ret;
        if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
@@ -8398,6 +8408,7 @@ static void __endio_write_update_ordered(struct inode *inode,
        }
 again:
+        last_offset = ordered_offset;
        ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
                                                   &ordered_offset,
                                                   ordered_bytes,
@@ -8409,6 +8420,12 @@ again:
        btrfs_queue_work(wq, &ordered->work);
 out_test:
        /*
+         * If btrfs_dec_test_ordered_pending does not find any ordered extent
+         * in the range, we can exit.
+         */
+        if (ordered_offset == last_offset)
+                return;
+        /*
         * our bio might span multiple ordered extents.  If we haven't
         * completed the accounting for the whole dio, go back and try again
         */
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d6715c2bcdc4..6c7a49faf4e0 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2773,9 +2773,9 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info,
        }
        mutex_unlock(&fs_devices->device_list_mutex);
-        fi_args->nodesize = fs_info->super_copy->nodesize;
+        fi_args->nodesize = fs_info->nodesize;
-        fi_args->sectorsize = fs_info->super_copy->sectorsize;
+        fi_args->sectorsize = fs_info->sectorsize;
-        fi_args->clone_alignment = fs_info->super_copy->sectorsize;
+        fi_args->clone_alignment = fs_info->sectorsize;
        if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
                ret = -EFAULT;
@@ -3032,7 +3032,7 @@ static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,
 out:
        if (ret)
                btrfs_cmp_data_free(cmp);
-        return 0;
+        return ret;
 }
 static int btrfs_cmp_data(u64 len, struct cmp_pages *cmp)
@@ -4061,6 +4061,10 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
                ret = PTR_ERR(new_root);
                goto out;
        }
+        if (!is_fstree(new_root->objectid)) {
+                ret = -ENOENT;
+                goto out;
+        }
        path = btrfs_alloc_path();
        if (!path) {
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 5c8b61c86e61..e172d4843eae 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -807,7 +807,6 @@ static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
        }
        ret = 0;
 out:
-        set_bit(BTRFS_FS_QUOTA_DISABLING, &root->fs_info->flags);
        btrfs_free_path(path);
        return ret;
 }
@@ -953,7 +952,6 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
        if (!fs_info->quota_root)
                goto out;
        clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
-        set_bit(BTRFS_FS_QUOTA_DISABLING, &fs_info->flags);
        btrfs_qgroup_wait_for_completion(fs_info, false);
        spin_lock(&fs_info->qgroup_lock);
        quota_root = fs_info->quota_root;
@@ -1307,6 +1305,8 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
                }
        }
        ret = del_qgroup_item(trans, quota_root, qgroupid);
+        if (ret && ret != -ENOENT)
+                goto out;
        while (!list_empty(&qgroup->groups)) {
                list = list_first_entry(&qgroup->groups,
@@ -2086,8 +2086,6 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
        if (test_and_clear_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags))
                set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
-        if (test_and_clear_bit(BTRFS_FS_QUOTA_DISABLING, &fs_info->flags))
-                clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
        spin_lock(&fs_info->qgroup_lock);
        while (!list_empty(&fs_info->dirty_qgroups)) {
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 3a49a3c2fca4..9841faef08ea 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2400,11 +2400,11 @@ void free_reloc_roots(struct list_head *list)
        while (!list_empty(list)) {
                reloc_root = list_entry(list->next, struct btrfs_root,
                                        root_list);
+                __del_reloc_root(reloc_root);
                free_extent_buffer(reloc_root->node);
                free_extent_buffer(reloc_root->commit_root);
                reloc_root->node = NULL;
                reloc_root->commit_root = NULL;
-                __del_reloc_root(reloc_root);
        }
 }
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 32b043ef8ac9..8fd195cfe81b 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -2630,7 +2630,7 @@ static int send_create_inode(struct send_ctx *sctx, u64 ino)
        } else {
                btrfs_warn(sctx->send_root->fs_info, "unexpected inode type %o",
                                (int)(mode & S_IFMT));
-                ret = -ENOTSUPP;
+                ret = -EOPNOTSUPP;
                goto out;
        }
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index ad7f4bab640b..c800d067fcbf 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4181,6 +4181,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
        struct extent_map *em, *n;
        struct list_head extents;
        struct extent_map_tree *tree = &inode->extent_tree;
+        u64 logged_start, logged_end;
        u64 test_gen;
        int ret = 0;
        int num = 0;
@@ -4190,10 +4191,11 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
        down_write(&inode->dio_sem);
        write_lock(&tree->lock);
        test_gen = root->fs_info->last_trans_committed;
+        logged_start = start;
+        logged_end = end;
        list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
                list_del_init(&em->list);
                /*
                 * Just an arbitrary number, this can be really CPU intensive
                 * once we start getting a lot of extents, and really once we
@@ -4208,6 +4210,12 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
                if (em->generation <= test_gen)
                        continue;
+                if (em->start < logged_start)
+                        logged_start = em->start;
+                if ((em->start + em->len - 1) > logged_end)
+                        logged_end = em->start + em->len - 1;
                /* Need a ref to keep it from getting evicted from cache */
                refcount_inc(&em->refs);
                set_bit(EXTENT_FLAG_LOGGING, &em->flags);
@@ -4216,7 +4224,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
        }
        list_sort(NULL, &extents, extent_cmp);
-        btrfs_get_logged_extents(inode, logged_list, start, end);
+        btrfs_get_logged_extents(inode, logged_list, logged_start, logged_end);
        /*
         * Some ordered extents started by fsync might have completed
         * before we could collect them into the list logged_list, which
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0e8f16c305df..b39737568c22 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6166,7 +6166,7 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
        map_length = length;
        btrfs_bio_counter_inc_blocked(fs_info);
-        ret = __btrfs_map_block(fs_info, bio_op(bio), logical,
+        ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical,
                                &map_length, &bbio, mirror_num, 1);
        if (ret) {
                btrfs_bio_counter_dec(fs_info);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 9dd6b836ac9e..f23c820daaed 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -7,7 +7,6 @@
 #include <linux/sched.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
-#include <linux/utsname.h>
 #include <linux/ratelimit.h>
 #include "super.h"
@@ -735,12 +734,13 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
                        inode = req->r_inode;
                        ihold(inode);
                } else {
-                        /* req->r_dentry is non-null for LSSNAP request.
+                        /* req->r_dentry is non-null for LSSNAP request */
-                         * fall-thru */
+                        rcu_read_lock();
-                        WARN_ON_ONCE(!req->r_dentry);
+                        inode = get_nonsnap_parent(req->r_dentry);
+                        rcu_read_unlock();
+                        dout("__choose_mds using snapdir's parent %p\n", inode);
                }
-        }
+        } else if (req->r_dentry) {
-        if (!inode && req->r_dentry) {
                /* ignore race with rename; old or new d_parent is okay */
                struct dentry *parent;
                struct inode *dir;
@@ -884,8 +884,8 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
        void *p;
        const char* metadata[][2] = {
-                {"hostname", utsname()->nodename},
+                {"hostname", mdsc->nodename},
-                {"kernel_version", utsname()->release},
+                {"kernel_version", init_utsname()->release},
                {"entity_id", opt->name ? : ""},
                {"root", fsopt->server_path ? : "/"},
                {NULL, NULL}
@@ -3539,6 +3539,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
        init_rwsem(&mdsc->pool_perm_rwsem);
        mdsc->pool_perm_tree = RB_ROOT;
+        strncpy(mdsc->nodename, utsname()->nodename,
+                sizeof(mdsc->nodename) - 1);
        return 0;
 }
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index db57ae98ed34..636d6b2ec49c 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -8,6 +8,7 @@
 #include <linux/rbtree.h>
 #include <linux/spinlock.h>
 #include <linux/refcount.h>
+#include <linux/utsname.h>
 #include <linux/ceph/types.h>
 #include <linux/ceph/messenger.h>
@@ -368,6 +369,8 @@ struct ceph_mds_client {
        struct rw_semaphore     pool_perm_rwsem;
        struct rb_root          pool_perm_tree;
+        char nodename[__NEW_UTS_LEN + 1];
 };
 extern const char *ceph_mds_op_name(int op);
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 1ffc8b426c1c..7fc0b850c352 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -374,12 +374,10 @@ static int build_snap_context(struct ceph_snap_realm *realm,
             realm->ino, realm, snapc, snapc->seq,
             (unsigned int) snapc->num_snaps);
-        if (realm->cached_context) {
+        ceph_put_snap_context(realm->cached_context);
-                ceph_put_snap_context(realm->cached_context);
-                /* queue realm for cap_snap creation */
-                list_add_tail(&realm->dirty_item, dirty_realms);
-        }
        realm->cached_context = snapc;
+        /* queue realm for cap_snap creation */
+        list_add_tail(&realm->dirty_item, dirty_realms);
        return 0;
 fail:
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 9727e1dcacd5..cbb9534b89b4 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -160,8 +160,13 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
                        if ((ses->serverDomain == NULL) ||
                                (ses->serverOS == NULL) ||
                                (ses->serverNOS == NULL)) {
-                                seq_printf(m, "\n%d) entry for %s not fully "
+                                seq_printf(m, "\n%d) Name: %s Uses: %d Capability: 0x%x\tSession Status: %d\t",
-                                           "displayed\n\t", i, ses->serverName);
+                                        i, ses->serverName, ses->ses_count,
+                                        ses->capabilities, ses->status);
+                                if (ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST)
+                                        seq_printf(m, "Guest\t");
+                                else if (ses->session_flags & SMB2_SESSION_FLAG_IS_NULL)
+                                        seq_printf(m, "Anonymous\t");
                        } else {
                                seq_printf(m,
                                    "\n%d) Name: %s  Domain: %s Uses: %d OS:"
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 180b3356ff86..8c8b75d33f31 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -461,6 +461,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
                seq_puts(s, ",nocase");
        if (tcon->retry)
                seq_puts(s, ",hard");
+        else
+                seq_puts(s, ",soft");
        if (tcon->use_persistent)
                seq_puts(s, ",persistenthandles");
        else if (tcon->use_resilient)
@@ -1447,7 +1449,7 @@ exit_cifs(void)
        exit_cifs_idmap();
 #endif
 #ifdef CONFIG_CIFS_UPCALL
-        unregister_key_type(&cifs_spnego_key_type);
+        exit_cifs_spnego();
 #endif
        cifs_destroy_request_bufs();
        cifs_destroy_mids();
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 30bf89b1fd9a..5a10e566f0e6 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -149,5 +149,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 extern const struct export_operations cifs_export_ops;
 #endif /* CONFIG_CIFS_NFSD_EXPORT */
-#define CIFS_VERSION   "2.09"
+#define CIFS_VERSION   "2.10"
 #endif                          /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 808486c29f0d..de5b2e1fcce5 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -188,6 +188,8 @@ enum smb_version {
 #ifdef CONFIG_CIFS_SMB311
        Smb_311,
 #endif /* SMB311 */
+        Smb_3any,
+        Smb_default,
        Smb_version_err
 };
@@ -1701,6 +1703,10 @@ extern struct smb_version_values smb20_values;
 #define SMB21_VERSION_STRING    "2.1"
 extern struct smb_version_operations smb21_operations;
 extern struct smb_version_values smb21_values;
+#define SMBDEFAULT_VERSION_STRING "default"
+extern struct smb_version_values smbdefault_values;
+#define SMB3ANY_VERSION_STRING "3"
+extern struct smb_version_values smb3any_values;
 #define SMB30_VERSION_STRING    "3.0"
 extern struct smb_version_operations smb30_operations;
 extern struct smb_version_values smb30_values;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 5aa2d278ca84..0bfc2280436d 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -301,6 +301,8 @@ static const match_table_t cifs_smb_version_tokens = {
        { Smb_311, SMB311_VERSION_STRING },
        { Smb_311, ALT_SMB311_VERSION_STRING },
 #endif /* SMB311 */
+        { Smb_3any, SMB3ANY_VERSION_STRING },
+        { Smb_default, SMBDEFAULT_VERSION_STRING },
        { Smb_version_err, NULL }
 };
@@ -1148,6 +1150,14 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol)
                vol->vals = &smb311_values;
                break;
 #endif /* SMB311 */
+        case Smb_3any:
+                vol->ops = &smb30_operations; /* currently identical with 3.0 */
+                vol->vals = &smb3any_values;
+                break;
+        case Smb_default:
+                vol->ops = &smb30_operations; /* currently identical with 3.0 */
+                vol->vals = &smbdefault_values;
+                break;
        default:
                cifs_dbg(VFS, "Unknown vers= option specified: %s\n", value);
                return 1;
@@ -1274,9 +1284,9 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
        vol->actimeo = CIFS_DEF_ACTIMEO;
-        /* FIXME: add autonegotiation for SMB3 or later rather than just SMB3 */
+        /* offer SMB2.1 and later (SMB3 etc). Secure and widely accepted */
-        vol->ops = &smb30_operations; /* both secure and accepted widely */
+        vol->ops = &smb30_operations;
-        vol->vals = &smb30_values;
+        vol->vals = &smbdefault_values;
        vol->echo_interval = SMB_ECHO_INTERVAL_DEFAULT;
@@ -1988,11 +1998,10 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
        if (got_version == false)
                pr_warn("No dialect specified on mount. Default has changed to "
-                        "a more secure dialect, SMB3 (vers=3.0), from CIFS "
+                        "a more secure dialect, SMB2.1 or later (e.g. SMB3), from CIFS "
                        "(SMB1). To use the less secure SMB1 dialect to access "
-                        "old servers which do not support SMB3 specify vers=1.0"
+                        "old servers which do not support SMB3 (or SMB2.1) specify vers=1.0"
-                        " on mount. For somewhat newer servers such as Windows "
+                        " on mount.\n");
-                        "7 try vers=2.1.\n");
        kfree(mountdata_copy);
        return 0;
@@ -2133,6 +2142,7 @@ static int match_server(struct TCP_Server_Info *server, struct smb_vol *vol)
        if (vol->nosharesock)
                return 0;
+        /* BB update this for smb3any and default case */
        if ((server->vals != vol->vals) || (server->ops != vol->ops))
                return 0;
@@ -4144,6 +4154,14 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
        cifs_dbg(FYI, "Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d\n",
                 server->sec_mode, server->capabilities, server->timeAdj);
+        if (ses->auth_key.response) {
+                cifs_dbg(VFS, "Free previous auth_key.response = %p\n",
+                         ses->auth_key.response);
+                kfree(ses->auth_key.response);
+                ses->auth_key.response = NULL;
+                ses->auth_key.len = 0;
+        }
        if (server->ops->sess_setup)
                rc = server->ops->sess_setup(xid, ses, nls_info);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 0786f19d288f..92fdf9c35de2 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -224,6 +224,13 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;
+        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
+        if (f_flags & O_SYNC)
+                create_options |= CREATE_WRITE_THROUGH;
+        if (f_flags & O_DIRECT)
+                create_options |= CREATE_NO_BUFFER;
        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
@@ -1102,8 +1109,10 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
-        int types[] = {LOCKING_ANDX_LARGE_FILES,
+        static const int types[] = {
-                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
+                LOCKING_ANDX_LARGE_FILES,
+                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
+        };
        int i;
        xid = get_xid();
@@ -1434,8 +1443,10 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
                  unsigned int xid)
 {
        int rc = 0, stored_rc;
-        int types[] = {LOCKING_ANDX_LARGE_FILES,
+        static const int types[] = {
-                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
+                LOCKING_ANDX_LARGE_FILES,
+                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
+        };
        unsigned int i;
        unsigned int max_num, num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index a8693632235f..7c732cb44164 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -234,6 +234,8 @@ cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info,
        fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime);
        fattr->cf_mtime = cifs_NTtimeToUnix(info->LastModificationTime);
        fattr->cf_ctime = cifs_NTtimeToUnix(info->LastStatusChange);
+        /* old POSIX extensions don't get create time */
        fattr->cf_mode = le64_to_cpu(info->Permissions);
        /*
@@ -2024,6 +2026,19 @@ int cifs_getattr(const struct path *path, struct kstat *stat,
        stat->blksize = CIFS_MAX_MSGSIZE;
        stat->ino = CIFS_I(inode)->uniqueid;
+        /* old CIFS Unix Extensions doesn't return create time */
+        if (CIFS_I(inode)->createtime) {
+                stat->result_mask |= STATX_BTIME;
+                stat->btime =
+                      cifs_NTtimeToUnix(cpu_to_le64(CIFS_I(inode)->createtime));
+        }
+        stat->attributes_mask |= (STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED);
+        if (CIFS_I(inode)->cifsAttrs & FILE_ATTRIBUTE_COMPRESSED)
+                stat->attributes |= STATX_ATTR_COMPRESSED;
+        if (CIFS_I(inode)->cifsAttrs & FILE_ATTRIBUTE_ENCRYPTED)
+                stat->attributes |= STATX_ATTR_ENCRYPTED;
        /*
         * If on a multiuser mount without unix extensions or cifsacl being
         * enabled, and the admin hasn't overridden them, set the ownership
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index fb2934b9b97c..0dafdbae1f8c 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -426,6 +426,7 @@ smb2_query_file_info(const unsigned int xid, struct cifs_tcon *tcon,
        return rc;
 }
+#ifdef CONFIG_CIFS_XATTR
 static ssize_t
 move_smb2_ea_to_cifs(char *dst, size_t dst_size,
                     struct smb2_file_full_ea_info *src, size_t src_size,
@@ -613,6 +614,7 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
        return rc;
 }
+#endif
 static bool
 smb2_can_echo(struct TCP_Server_Info *server)
@@ -3110,6 +3112,46 @@ struct smb_version_values smb21_values = {
        .create_lease_size = sizeof(struct create_lease),
 };
+struct smb_version_values smb3any_values = {
+        .version_string = SMB3ANY_VERSION_STRING,
+        .protocol_id = SMB302_PROT_ID, /* doesn't matter, send protocol array */
+        .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION,
+        .large_lock_type = 0,
+        .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
+        .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
+        .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
+        .header_size = sizeof(struct smb2_hdr),
+        .max_header_size = MAX_SMB2_HDR_SIZE,
+        .read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
+        .lock_cmd = SMB2_LOCK,
+        .cap_unix = 0,
+        .cap_nt_find = SMB2_NT_FIND,
+        .cap_large_files = SMB2_LARGE_FILES,
+        .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED,
+        .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED,
+        .create_lease_size = sizeof(struct create_lease_v2),
+};
+struct smb_version_values smbdefault_values = {
+        .version_string = SMBDEFAULT_VERSION_STRING,
+        .protocol_id = SMB302_PROT_ID, /* doesn't matter, send protocol array */
+        .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION,
+        .large_lock_type = 0,
+        .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
+        .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
+        .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
+        .header_size = sizeof(struct smb2_hdr),
+        .max_header_size = MAX_SMB2_HDR_SIZE,
+        .read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
+        .lock_cmd = SMB2_LOCK,
+        .cap_unix = 0,
+        .cap_nt_find = SMB2_NT_FIND,
+        .cap_large_files = SMB2_LARGE_FILES,
+        .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED,
+        .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED,
+        .create_lease_size = sizeof(struct create_lease_v2),
+};
 struct smb_version_values smb30_values = {
        .version_string = SMB30_VERSION_STRING,
        .protocol_id = SMB30_PROT_ID,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 5531e7ee1210..6f0e6343c15e 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -439,7 +439,7 @@ assemble_neg_contexts(struct smb2_negotiate_req *req)
        build_encrypt_ctxt((struct smb2_encryption_neg_context *)pneg_ctxt);
        req->NegotiateContextOffset = cpu_to_le32(OFFSET_OF_NEG_CONTEXT);
        req->NegotiateContextCount = cpu_to_le16(2);
-        inc_rfc1001_len(req, 4 + sizeof(struct smb2_preauth_neg_context) + 2
+        inc_rfc1001_len(req, 4 + sizeof(struct smb2_preauth_neg_context)
                        + sizeof(struct smb2_encryption_neg_context)); /* calculate hash */
 }
 #else
@@ -491,10 +491,25 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
        req->hdr.sync_hdr.SessionId = 0;
-        req->Dialects[0] = cpu_to_le16(ses->server->vals->protocol_id);
+        if (strcmp(ses->server->vals->version_string,
+                   SMB3ANY_VERSION_STRING) == 0) {
-        req->DialectCount = cpu_to_le16(1); /* One vers= at a time for now */
+                req->Dialects[0] = cpu_to_le16(SMB30_PROT_ID);
-        inc_rfc1001_len(req, 2);
+                req->Dialects[1] = cpu_to_le16(SMB302_PROT_ID);
+                req->DialectCount = cpu_to_le16(2);
+                inc_rfc1001_len(req, 4);
+        } else if (strcmp(ses->server->vals->version_string,
+                   SMBDEFAULT_VERSION_STRING) == 0) {
+                req->Dialects[0] = cpu_to_le16(SMB21_PROT_ID);
+                req->Dialects[1] = cpu_to_le16(SMB30_PROT_ID);
+                req->Dialects[2] = cpu_to_le16(SMB302_PROT_ID);
+                req->DialectCount = cpu_to_le16(3);
+                inc_rfc1001_len(req, 6);
+        } else {
+                /* otherwise send specific dialect */
+                req->Dialects[0] = cpu_to_le16(ses->server->vals->protocol_id);
+                req->DialectCount = cpu_to_le16(1);
+                inc_rfc1001_len(req, 2);
+        }
        /* only one of SMB2 signing flags may be set in SMB2 request */
        if (ses->sign)
@@ -528,16 +543,43 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
         */
        if (rc == -EOPNOTSUPP) {
                cifs_dbg(VFS, "Dialect not supported by server. Consider "
-                        "specifying vers=1.0 or vers=2.1 on mount for accessing"
+                        "specifying vers=1.0 or vers=2.0 on mount for accessing"
                        " older servers\n");
                goto neg_exit;
        } else if (rc != 0)
                goto neg_exit;
+        if (strcmp(ses->server->vals->version_string,
+                   SMB3ANY_VERSION_STRING) == 0) {
+                if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) {
+                        cifs_dbg(VFS,
+                                "SMB2 dialect returned but not requested\n");
+                        return -EIO;
+                } else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) {
+                        cifs_dbg(VFS,
+                                "SMB2.1 dialect returned but not requested\n");
+                        return -EIO;
+                }
+        } else if (strcmp(ses->server->vals->version_string,
+                   SMBDEFAULT_VERSION_STRING) == 0) {
+                if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) {
+                        cifs_dbg(VFS,
+                                "SMB2 dialect returned but not requested\n");
+                        return -EIO;
+                } else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) {
+                        /* ops set to 3.0 by default for default so update */
+                        ses->server->ops = &smb21_operations;
+                }
+        } else if (le16_to_cpu(rsp->DialectRevision) !=
+                                ses->server->vals->protocol_id) {
+                /* if requested single dialect ensure returned dialect matched */
+                cifs_dbg(VFS, "Illegal 0x%x dialect returned: not requested\n",
+                        le16_to_cpu(rsp->DialectRevision));
+                return -EIO;
+        }
        cifs_dbg(FYI, "mode 0x%x\n", rsp->SecurityMode);
-        /* BB we may eventually want to match the negotiated vs. requested
-           dialect, even though we are only requesting one at a time */
        if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID))
                cifs_dbg(FYI, "negotiated smb2.0 dialect\n");
        else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID))
@@ -558,6 +600,8 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
        }
        server->dialect = le16_to_cpu(rsp->DialectRevision);
+        /* BB: add check that dialect was valid given dialect(s) we asked for */
        /* SMB2 only has an extended negflavor */
        server->negflavor = CIFS_NEGFLAVOR_EXTENDED;
        /* set it to the maximum buffer size value we can send with 1 credit */
@@ -606,20 +650,28 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
        struct validate_negotiate_info_req vneg_inbuf;
        struct validate_negotiate_info_rsp *pneg_rsp;
        u32 rsplen;
+        u32 inbuflen; /* max of 4 dialects */
        cifs_dbg(FYI, "validate negotiate\n");
        /*
         * validation ioctl must be signed, so no point sending this if we
-         * can not sign it.  We could eventually change this to selectively
+         * can not sign it (ie are not known user).  Even if signing is not
+         * required (enabled but not negotiated), in those cases we selectively
         * sign just this, the first and only signed request on a connection.
-         * This is good enough for now since a user who wants better security
+         * Having validation of negotiate info  helps reduce attack vectors.
-         * would also enable signing on the mount. Having validation of
-         * negotiate info for signed connections helps reduce attack vectors
         */
-        if (tcon->ses->server->sign == false)
+        if (tcon->ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST)
                return 0; /* validation requires signing */
+        if (tcon->ses->user_name == NULL) {
+                cifs_dbg(FYI, "Can't validate negotiate: null user mount\n");
+                return 0; /* validation requires signing */
+        }
+        if (tcon->ses->session_flags & SMB2_SESSION_FLAG_IS_NULL)
+                cifs_dbg(VFS, "Unexpected null user (anonymous) auth flag sent by server\n");
        vneg_inbuf.Capabilities =
                        cpu_to_le32(tcon->ses->server->vals->req_capabilities);
        memcpy(vneg_inbuf.Guid, tcon->ses->server->client_guid,
@@ -634,9 +686,30 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
        else
                vneg_inbuf.SecurityMode = 0;
-        vneg_inbuf.DialectCount = cpu_to_le16(1);
-        vneg_inbuf.Dialects[0] =
+        if (strcmp(tcon->ses->server->vals->version_string,
-                cpu_to_le16(tcon->ses->server->vals->protocol_id);
+                SMB3ANY_VERSION_STRING) == 0) {
+                vneg_inbuf.Dialects[0] = cpu_to_le16(SMB30_PROT_ID);
+                vneg_inbuf.Dialects[1] = cpu_to_le16(SMB302_PROT_ID);
+                vneg_inbuf.DialectCount = cpu_to_le16(2);
+                /* structure is big enough for 3 dialects, sending only 2 */
+                inbuflen = sizeof(struct validate_negotiate_info_req) - 2;
+        } else if (strcmp(tcon->ses->server->vals->version_string,
+                SMBDEFAULT_VERSION_STRING) == 0) {
+                vneg_inbuf.Dialects[0] = cpu_to_le16(SMB21_PROT_ID);
+                vneg_inbuf.Dialects[1] = cpu_to_le16(SMB30_PROT_ID);
+                vneg_inbuf.Dialects[2] = cpu_to_le16(SMB302_PROT_ID);
+                vneg_inbuf.DialectCount = cpu_to_le16(3);
+                /* structure is big enough for 3 dialects */
+                inbuflen = sizeof(struct validate_negotiate_info_req);
+        } else {
+                /* otherwise specific dialect was requested */
+                vneg_inbuf.Dialects[0] =
+                        cpu_to_le16(tcon->ses->server->vals->protocol_id);
+                vneg_inbuf.DialectCount = cpu_to_le16(1);
+                /* structure is big enough for 3 dialects, sending only 1 */
+                inbuflen = sizeof(struct validate_negotiate_info_req) - 4;
+        }
        rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
                FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */,
@@ -1110,6 +1183,8 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
        while (sess_data->func)
                sess_data->func(sess_data);
+        if ((ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST) && (ses->sign))
+                cifs_dbg(VFS, "signing requested but authenticated as guest\n");
        rc = sess_data->result;
 out:
        kfree(sess_data);
@@ -1634,7 +1709,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
        struct cifs_tcon *tcon = oparms->tcon;
        struct cifs_ses *ses = tcon->ses;
        struct kvec iov[4];
-        struct kvec rsp_iov;
+        struct kvec rsp_iov = {NULL, 0};
        int resp_buftype;
        int uni_path_len;
        __le16 *copy_path = NULL;
@@ -1763,7 +1838,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
        if (rc != 0) {
                cifs_stats_fail_inc(tcon, SMB2_CREATE_HE);
-                if (err_buf)
+                if (err_buf && rsp)
                        *err_buf = kmemdup(rsp, get_rfc1002_length(rsp) + 4,
                                           GFP_KERNEL);
                goto creat_exit;
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 393ed5f4e1b6..6c9653a130c8 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -716,7 +716,7 @@ struct validate_negotiate_info_req {
        __u8   Guid[SMB2_CLIENT_GUID_SIZE];
        __le16 SecurityMode;
        __le16 DialectCount;
-        __le16 Dialects[1]; /* dialect (someday maybe list) client asked for */
+        __le16 Dialects[3]; /* BB expand this if autonegotiate > 3 dialects */
 } __packed;
 struct validate_negotiate_info_rsp {
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 5fa2211e49ae..62cf812ed0e5 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -229,6 +229,7 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
 {
        loff_t offset = dio->iocb->ki_pos;
        ssize_t transferred = 0;
+        int err;
        /*
         * AIO submission can race with bio completion to get here while
@@ -258,8 +259,22 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
        if (ret == 0)
                ret = transferred;
+        /*
+         * Try again to invalidate clean pages which might have been cached by
+         * non-direct readahead, or faulted in by get_user_pages() if the source
+         * of the write was an mmap'ed region of the file we're writing.  Either
+         * one is a pretty crazy thing to do, so we don't support it 100%.  If
+         * this invalidation fails, tough, the write still worked...
+         */
+        if (ret > 0 && dio->op == REQ_OP_WRITE &&
+            dio->inode->i_mapping->nrpages) {
+                err = invalidate_inode_pages2_range(dio->inode->i_mapping,
+                                        offset >> PAGE_SHIFT,
+                                        (offset + ret - 1) >> PAGE_SHIFT);
+                WARN_ON_ONCE(err);
+        }
        if (dio->end_io) {
-                int err;
                // XXX: ki_pos??
                err = dio->end_io(dio->iocb, offset, ret, dio->private);
@@ -304,6 +319,7 @@ static void dio_bio_end_aio(struct bio *bio)
        struct dio *dio = bio->bi_private;
        unsigned long remaining;
        unsigned long flags;
+        bool defer_completion = false;
        /* cleanup the bio */
        dio_bio_complete(dio, bio);
@@ -315,7 +331,19 @@ static void dio_bio_end_aio(struct bio *bio)
        spin_unlock_irqrestore(&dio->bio_lock, flags);
        if (remaining == 0) {
-                if (dio->result && dio->defer_completion) {
+                /*
+                 * Defer completion when defer_completion is set or
+                 * when the inode has pages mapped and this is AIO write.
+                 * We need to invalidate those pages because there is a
+                 * chance they contain stale data in the case buffered IO
+                 * went in between AIO submission and completion into the
+                 * same region.
+                 */
+                if (dio->result)
+                        defer_completion = dio->defer_completion ||
+                                           (dio->op == REQ_OP_WRITE &&
+                                            dio->inode->i_mapping->nrpages);
+                if (defer_completion) {
                        INIT_WORK(&dio->complete_work, dio_aio_complete_work);
                        queue_work(dio->inode->i_sb->s_dio_done_wq,
                                   &dio->complete_work);
@@ -1210,10 +1238,19 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
         * For AIO O_(D)SYNC writes we need to defer completions to a workqueue
         * so that we can call ->fsync.
         */
-        if (dio->is_async && iov_iter_rw(iter) == WRITE &&
+        if (dio->is_async && iov_iter_rw(iter) == WRITE) {
-            ((iocb->ki_filp->f_flags & O_DSYNC) ||
+                retval = 0;
-             IS_SYNC(iocb->ki_filp->f_mapping->host))) {
+                if ((iocb->ki_filp->f_flags & O_DSYNC) ||
-                retval = dio_set_defer_completion(dio);
+                    IS_SYNC(iocb->ki_filp->f_mapping->host))
+                        retval = dio_set_defer_completion(dio);
+                else if (!dio->inode->i_sb->s_dio_done_wq) {
+                        /*
+                         * In case of AIO write racing with buffered read we
+                         * need to defer completion. We can't decide this now,
+                         * however the workqueue needs to be initialized here.
+                         */
+                        retval = sb_init_dio_done_wq(dio->inode->i_sb);
+                }
                if (retval) {
                        /*
                         * We grab i_mutex only for reads so we don't have
diff --git a/fs/exec.c b/fs/exec.c
index ac34d9724684..5470d3c1892a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1410,7 +1410,7 @@ static void free_bprm(struct linux_binprm *bprm)
        kfree(bprm);
 }
-int bprm_change_interp(char *interp, struct linux_binprm *bprm)
+int bprm_change_interp(const char *interp, struct linux_binprm *bprm)
 {
        /* If a binfmt changed the interp, free it first. */
        if (bprm->interp != bprm->filename)
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 0491da3b28c3..448a1119f0be 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -749,7 +749,7 @@ static void send_sigio_to_task(struct task_struct *p,
                         * specific si_codes.  In that case use SI_SIGIO instead
                         * to remove the ambiguity.
                         */
-                        if (sig_specific_sicodes(signum))
+                        if ((signum != SIGPOLL) && sig_specific_sicodes(signum))
                                si.si_code = SI_SIGIO;
                        /* Make sure we are called with one of the POLL_*
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 98e845b7841b..11066d8647d2 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1945,13 +1945,9 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
 {
        struct gfs2_glock_iter *gi = seq->private;
        loff_t n = *pos;
-        int ret;
-        if (gi->last_pos <= *pos)
-                n = (*pos - gi->last_pos);
-        ret = rhashtable_walk_start(&gi->hti);
+        rhashtable_walk_enter(&gl_hash_table, &gi->hti);
-        if (ret)
+        if (rhashtable_walk_start(&gi->hti) != 0)
                return NULL;
        do {
@@ -1959,6 +1955,7 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
        } while (gi->gl && n--);
        gi->last_pos = *pos;
        return gi->gl;
 }
@@ -1970,6 +1967,7 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
        (*pos)++;
        gi->last_pos = *pos;
        gfs2_glock_iter_next(gi);
        return gi->gl;
 }
@@ -1980,6 +1978,7 @@ static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
        gi->gl = NULL;
        rhashtable_walk_stop(&gi->hti);
+        rhashtable_walk_exit(&gi->hti);
 }
 static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
@@ -2042,12 +2041,10 @@ static int __gfs2_glocks_open(struct inode *inode, struct file *file,
                struct gfs2_glock_iter *gi = seq->private;
                gi->sdp = inode->i_private;
-                gi->last_pos = 0;
                seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
                if (seq->buf)
                        seq->size = GFS2_SEQ_GOODSIZE;
                gi->gl = NULL;
-                rhashtable_walk_enter(&gl_hash_table, &gi->hti);
        }
        return ret;
 }
@@ -2063,7 +2060,6 @@ static int gfs2_glocks_release(struct inode *inode, struct file *file)
        struct gfs2_glock_iter *gi = seq->private;
        gi->gl = NULL;
-        rhashtable_walk_exit(&gi->hti);
        return seq_release_private(inode, file);
 }
diff --git a/fs/iomap.c b/fs/iomap.c
index 269b24a01f32..be61cf742b5e 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -713,8 +713,24 @@ struct iomap_dio {
 static ssize_t iomap_dio_complete(struct iomap_dio *dio)
 {
        struct kiocb *iocb = dio->iocb;
+        struct inode *inode = file_inode(iocb->ki_filp);
        ssize_t ret;
+        /*
+         * Try again to invalidate clean pages which might have been cached by
+         * non-direct readahead, or faulted in by get_user_pages() if the source
+         * of the write was an mmap'ed region of the file we're writing.  Either
+         * one is a pretty crazy thing to do, so we don't support it 100%.  If
+         * this invalidation fails, tough, the write still worked...
+         */
+        if (!dio->error &&
+            (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
+                ret = invalidate_inode_pages2_range(inode->i_mapping,
+                                iocb->ki_pos >> PAGE_SHIFT,
+                                (iocb->ki_pos + dio->size - 1) >> PAGE_SHIFT);
+                WARN_ON_ONCE(ret);
+        }
        if (dio->end_io) {
                ret = dio->end_io(iocb,
                                dio->error ? dio->error : dio->size,
@@ -993,6 +1009,13 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
        WARN_ON_ONCE(ret);
        ret = 0;
+        if (iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
+            !inode->i_sb->s_dio_done_wq) {
+                ret = sb_init_dio_done_wq(inode->i_sb);
+                if (ret < 0)
+                        goto out_free_dio;
+        }
        inode_dio_begin(inode);
        blk_start_plug(&plug);
@@ -1015,13 +1038,6 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
        if (ret < 0)
                iomap_dio_set_error(dio, ret);
-        if (ret >= 0 && iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
-                        !inode->i_sb->s_dio_done_wq) {
-                ret = sb_init_dio_done_wq(inode->i_sb);
-                if (ret < 0)
-                        iomap_dio_set_error(dio, ret);
-        }
        if (!atomic_dec_and_test(&dio->ref)) {
                if (!is_sync_kiocb(iocb))
                        return -EIOCBQUEUED;
@@ -1042,19 +1058,6 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
        ret = iomap_dio_complete(dio);
-        /*
-         * Try again to invalidate clean pages which might have been cached by
-         * non-direct readahead, or faulted in by get_user_pages() if the source
-         * of the write was an mmap'ed region of the file we're writing.  Either
-         * one is a pretty crazy thing to do, so we don't support it 100%.  If
-         * this invalidation fails, tough, the write still worked...
-         */
-        if (iov_iter_rw(iter) == WRITE) {
-                int err = invalidate_inode_pages2_range(mapping,
-                                start >> PAGE_SHIFT, end >> PAGE_SHIFT);
-                WARN_ON_ONCE(err);
-        }
        return ret;
 out_free_dio:
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index db692f554158..447a24d77b89 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -514,9 +514,11 @@ static int isofs_show_options(struct seq_file *m, struct dentry *root)
        if (sbi->s_fmode != ISOFS_INVALID_MODE)
                seq_printf(m, ",fmode=%o", sbi->s_fmode);
+#ifdef CONFIG_JOLIET
        if (sbi->s_nls_iocharset &&
            strcmp(sbi->s_nls_iocharset->charset, CONFIG_NLS_DEFAULT) != 0)
                seq_printf(m, ",iocharset=%s", sbi->s_nls_iocharset->charset);
+#endif
        return 0;
 }
diff --git a/fs/namespace.c b/fs/namespace.c
index 54059b142d6b..3b601f115b6c 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -468,7 +468,9 @@ static inline int may_write_real(struct file *file)
        /* File refers to upper, writable layer? */
        upperdentry = d_real(dentry, NULL, 0, D_REAL_UPPER);
-        if (upperdentry && file_inode(file) == d_inode(upperdentry))
+        if (upperdentry &&
+            (file_inode(file) == d_inode(upperdentry) ||
+             file_inode(file) == d_inode(dentry)))
                return 0;
        /* Lower layer: can't write to real file, sorry... */
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index aad97b30d5e6..c441f9387a1b 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -561,10 +561,8 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
                c->tmpfile = true;
                err = ovl_copy_up_locked(c);
        } else {
-                err = -EIO;
+                err = ovl_lock_rename_workdir(c->workdir, c->destdir);
-                if (lock_rename(c->workdir, c->destdir) != NULL) {
+                if (!err) {
-                        pr_err("overlayfs: failed to lock workdir+upperdir\n");
-                } else {
                        err = ovl_copy_up_locked(c);
                        unlock_rename(c->workdir, c->destdir);
                }
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 3309b1912241..cc961a3bd3bd 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -216,26 +216,6 @@ out_unlock:
        return err;
 }
-static int ovl_lock_rename_workdir(struct dentry *workdir,
-                                   struct dentry *upperdir)
-{
-        /* Workdir should not be the same as upperdir */
-        if (workdir == upperdir)
-                goto err;
-        /* Workdir should not be subdir of upperdir and vice versa */
-        if (lock_rename(workdir, upperdir) != NULL)
-                goto err_unlock;
-        return 0;
-err_unlock:
-        unlock_rename(workdir, upperdir);
-err:
-        pr_err("overlayfs: failed to lock workdir+upperdir\n");
-        return -EIO;
-}
 static struct dentry *ovl_clear_empty(struct dentry *dentry,
                                      struct list_head *list)
 {
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index c3addd1114f1..654bea1a5ac9 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -506,6 +506,7 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry,
        index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
        if (IS_ERR(index)) {
+                err = PTR_ERR(index);
                pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%*s, err=%i);\n"
                                    "overlayfs: mount with '-o index=off' to disable inodes index.\n",
                                    d_inode(origin)->i_ino, name.len, name.name,
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index d4e8c1a08fb0..c706a6f99928 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -235,6 +235,7 @@ bool ovl_inuse_trylock(struct dentry *dentry);
 void ovl_inuse_unlock(struct dentry *dentry);
 int ovl_nlink_start(struct dentry *dentry, bool *locked);
 void ovl_nlink_end(struct dentry *dentry, bool locked);
+int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir);
 static inline bool ovl_is_impuredir(struct dentry *dentry)
 {
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 878a750986dd..25d9b5adcd42 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -37,6 +37,9 @@ struct ovl_fs {
        bool noxattr;
        /* sb common to all layers */
        struct super_block *same_sb;
+        /* Did we take the inuse lock? */
+        bool upperdir_locked;
+        bool workdir_locked;
 };
 /* private information held for every overlayfs dentry */
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 62e9b22a2077..0f85ee9c3268 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -988,6 +988,7 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
                         struct path *lowerstack, unsigned int numlower)
 {
        int err;
+        struct dentry *index = NULL;
        struct inode *dir = dentry->d_inode;
        struct path path = { .mnt = mnt, .dentry = dentry };
        LIST_HEAD(list);
@@ -1007,8 +1008,6 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
        inode_lock_nested(dir, I_MUTEX_PARENT);
        list_for_each_entry(p, &list, l_node) {
-                struct dentry *index;
                if (p->name[0] == '.') {
                        if (p->len == 1)
                                continue;
@@ -1018,6 +1017,7 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
                index = lookup_one_len(p->name, dentry, p->len);
                if (IS_ERR(index)) {
                        err = PTR_ERR(index);
+                        index = NULL;
                        break;
                }
                err = ovl_verify_index(index, lowerstack, numlower);
@@ -1029,7 +1029,9 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt,
                                break;
                }
                dput(index);
+                index = NULL;
        }
+        dput(index);
        inode_unlock(dir);
 out:
        ovl_cache_free(&list);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index fd5ea4facc62..092d150643c1 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -211,9 +211,10 @@ static void ovl_put_super(struct super_block *sb)
        dput(ufs->indexdir);
        dput(ufs->workdir);
-        ovl_inuse_unlock(ufs->workbasedir);
+        if (ufs->workdir_locked)
+                ovl_inuse_unlock(ufs->workbasedir);
        dput(ufs->workbasedir);
-        if (ufs->upper_mnt)
+        if (ufs->upper_mnt && ufs->upperdir_locked)
                ovl_inuse_unlock(ufs->upper_mnt->mnt_root);
        mntput(ufs->upper_mnt);
        for (i = 0; i < ufs->numlower; i++)
@@ -881,9 +882,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
                        goto out_put_upperpath;
                err = -EBUSY;
-                if (!ovl_inuse_trylock(upperpath.dentry)) {
+                if (ovl_inuse_trylock(upperpath.dentry)) {
-                        pr_err("overlayfs: upperdir is in-use by another mount\n");
+                        ufs->upperdir_locked = true;
+                } else if (ufs->config.index) {
+                        pr_err("overlayfs: upperdir is in-use by another mount, mount with '-o index=off' to override exclusive upperdir protection.\n");
                        goto out_put_upperpath;
+                } else {
+                        pr_warn("overlayfs: upperdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
                }
                err = ovl_mount_dir(ufs->config.workdir, &workpath);
@@ -901,9 +906,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
                }
                err = -EBUSY;
-                if (!ovl_inuse_trylock(workpath.dentry)) {
+                if (ovl_inuse_trylock(workpath.dentry)) {
-                        pr_err("overlayfs: workdir is in-use by another mount\n");
+                        ufs->workdir_locked = true;
+                } else if (ufs->config.index) {
+                        pr_err("overlayfs: workdir is in-use by another mount, mount with '-o index=off' to override exclusive workdir protection.\n");
                        goto out_put_workpath;
+                } else {
+                        pr_warn("overlayfs: workdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
                }
                ufs->workbasedir = workpath.dentry;
@@ -1156,11 +1165,13 @@ out_put_lowerpath:
 out_free_lowertmp:
        kfree(lowertmp);
 out_unlock_workdentry:
-        ovl_inuse_unlock(workpath.dentry);
+        if (ufs->workdir_locked)
+                ovl_inuse_unlock(workpath.dentry);
 out_put_workpath:
        path_put(&workpath);
 out_unlock_upperdentry:
-        ovl_inuse_unlock(upperpath.dentry);
+        if (ufs->upperdir_locked)
+                ovl_inuse_unlock(upperpath.dentry);
 out_put_upperpath:
        path_put(&upperpath);
 out_free_config:
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 117794582f9f..b9b239fa5cfd 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -430,7 +430,7 @@ void ovl_inuse_unlock(struct dentry *dentry)
        }
 }
-/* Called must hold OVL_I(inode)->oi_lock */
+/* Caller must hold OVL_I(inode)->lock */
 static void ovl_cleanup_index(struct dentry *dentry)
 {
        struct inode *dir = ovl_indexdir(dentry->d_sb)->d_inode;
@@ -469,6 +469,9 @@ static void ovl_cleanup_index(struct dentry *dentry)
        err = PTR_ERR(index);
        if (!IS_ERR(index))
                err = ovl_cleanup(dir, index);
+        else
+                index = NULL;
        inode_unlock(dir);
        if (err)
                goto fail;
@@ -557,3 +560,22 @@ void ovl_nlink_end(struct dentry *dentry, bool locked)
                mutex_unlock(&OVL_I(d_inode(dentry))->lock);
        }
 }
+int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir)
+{       /* Lock workdir and upperdir via lock_rename(): returns 0 on success, -EIO otherwise */
+        /* Workdir should not be the same as upperdir; lock_rename() has not been called yet, so skip the unlock */
+        if (workdir == upperdir)
+                goto err;
+        /* Workdir should not be subdir of upperdir and vice versa */
+        if (lock_rename(workdir, upperdir) != NULL)
+                goto err_unlock;        /* a non-NULL result is treated as failure; undo lock_rename() before failing */
+        return 0;       /* success path does not call unlock_rename(); presumably the caller unlocks later */
+err_unlock:
+        unlock_rename(workdir, upperdir);
+err:
+        pr_err("overlayfs: failed to lock workdir+upperdir\n");
+        return -EIO;    /* both failure cases (same dentry, non-NULL lock_rename() result) report the same error */
+}
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 88c355574aa0..77a8eacbe032 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -62,6 +62,7 @@
 #include <linux/mman.h>
 #include <linux/sched/mm.h>
 #include <linux/sched/numa_balancing.h>
+#include <linux/sched/task_stack.h>
 #include <linux/sched/task.h>
 #include <linux/sched/cputime.h>
 #include <linux/proc_fs.h>
@@ -118,30 +119,25 @@ static inline void task_name(struct seq_file *m, struct task_struct *p)
 * simple bit tests.
 */
 static const char * const task_state_array[] = {
-        "R (running)",          /*   0 */
-        "S (sleeping)",         /*   1 */
+        /* states in TASK_REPORT: */
-        "D (disk sleep)",       /*   2 */
+        "R (running)",          /* 0x00 */
-        "T (stopped)",          /*   4 */
+        "S (sleeping)",         /* 0x01 */
-        "t (tracing stop)",     /*   8 */
+        "D (disk sleep)",       /* 0x02 */
-        "X (dead)",             /*  16 */
+        "T (stopped)",          /* 0x04 */
-        "Z (zombie)",           /*  32 */
+        "t (tracing stop)",     /* 0x08 */
+        "X (dead)",             /* 0x10 */
+        "Z (zombie)",           /* 0x20 */
+        "P (parked)",           /* 0x40 */
+        /* states beyond TASK_REPORT: */
+        "I (idle)",             /* 0x80 */
 };
 static inline const char *get_task_state(struct task_struct *tsk)
 {
-        unsigned int state = (tsk->state | tsk->exit_state) & TASK_REPORT;
+        BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != ARRAY_SIZE(task_state_array));
+        return task_state_array[__get_task_state(tsk)];
-        /*
-         * Parked tasks do not run; they sit in __kthread_parkme().
-         * Without this check, we would report them as running, which is
-         * clearly wrong, so we report them as sleeping instead.
-         */
-        if (tsk->state == TASK_PARKED)
-                state = TASK_INTERRUPTIBLE;
-        BUILD_BUG_ON(1 + ilog2(TASK_REPORT) != ARRAY_SIZE(task_state_array)-1);
-        return task_state_array[fls(state)];
 }
 static inline int get_task_umask(struct task_struct *tsk)
@@ -421,7 +417,15 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
                 * esp and eip are intentionally zeroed out.  There is no
                 * non-racy way to read them without freezing the task.
                 * Programs that need reliable values can use ptrace(2).
+                 *
+                 * The only exception is if the task is core dumping because
+                 * a program is not able to use ptrace(2) in that case. It is
+                 * safe because the task has stopped executing permanently.
                 */
+                if (permitted && (task->flags & PF_DUMPCORE)) {
+                        eip = KSTK_EIP(task);
+                        esp = KSTK_ESP(task);
+                }
        }
        get_task_comm(tcomm, task);
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 8381db9db6d9..50b0556a124f 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1980,7 +1980,9 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
                ret = dquot_add_space(transfer_to[cnt], cur_space, rsv_space, 0,
                                      &warn_to[cnt]);
                if (ret) {
+                        spin_lock(&transfer_to[cnt]->dq_dqb_lock);
                        dquot_decr_inodes(transfer_to[cnt], inode_usage);
+                        spin_unlock(&transfer_to[cnt]->dq_dqb_lock);
                        goto over_quota;
                }
        }
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index c0187cda2c1e..a73e5b34db41 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -328,12 +328,16 @@ static int v2_write_dquot(struct dquot *dquot)
        if (!dquot->dq_off) {
                alloc = true;
                down_write(&dqopt->dqio_sem);
+        } else {
+                down_read(&dqopt->dqio_sem);
        }
        ret = qtree_write_dquot(
                        sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv,
                        dquot);
        if (alloc)
                up_write(&dqopt->dqio_sem);
+        else
+                up_read(&dqopt->dqio_sem);
        return ret;
 }
diff --git a/fs/read_write.c b/fs/read_write.c
index a2b9a47235c5..f0d4b16873e8 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -112,7 +112,7 @@ generic_file_llseek_size(struct file *file, loff_t offset, int whence,
                 * In the generic case the entire file is data, so as long as
                 * offset isn't at the end of the file then the offset is data.
                 */
-                if (offset >= eof)
+                if ((unsigned long long)offset >= eof)
                        return -ENXIO;
                break;
        case SEEK_HOLE:
@@ -120,7 +120,7 @@ generic_file_llseek_size(struct file *file, loff_t offset, int whence,
                 * There is a virtual hole at the end of the file, so as long as
                 * offset isn't i_size or larger, return i_size.
                 */
-                if (offset >= eof)
+                if ((unsigned long long)offset >= eof)
                        return -ENXIO;
                offset = eof;
                break;
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index ef4b48d1ea42..1c713fd5b3e6 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -588,6 +588,12 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
                        break;
                if (ACCESS_ONCE(ctx->released) ||
                    fatal_signal_pending(current)) {
+                        /*
+                         * &ewq->wq may be queued in fork_event, but
+                         * __remove_wait_queue ignores the head
+                         * parameter. It would be a problem if it
+                         * didn't.
+                         */
                        __remove_wait_queue(&ctx->event_wqh, &ewq->wq);
                        if (ewq->msg.event == UFFD_EVENT_FORK) {
                                struct userfaultfd_ctx *new;
@@ -1061,6 +1067,12 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
                                        (unsigned long)
                                        uwq->msg.arg.reserved.reserved1;
                                list_move(&uwq->wq.entry, &fork_event);
+                                /*
+                                 * fork_nctx can be freed as soon as
+                                 * we drop the lock, unless we take a
+                                 * reference on it.
+                                 */
+                                userfaultfd_ctx_get(fork_nctx);
                                spin_unlock(&ctx->event_wqh.lock);
                                ret = 0;
                                break;
@@ -1091,19 +1103,53 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
        if (!ret && msg->event == UFFD_EVENT_FORK) {
                ret = resolve_userfault_fork(ctx, fork_nctx, msg);
+                spin_lock(&ctx->event_wqh.lock);
+                if (!list_empty(&fork_event)) {
+                        /*
+                         * The fork thread didn't abort, so we can
+                         * drop the temporary refcount.
+                         */
+                        userfaultfd_ctx_put(fork_nctx);
+                        uwq = list_first_entry(&fork_event,
+                                               typeof(*uwq),
+                                               wq.entry);
+                        /*
+                         * If fork_event list wasn't empty and in turn
+                         * the event wasn't already released by fork
+                         * (the event is allocated on fork kernel
+                         * stack), put the event back to its place in
+                         * the event_wq. fork_event head will be freed
+                         * as soon as we return so the event cannot
+                         * stay queued there no matter the current
+                         * "ret" value.
+                         */
+                        list_del(&uwq->wq.entry);
+                        __add_wait_queue(&ctx->event_wqh, &uwq->wq);
-                if (!ret) {
+                        /*
-                        spin_lock(&ctx->event_wqh.lock);
+                         * Leave the event in the waitqueue and report
-                        if (!list_empty(&fork_event)) {
+                         * error to userland if we failed to resolve
-                                uwq = list_first_entry(&fork_event,
+                         * the userfault fork.
-                                                       typeof(*uwq),
+                         */
-                                                       wq.entry);
+                        if (likely(!ret))
-                                list_del(&uwq->wq.entry);
-                                __add_wait_queue(&ctx->event_wqh, &uwq->wq);
                                userfaultfd_event_complete(ctx, uwq);
-                        }
+                } else {
-                        spin_unlock(&ctx->event_wqh.lock);
+                        /*
+                         * Here the fork thread aborted and the
+                         * refcount from the fork thread on fork_nctx
+                         * has already been released. We still hold
+                         * the reference we took before releasing the
+                         * lock above. If resolve_userfault_fork
+                         * failed we've to drop it because the
+                         * fork_nctx has to be freed in such case. If
+                         * it succeeded we'll hold it because the new
+                         * uffd references it.
+                         */
+                        if (ret)
+                                userfaultfd_ctx_put(fork_nctx);
                }
+                spin_unlock(&ctx->event_wqh.lock);
        }
        return ret;
diff --git a/fs/xattr.c b/fs/xattr.c
index 4424f7fecf14..61cd28ba25f3 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -250,7 +250,7 @@ xattr_getsecurity(struct inode *inode, const char *name, void *value,
        }
        memcpy(value, buffer, len);
 out:
-        security_release_secctx(buffer, len);
+        kfree(buffer);
 out_noalloc:
        return len;
 }
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index b008ff3250eb..df3e600835e8 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -156,7 +156,8 @@ __xfs_ag_resv_free(
        trace_xfs_ag_resv_free(pag, type, 0);
        resv = xfs_perag_resv(pag, type);
-        pag->pag_mount->m_ag_max_usable += resv->ar_asked;
+        if (pag->pag_agno == 0)
+                pag->pag_mount->m_ag_max_usable += resv->ar_asked;
        /*
         * AGFL blocks are always considered "free", so whatever
         * was reserved at mount time must be given back at umount.
@@ -216,7 +217,14 @@ __xfs_ag_resv_init(
                return error;
        }
-        mp->m_ag_max_usable -= ask;
+        /*
+         * Reduce the maximum per-AG allocation length by however much we're
+         * trying to reserve for an AG.  Since this is a filesystem-wide
+         * counter, we only make the adjustment for AG 0.  This assumes that
+         * there aren't any AGs hungrier for per-AG reservation than AG 0.
+         */
+        if (pag->pag_agno == 0)
+                mp->m_ag_max_usable -= ask;
        resv = xfs_perag_resv(pag, type);
        resv->ar_asked = ask;
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 459f4b4f08fe..044a363119be 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -49,7 +49,6 @@
 #include "xfs_rmap.h"
 #include "xfs_ag_resv.h"
 #include "xfs_refcount.h"
-#include "xfs_rmap_btree.h"
 #include "xfs_icache.h"
@@ -192,12 +191,8 @@ xfs_bmap_worst_indlen(
        int             maxrecs;        /* maximum record count at this level */
        xfs_mount_t     *mp;            /* mount structure */
        xfs_filblks_t   rval;           /* return value */
-        xfs_filblks_t   orig_len;
        mp = ip->i_mount;
-        /* Calculate the worst-case size of the bmbt. */
-        orig_len = len;
        maxrecs = mp->m_bmap_dmxr[0];
        for (level = 0, rval = 0;
             level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
@@ -205,20 +200,12 @@ xfs_bmap_worst_indlen(
                len += maxrecs - 1;
                do_div(len, maxrecs);
                rval += len;
-                if (len == 1) {
+                if (len == 1)
-                        rval += XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
+                        return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
                                level - 1;
-                        break;
-                }
                if (level == 0)
                        maxrecs = mp->m_bmap_dmxr[1];
        }
-        /* Calculate the worst-case size of the rmapbt. */
-        if (xfs_sb_version_hasrmapbt(&mp->m_sb))
-                rval += 1 + xfs_rmapbt_calc_size(mp, orig_len) +
-                                mp->m_rmap_maxlevels;
        return rval;
 }
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 29172609f2a3..f18e5932aec4 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -343,7 +343,8 @@ xfs_end_io(
                error = xfs_reflink_end_cow(ip, offset, size);
                break;
        case XFS_IO_UNWRITTEN:
-                error = xfs_iomap_write_unwritten(ip, offset, size);
+                /* writeback should never update isize */
+                error = xfs_iomap_write_unwritten(ip, offset, size, false);
                break;
        default:
                ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index cd9a5400ba4f..e9db7fc95b70 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1459,7 +1459,19 @@ xfs_shift_file_space(
                return error;
        /*
-         * The extent shiting code works on extent granularity. So, if
+         * Clean out anything hanging around in the cow fork now that
+         * we've flushed all the dirty data out to disk to avoid having
+         * CoW extents at the wrong offsets.
+         */
+        if (xfs_is_reflink_inode(ip)) {
+                error = xfs_reflink_cancel_cow_range(ip, offset, NULLFILEOFF,
+                                true);
+                if (error)
+                        return error;
+        }
+        /*
+         * The extent shifting code works on extent granularity. So, if
         * stop_fsb is not the starting block of extent, we need to split
         * the extent at stop_fsb.
         */
@@ -2110,11 +2122,31 @@ xfs_swap_extents(
                ip->i_d.di_flags2 |= tip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK;
                tip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
                tip->i_d.di_flags2 |= f & XFS_DIFLAG2_REFLINK;
+        }
+        /* Swap the cow forks. */
+        if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+                xfs_extnum_t    extnum;
+                ASSERT(ip->i_cformat == XFS_DINODE_FMT_EXTENTS);
+                ASSERT(tip->i_cformat == XFS_DINODE_FMT_EXTENTS);
+                extnum = ip->i_cnextents;
+                ip->i_cnextents = tip->i_cnextents;
+                tip->i_cnextents = extnum;
                cowfp = ip->i_cowfp;
                ip->i_cowfp = tip->i_cowfp;
                tip->i_cowfp = cowfp;
-                xfs_inode_set_cowblocks_tag(ip);
-                xfs_inode_set_cowblocks_tag(tip);
+                if (ip->i_cowfp && ip->i_cnextents)
+                        xfs_inode_set_cowblocks_tag(ip);
+                else
+                        xfs_inode_clear_cowblocks_tag(ip);
+                if (tip->i_cowfp && tip->i_cnextents)
+                        xfs_inode_set_cowblocks_tag(tip);
+                else
+                        xfs_inode_clear_cowblocks_tag(tip);
        }
        xfs_trans_log_inode(tp, ip,  src_log_flags);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index da14658da310..2f97c12ca75e 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1258,8 +1258,6 @@ xfs_buf_ioapply_map(
        int             size;
        int             offset;
-        total_nr_pages = bp->b_page_count;
        /* skip the pages in the buffer before the start offset */
        page_index = 0;
        offset = *buf_offset;
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index bd786a9ac2c3..eaf86f55b7f2 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -347,7 +347,7 @@ xfs_verifier_error(
 {
        struct xfs_mount *mp = bp->b_target->bt_mount;
-        xfs_alert(mp, "Metadata %s detected at %pF, %s block 0x%llx",
+        xfs_alert(mp, "Metadata %s detected at %pS, %s block 0x%llx",
                  bp->b_error == -EFSBADCRC ? "CRC error" : "corruption",
                  __return_address, bp->b_ops->name, bp->b_bn);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index ebdd0bd2b261..309e26c9dddb 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -58,7 +58,7 @@ xfs_zero_range(
        xfs_off_t               count,
        bool                    *did_zero)
 {
-        return iomap_zero_range(VFS_I(ip), pos, count, NULL, &xfs_iomap_ops);
+        return iomap_zero_range(VFS_I(ip), pos, count, did_zero, &xfs_iomap_ops);
 }
 int
@@ -377,8 +377,6 @@ restart:
         */
        spin_lock(&ip->i_flags_lock);
        if (iocb->ki_pos > i_size_read(inode)) {
-                bool    zero = false;
                spin_unlock(&ip->i_flags_lock);
                if (!drained_dio) {
                        if (*iolock == XFS_IOLOCK_SHARED) {
@@ -399,7 +397,7 @@ restart:
                        drained_dio = true;
                        goto restart;
                }
-                error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero);
+                error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), NULL);
                if (error)
                        return error;
        } else
@@ -436,7 +434,6 @@ xfs_dio_write_end_io(
        struct inode            *inode = file_inode(iocb->ki_filp);
        struct xfs_inode        *ip = XFS_I(inode);
        loff_t                  offset = iocb->ki_pos;
-        bool                    update_size = false;
        int                     error = 0;
        trace_xfs_end_io_direct_write(ip, offset, size);
@@ -447,6 +444,21 @@ xfs_dio_write_end_io(
        if (size <= 0)
                return size;
+        if (flags & IOMAP_DIO_COW) {
+                error = xfs_reflink_end_cow(ip, offset, size);
+                if (error)
+                        return error;
+        }
+        /*
+         * Unwritten conversion updates the in-core isize after extent
+         * conversion but before updating the on-disk size. Updating isize any
+         * earlier allows a racing dio read to find unwritten extents before
+         * they are converted.
+         */
+        if (flags & IOMAP_DIO_UNWRITTEN)
+                return xfs_iomap_write_unwritten(ip, offset, size, true);
        /*
         * We need to update the in-core inode size here so that we don't end up
         * with the on-disk inode size being outside the in-core inode size. We
@@ -461,20 +473,11 @@ xfs_dio_write_end_io(
        spin_lock(&ip->i_flags_lock);
        if (offset + size > i_size_read(inode)) {
                i_size_write(inode, offset + size);
-                update_size = true;
+                spin_unlock(&ip->i_flags_lock);
-        }
-        spin_unlock(&ip->i_flags_lock);
-        if (flags & IOMAP_DIO_COW) {
-                error = xfs_reflink_end_cow(ip, offset, size);
-                if (error)
-                        return error;
-        }
-        if (flags & IOMAP_DIO_UNWRITTEN)
-                error = xfs_iomap_write_unwritten(ip, offset, size);
-        else if (update_size)
                error = xfs_setfilesize(ip, offset, size);
+        } else {
+                spin_unlock(&ip->i_flags_lock);
+        }
        return error;
 }
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 5599dda4727a..4ec5b7f45401 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1624,10 +1624,12 @@ xfs_itruncate_extents(
                goto out;
        /*
-         * Clear the reflink flag if we truncated everything.
+         * Clear the reflink flag if there are no data fork blocks and
+         * there are no extents staged in the cow fork.
         */
-        if (ip->i_d.di_nblocks == 0 && xfs_is_reflink_inode(ip)) {
+        if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) {
-                ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
+                if (ip->i_d.di_nblocks == 0)
+                        ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
                xfs_inode_clear_cowblocks_tag(ip);
        }
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 6d0f74ec31e8..a705f34b58fa 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -745,7 +745,7 @@ xfs_iflush_done(
                 */
                iip = INODE_ITEM(blip);
                if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) ||
-                    lip->li_flags & XFS_LI_FAILED)
+                    (blip->li_flags & XFS_LI_FAILED))
                        need_ail++;
                blip = next;
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 5049e8ab6e30..aa75389be8cf 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1088,6 +1088,7 @@ xfs_ioctl_setattr_dax_invalidate(
        int                     *join_flags)
 {
        struct inode            *inode = VFS_I(ip);
+        struct super_block      *sb = inode->i_sb;
        int                     error;
        *join_flags = 0;
@@ -1100,7 +1101,7 @@ xfs_ioctl_setattr_dax_invalidate(
        if (fa->fsx_xflags & FS_XFLAG_DAX) {
                if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)))
                        return -EINVAL;
-                if (ip->i_mount->m_sb.sb_blocksize != PAGE_SIZE)
+                if (bdev_dax_supported(sb, sb->s_blocksize) < 0)
                        return -EINVAL;
        }
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index a1909bc064e9..f179bdf1644d 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -829,7 +829,8 @@ int
 xfs_iomap_write_unwritten(
        xfs_inode_t     *ip,
        xfs_off_t       offset,
-        xfs_off_t       count)
+        xfs_off_t       count,
+        bool            update_isize)
 {
        xfs_mount_t     *mp = ip->i_mount;
        xfs_fileoff_t   offset_fsb;
@@ -840,6 +841,7 @@ xfs_iomap_write_unwritten(
        xfs_trans_t     *tp;
        xfs_bmbt_irec_t imap;
        struct xfs_defer_ops dfops;
+        struct inode    *inode = VFS_I(ip);
        xfs_fsize_t     i_size;
        uint            resblks;
        int             error;
@@ -899,7 +901,8 @@ xfs_iomap_write_unwritten(
                i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb);
                if (i_size > offset + count)
                        i_size = offset + count;
+                if (update_isize && i_size > i_size_read(inode))
+                        i_size_write(inode, i_size);
                i_size = xfs_new_eof(ip, i_size);
                if (i_size) {
                        ip->i_d.di_size = i_size;
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 00db3ecea084..ee535065c5d0 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -27,7 +27,7 @@ int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
                        struct xfs_bmbt_irec *, int);
 int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t,
                        struct xfs_bmbt_irec *);
-int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t);
+int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
 void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
                struct xfs_bmbt_irec *);
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index 2f2dc3c09ad0..4246876df7b7 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -274,7 +274,7 @@ xfs_fs_commit_blocks(
                                        (end - 1) >> PAGE_SHIFT);
                WARN_ON_ONCE(error);
-                error = xfs_iomap_write_unwritten(ip, start, length);
+                error = xfs_iomap_write_unwritten(ip, start, length, false);
                if (error)
                        goto out_drop_iolock;
        }
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 3246815c24d6..37e603bf1591 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -736,7 +736,13 @@ xfs_reflink_end_cow(
        /* If there is a hole at end_fsb - 1 go to the previous extent */
        if (!xfs_iext_lookup_extent(ip, ifp, end_fsb - 1, &idx, &got) ||
            got.br_startoff > end_fsb) {
-                ASSERT(idx > 0);
+                /*
+                 * In case of racing, overlapping AIO writes no COW extents
+                 * might be left by the time I/O completes for the loser of
+                 * the race.  In that case we are done.
+                 */
+                if (idx <= 0)
+                        goto out_cancel;
                xfs_iext_get_extent(ifp, --idx, &got);
        }
@@ -809,6 +815,7 @@ next_extent:
 out_defer:
        xfs_defer_cancel(&dfops);
+out_cancel:
        xfs_trans_cancel(tp);
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
 out:
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index c996f4ae4a5f..584cf2d573ba 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1654,6 +1654,16 @@ xfs_fs_fill_super(
                "DAX and reflink have not been tested together!");
        }
+        if (mp->m_flags & XFS_MOUNT_DISCARD) {
+                struct request_queue *q = bdev_get_queue(sb->s_bdev);
+                if (!blk_queue_discard(q)) {
+                        xfs_warn(mp, "mounting with \"discard\" option, but "
+                                        "the device does not support discard");
+                        mp->m_flags &= ~XFS_MOUNT_DISCARD;
+                }
+        }
        if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
                if (mp->m_sb.sb_rblocks) {
                        xfs_alert(mp,
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2017-10-09 03:02:35 -0400
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2017-10-09 03:02:35 -0400
commit	1236d6bb6e19fc72ffc6bbcdeb1bfefe450e54ee (patch)
tree	47da3feee8e263e8c9352c85cf518e624be3c211 /fs
parent	750b1a6894ecc9b178c6e3d0a1170122971b2036 (diff)
parent	8a5776a5f49812d29fe4b2d0a2d71675c3facf3f (diff)