47 files changed, 571 insertions, 279 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 22ea424ee741..073bb57adab1 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1242,6 +1242,13 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
                                goto out_clear;
                        }
                        bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
+                        /*
+                         * If the partition is not aligned on a page
+                         * boundary, we can't do dax I/O to it.
+                         */
+                        if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512)) ||
+                            (bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
+                                bdev->bd_inode->i_flags &= ~S_DAX;
                }
        } else {
                if (bdev->bd_contains == bdev) {
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index ecbc63d3143e..9a2ec79e8cfb 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1828,7 +1828,6 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
        int found = 0;
        struct extent_buffer *eb;
        struct btrfs_inode_extref *extref;
-        struct extent_buffer *leaf;
        u32 item_size;
        u32 cur_offset;
        unsigned long ptr;
@@ -1856,9 +1855,8 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
                btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
                btrfs_release_path(path);
-                leaf = path->nodes[0];
+                item_size = btrfs_item_size_nr(eb, slot);
-                item_size = btrfs_item_size_nr(leaf, slot);
+                ptr = btrfs_item_ptr_offset(eb, slot);
-                ptr = btrfs_item_ptr_offset(leaf, slot);
                cur_offset = 0;
                while (cur_offset < item_size) {
@@ -1872,7 +1870,7 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
                        if (ret)
                                break;
-                        cur_offset += btrfs_inode_extref_name_len(leaf, extref);
+                        cur_offset += btrfs_inode_extref_name_len(eb, extref);
                        cur_offset += sizeof(*extref);
                }
                btrfs_tree_read_unlock_blocking(eb);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 81220b2203c6..0ef5cc13fae2 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -44,8 +44,6 @@
 #define BTRFS_INODE_IN_DELALLOC_LIST            9
 #define BTRFS_INODE_READDIO_NEED_LOCK           10
 #define BTRFS_INODE_HAS_PROPS                   11
-/* DIO is ready to submit */
-#define BTRFS_INODE_DIO_READY                   12
 /*
 * The following 3 bits are meant only for the btree inode.
 * When any of them is set, it means an error happened while writing an
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 0d98aee34fee..1e60d00d4ea7 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2847,6 +2847,8 @@ int open_ctree(struct super_block *sb,
            !extent_buffer_uptodate(chunk_root->node)) {
                printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n",
                       sb->s_id);
+                if (!IS_ERR(chunk_root->node))
+                        free_extent_buffer(chunk_root->node);
                chunk_root->node = NULL;
                goto fail_tree_roots;
        }
@@ -2885,6 +2887,8 @@ retry_root_backup:
            !extent_buffer_uptodate(tree_root->node)) {
                printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n",
                       sb->s_id);
+                if (!IS_ERR(tree_root->node))
+                        free_extent_buffer(tree_root->node);
                tree_root->node = NULL;
                goto recovery_tree_root;
        }
@@ -3765,9 +3769,7 @@ void close_ctree(struct btrfs_root *root)
                 * block groups queued for removal, the deletion will be
                 * skipped when we quit the cleaner thread.
                 */
-                mutex_lock(&root->fs_info->cleaner_mutex);
                btrfs_delete_unused_bgs(root->fs_info);
-                mutex_unlock(&root->fs_info->cleaner_mutex);
                ret = btrfs_commit_super(root);
                if (ret)
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 8d052209f473..2513a7f53334 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -112,11 +112,11 @@ static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh,
        u32 generation;
        if (fh_type == FILEID_BTRFS_WITH_PARENT) {
-                if (fh_len !=  BTRFS_FID_SIZE_CONNECTABLE)
+                if (fh_len <  BTRFS_FID_SIZE_CONNECTABLE)
                        return NULL;
                root_objectid = fid->root_objectid;
        } else if (fh_type == FILEID_BTRFS_WITH_PARENT_ROOT) {
-                if (fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT)
+                if (fh_len < BTRFS_FID_SIZE_CONNECTABLE_ROOT)
                        return NULL;
                root_objectid = fid->parent_root_objectid;
        } else
@@ -136,11 +136,11 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
        u32 generation;
        if ((fh_type != FILEID_BTRFS_WITH_PARENT ||
-             fh_len != BTRFS_FID_SIZE_CONNECTABLE) &&
+             fh_len < BTRFS_FID_SIZE_CONNECTABLE) &&
            (fh_type != FILEID_BTRFS_WITH_PARENT_ROOT ||
-             fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT) &&
+             fh_len < BTRFS_FID_SIZE_CONNECTABLE_ROOT) &&
            (fh_type != FILEID_BTRFS_WITHOUT_PARENT ||
-             fh_len != BTRFS_FID_SIZE_NON_CONNECTABLE))
+             fh_len < BTRFS_FID_SIZE_NON_CONNECTABLE))
                return NULL;
        objectid = fid->objectid;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5411f0ab5683..601d7d45d164 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2828,6 +2828,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
        struct btrfs_delayed_ref_head *head;
        int ret;
        int run_all = count == (unsigned long)-1;
+        bool can_flush_pending_bgs = trans->can_flush_pending_bgs;
        /* We'll clean this up in btrfs_cleanup_transaction */
        if (trans->aborted)
@@ -2844,6 +2845,7 @@ again:
 #ifdef SCRAMBLE_DELAYED_REFS
        delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
 #endif
+        trans->can_flush_pending_bgs = false;
        ret = __btrfs_run_delayed_refs(trans, root, count);
        if (ret < 0) {
                btrfs_abort_transaction(trans, root, ret);
@@ -2893,6 +2895,7 @@ again:
        }
 out:
        assert_qgroups_uptodate(trans);
+        trans->can_flush_pending_bgs = can_flush_pending_bgs;
        return 0;
 }
@@ -3742,10 +3745,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
        found->bytes_reserved = 0;
        found->bytes_readonly = 0;
        found->bytes_may_use = 0;
-        if (total_bytes > 0)
+        found->full = 0;
-                found->full = 0;
-        else
-                found->full = 1;
        found->force_alloc = CHUNK_ALLOC_NO_FORCE;
        found->chunk_alloc = 0;
        found->flush = 0;
@@ -4309,7 +4309,8 @@ out:
         * the block groups that were made dirty during the lifetime of the
         * transaction.
         */
-        if (trans->chunk_bytes_reserved >= (2 * 1024 * 1024ull)) {
+        if (trans->can_flush_pending_bgs &&
+            trans->chunk_bytes_reserved >= (2 * 1024 * 1024ull)) {
                btrfs_create_pending_block_groups(trans, trans->root);
                btrfs_trans_release_chunk_metadata(trans);
        }
@@ -8668,7 +8669,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
        }
        if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) {
-                btrfs_drop_and_free_fs_root(tree_root->fs_info, root);
+                btrfs_add_dropped_root(trans, root);
        } else {
                free_extent_buffer(root->node);
                free_extent_buffer(root->commit_root);
@@ -9563,7 +9564,9 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
        struct btrfs_block_group_item item;
        struct btrfs_key key;
        int ret = 0;
+        bool can_flush_pending_bgs = trans->can_flush_pending_bgs;
+        trans->can_flush_pending_bgs = false;
        list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {
                if (ret)
                        goto next;
@@ -9584,6 +9587,7 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
 next:
                list_del_init(&block_group->bg_list);
        }
+        trans->can_flush_pending_bgs = can_flush_pending_bgs;
 }
 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index f1018cfbfefa..3915c9473e94 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2798,7 +2798,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
                              bio_end_io_t end_io_func,
                              int mirror_num,
                              unsigned long prev_bio_flags,
-                              unsigned long bio_flags)
+                              unsigned long bio_flags,
+                              bool force_bio_submit)
 {
        int ret = 0;
        struct bio *bio;
@@ -2814,6 +2815,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
                        contig = bio_end_sector(bio) == sector;
                if (prev_bio_flags != bio_flags || !contig ||
+                    force_bio_submit ||
                    merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) ||
                    bio_add_page(bio, page, page_size, offset) < page_size) {
                        ret = submit_one_bio(rw, bio, mirror_num,
@@ -2910,7 +2912,8 @@ static int __do_readpage(struct extent_io_tree *tree,
                         get_extent_t *get_extent,
                         struct extent_map **em_cached,
                         struct bio **bio, int mirror_num,
-                         unsigned long *bio_flags, int rw)
+                         unsigned long *bio_flags, int rw,
+                         u64 *prev_em_start)
 {
        struct inode *inode = page->mapping->host;
        u64 start = page_offset(page);
@@ -2958,6 +2961,7 @@ static int __do_readpage(struct extent_io_tree *tree,
        }
        while (cur <= end) {
                unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
+                bool force_bio_submit = false;
                if (cur >= last_byte) {
                        char *userpage;
@@ -3008,6 +3012,49 @@ static int __do_readpage(struct extent_io_tree *tree,
                block_start = em->block_start;
                if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
                        block_start = EXTENT_MAP_HOLE;
+                /*
+                 * If we have a file range that points to a compressed extent
+                 * and it's followed by a consecutive file range that points to
+                 * the same compressed extent (possibly with a different
+                 * offset and/or length, so it either points to the whole extent
+                 * or only part of it), we must make sure we do not submit a
+                 * single bio to populate the pages for the 2 ranges because
+                 * this makes the compressed extent read zero out the pages
+                 * belonging to the 2nd range. Imagine the following scenario:
+                 *
+                 *  File layout
+                 *  [0 - 8K]                     [8K - 24K]
+                 *    |                               |
+                 *    |                               |
+                 * points to extent X,         points to extent X,
+                 * offset 4K, length of 8K     offset 0, length 16K
+                 *
+                 * [extent X, compressed length = 4K uncompressed length = 16K]
+                 *
+                 * If the bio to read the compressed extent covers both ranges,
+                 * it will decompress extent X into the pages belonging to the
+                 * first range and then it will stop, zeroing out the remaining
+                 * pages that belong to the other range that points to extent X.
+                 * So here we make sure we submit 2 bios, one for the first
+                 * range and another one for the second range. Both will target
+                 * the same physical extent from disk, but we can't currently
+                 * make the compressed bio endio callback populate the pages
+                 * for both ranges because each compressed bio is tightly
+                 * coupled with a single extent map, and each range can have
+                 * an extent map with a different offset value relative to the
+                 * uncompressed data of our extent and different lengths. This
+                 * is a corner case so we prioritize correctness over
+                 * non-optimal behavior (submitting 2 bios for the same extent).
+                 */
+                if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) &&
+                    prev_em_start && *prev_em_start != (u64)-1 &&
+                    *prev_em_start != em->orig_start)
+                        force_bio_submit = true;
+                if (prev_em_start)
+                        *prev_em_start = em->orig_start;
                free_extent_map(em);
                em = NULL;
@@ -3057,7 +3104,8 @@ static int __do_readpage(struct extent_io_tree *tree,
                                         bdev, bio, pnr,
                                         end_bio_extent_readpage, mirror_num,
                                         *bio_flags,
-                                         this_bio_flag);
+                                         this_bio_flag,
+                                         force_bio_submit);
                if (!ret) {
                        nr++;
                        *bio_flags = this_bio_flag;
@@ -3084,7 +3132,8 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
                                             get_extent_t *get_extent,
                                             struct extent_map **em_cached,
                                             struct bio **bio, int mirror_num,
-                                             unsigned long *bio_flags, int rw)
+                                             unsigned long *bio_flags, int rw,
+                                             u64 *prev_em_start)
 {
        struct inode *inode;
        struct btrfs_ordered_extent *ordered;
@@ -3104,7 +3153,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
        for (index = 0; index < nr_pages; index++) {
                __do_readpage(tree, pages[index], get_extent, em_cached, bio,
-                              mirror_num, bio_flags, rw);
+                              mirror_num, bio_flags, rw, prev_em_start);
                page_cache_release(pages[index]);
        }
 }
@@ -3114,7 +3163,8 @@ static void __extent_readpages(struct extent_io_tree *tree,
                               int nr_pages, get_extent_t *get_extent,
                               struct extent_map **em_cached,
                               struct bio **bio, int mirror_num,
-                               unsigned long *bio_flags, int rw)
+                               unsigned long *bio_flags, int rw,
+                               u64 *prev_em_start)
 {
        u64 start = 0;
        u64 end = 0;
@@ -3135,7 +3185,7 @@ static void __extent_readpages(struct extent_io_tree *tree,
                                                  index - first_index, start,
                                                  end, get_extent, em_cached,
                                                  bio, mirror_num, bio_flags,
-                                                  rw);
+                                                  rw, prev_em_start);
                        start = page_start;
                        end = start + PAGE_CACHE_SIZE - 1;
                        first_index = index;
@@ -3146,7 +3196,8 @@ static void __extent_readpages(struct extent_io_tree *tree,
                __do_contiguous_readpages(tree, &pages[first_index],
                                          index - first_index, start,
                                          end, get_extent, em_cached, bio,
-                                          mirror_num, bio_flags, rw);
+                                          mirror_num, bio_flags, rw,
+                                          prev_em_start);
 }
 static int __extent_read_full_page(struct extent_io_tree *tree,
@@ -3172,7 +3223,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
        }
        ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
-                            bio_flags, rw);
+                            bio_flags, rw, NULL);
        return ret;
 }
@@ -3198,7 +3249,7 @@ int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
        int ret;
        ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num,
-                                      &bio_flags, READ);
+                            &bio_flags, READ, NULL);
        if (bio)
                ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
        return ret;
@@ -3451,7 +3502,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
                                                 sector, iosize, pg_offset,
                                                 bdev, &epd->bio, max_nr,
                                                 end_bio_extent_writepage,
-                                                 0, 0, 0);
+                                                 0, 0, 0, false);
                        if (ret)
                                SetPageError(page);
                }
@@ -3754,7 +3805,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
                ret = submit_extent_page(rw, tree, wbc, p, offset >> 9,
                                         PAGE_CACHE_SIZE, 0, bdev, &epd->bio,
                                         -1, end_bio_extent_buffer_writepage,
-                                         0, epd->bio_flags, bio_flags);
+                                         0, epd->bio_flags, bio_flags, false);
                epd->bio_flags = bio_flags;
                if (ret) {
                        set_btree_ioerr(p);
@@ -4158,6 +4209,7 @@ int extent_readpages(struct extent_io_tree *tree,
        struct page *page;
        struct extent_map *em_cached = NULL;
        int nr = 0;
+        u64 prev_em_start = (u64)-1;
        for (page_idx = 0; page_idx < nr_pages; page_idx++) {
                page = list_entry(pages->prev, struct page, lru);
@@ -4174,12 +4226,12 @@ int extent_readpages(struct extent_io_tree *tree,
                if (nr < ARRAY_SIZE(pagepool))
                        continue;
                __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
-                                   &bio, 0, &bio_flags, READ);
+                                   &bio, 0, &bio_flags, READ, &prev_em_start);
                nr = 0;
        }
        if (nr)
                __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
-                                   &bio, 0, &bio_flags, READ);
+                                   &bio, 0, &bio_flags, READ, &prev_em_start);
        if (em_cached)
                free_extent_map(em_cached);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a0fa7253a2d7..611b66d73e80 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5084,7 +5084,8 @@ void btrfs_evict_inode(struct inode *inode)
                goto no_delete;
        }
        /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */
-        btrfs_wait_ordered_range(inode, 0, (u64)-1);
+        if (!special_file(inode->i_mode))
+                btrfs_wait_ordered_range(inode, 0, (u64)-1);
        btrfs_free_io_failure_record(inode, 0, (u64)-1);
@@ -7408,6 +7409,10 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
        return em;
 }
+struct btrfs_dio_data {  /* direct-IO write accounting, handed to btrfs_get_blocks_direct() via current->journal_info */
+        u64 outstanding_extents;  /* extents counted up front by btrfs_direct_IO(); decremented per mapped range while non-zero */
+        u64 reserve;  /* bytes still reserved for the write; reduced by len per mapping, remainder released on error */
+};
 static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
                                   struct buffer_head *bh_result, int create)
@@ -7415,10 +7420,10 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
        struct extent_map *em;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct extent_state *cached_state = NULL;
+        struct btrfs_dio_data *dio_data = NULL;
        u64 start = iblock << inode->i_blkbits;
        u64 lockstart, lockend;
        u64 len = bh_result->b_size;
-        u64 *outstanding_extents = NULL;
        int unlock_bits = EXTENT_LOCKED;
        int ret = 0;
@@ -7436,7 +7441,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
                 * that anything that needs to check if there's a transction doesn't get
                 * confused.
                 */
-                outstanding_extents = current->journal_info;
+                dio_data = current->journal_info;
                current->journal_info = NULL;
        }
@@ -7568,17 +7573,18 @@ unlock:
                 * within our reservation, otherwise we need to adjust our inode
                 * counter appropriately.
                 */
-                if (*outstanding_extents) {
+                if (dio_data->outstanding_extents) {
-                        (*outstanding_extents)--;
+                        (dio_data->outstanding_extents)--;
                } else {
                        spin_lock(&BTRFS_I(inode)->lock);
                        BTRFS_I(inode)->outstanding_extents++;
                        spin_unlock(&BTRFS_I(inode)->lock);
                }
-                current->journal_info = outstanding_extents;
                btrfs_free_reserved_data_space(inode, len);
-                set_bit(BTRFS_INODE_DIO_READY, &BTRFS_I(inode)->runtime_flags);
+                WARN_ON(dio_data->reserve < len);
+                dio_data->reserve -= len;
+                current->journal_info = dio_data;
        }
        /*
@@ -7601,8 +7607,8 @@ unlock:
 unlock_err:
        clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
                         unlock_bits, 1, 0, &cached_state, GFP_NOFS);
-        if (outstanding_extents)
+        if (dio_data)
-                current->journal_info = outstanding_extents;
+                current->journal_info = dio_data;
        return ret;
 }
@@ -8329,7 +8335,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
-        u64 outstanding_extents = 0;
+        struct btrfs_root *root = BTRFS_I(inode)->root;
+        struct btrfs_dio_data dio_data = { 0 };
        size_t count = 0;
        int flags = 0;
        bool wakeup = true;
@@ -8367,7 +8374,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
                ret = btrfs_delalloc_reserve_space(inode, count);
                if (ret)
                        goto out;
-                outstanding_extents = div64_u64(count +
+                dio_data.outstanding_extents = div64_u64(count +
                                                BTRFS_MAX_EXTENT_SIZE - 1,
                                                BTRFS_MAX_EXTENT_SIZE);
@@ -8376,7 +8383,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
                 * do the accounting properly if we go over the number we
                 * originally calculated.  Abuse current->journal_info for this.
                 */
-                current->journal_info = &outstanding_extents;
+                dio_data.reserve = round_up(count, root->sectorsize);
+                current->journal_info = &dio_data;
        } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
                                     &BTRFS_I(inode)->runtime_flags)) {
                inode_dio_end(inode);
@@ -8391,16 +8399,9 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
        if (iov_iter_rw(iter) == WRITE) {
                current->journal_info = NULL;
                if (ret < 0 && ret != -EIOCBQUEUED) {
-                        /*
+                        if (dio_data.reserve)
-                         * If the error comes from submitting stage,
+                                btrfs_delalloc_release_space(inode,
-                         * btrfs_get_blocsk_direct() has free'd data space,
+                                                        dio_data.reserve);
-                         * and metadata space will be handled by
-                         * finish_ordered_fn, don't do that again to make
-                         * sure bytes_may_use is correct.
-                         */
-                        if (!test_and_clear_bit(BTRFS_INODE_DIO_READY,
-                                     &BTRFS_I(inode)->runtime_flags))
-                                btrfs_delalloc_release_space(inode, count);
                } else if (ret >= 0 && (size_t)ret < count)
                        btrfs_delalloc_release_space(inode,
                                                     count - (size_t)ret);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0adf5422fce9..3e3e6130637f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -4639,6 +4639,11 @@ locked:
                bctl->flags |= BTRFS_BALANCE_TYPE_MASK;
        }
+        if (bctl->flags & ~(BTRFS_BALANCE_ARGS_MASK | BTRFS_BALANCE_TYPE_MASK)) {
+                ret = -EINVAL;
+                goto out_bargs;
+        }
 do_balance:
        /*
         * Ownership of bctl and mutually_exclusive_operation_running
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index aa72bfd28f7d..a739b825bdd3 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -1920,10 +1920,12 @@ static int did_overwrite_ref(struct send_ctx *sctx,
        /*
         * We know that it is or will be overwritten. Check this now.
         * The current inode being processed might have been the one that caused
-         * inode 'ino' to be orphanized, therefore ow_inode can actually be the
+         * inode 'ino' to be orphanized, therefore check if ow_inode matches
-         * same as sctx->send_progress.
+         * the current inode being processed.
         */
-        if (ow_inode <= sctx->send_progress)
+        if ((ow_inode < sctx->send_progress) ||
+            (ino != sctx->cur_ino && ow_inode == sctx->cur_ino &&
+             gen == sctx->cur_inode_gen))
                ret = 1;
        else
                ret = 0;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 2b07b3581781..11d1eab9234d 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1658,9 +1658,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
                 * groups on disk until we're mounted read-write again
                 * unless we clean them up here.
                 */
-                mutex_lock(&root->fs_info->cleaner_mutex);
                btrfs_delete_unused_bgs(fs_info);
-                mutex_unlock(&root->fs_info->cleaner_mutex);
                btrfs_dev_replace_suspend_for_unmount(fs_info);
                btrfs_scrub_cancel(fs_info);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 8f259b3a66b3..a5b06442f0bf 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -117,6 +117,18 @@ static noinline void switch_commit_roots(struct btrfs_transaction *trans,
                        btrfs_unpin_free_ino(root);
                clear_btree_io_tree(&root->dirty_log_pages);
        }
+        /* We can free old roots now. */
+        spin_lock(&trans->dropped_roots_lock);
+        while (!list_empty(&trans->dropped_roots)) {
+                root = list_first_entry(&trans->dropped_roots,
+                                        struct btrfs_root, root_list);
+                list_del_init(&root->root_list);
+                spin_unlock(&trans->dropped_roots_lock);
+                btrfs_drop_and_free_fs_root(fs_info, root);
+                spin_lock(&trans->dropped_roots_lock);
+        }
+        spin_unlock(&trans->dropped_roots_lock);
        up_write(&fs_info->commit_root_sem);
 }
@@ -255,11 +267,13 @@ loop:
        INIT_LIST_HEAD(&cur_trans->pending_ordered);
        INIT_LIST_HEAD(&cur_trans->dirty_bgs);
        INIT_LIST_HEAD(&cur_trans->io_bgs);
+        INIT_LIST_HEAD(&cur_trans->dropped_roots);
        mutex_init(&cur_trans->cache_write_mutex);
        cur_trans->num_dirty_bgs = 0;
        spin_lock_init(&cur_trans->dirty_bgs_lock);
        INIT_LIST_HEAD(&cur_trans->deleted_bgs);
        spin_lock_init(&cur_trans->deleted_bgs_lock);
+        spin_lock_init(&cur_trans->dropped_roots_lock);
        list_add_tail(&cur_trans->list, &fs_info->trans_list);
        extent_io_tree_init(&cur_trans->dirty_pages,
                             fs_info->btree_inode->i_mapping);
@@ -336,6 +350,24 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans,
 }
+void btrfs_add_dropped_root(struct btrfs_trans_handle *trans,
+                            struct btrfs_root *root)
+{  /* Park a dropped root on trans->transaction; switch_commit_roots() later frees everything on that list. */
+        struct btrfs_transaction *cur_trans = trans->transaction;
+        /* Add ourselves to the transaction's dropped_roots list, under dropped_roots_lock */
+        spin_lock(&cur_trans->dropped_roots_lock);
+        list_add_tail(&root->root_list, &cur_trans->dropped_roots);
+        spin_unlock(&cur_trans->dropped_roots_lock);
+        /* Clear BTRFS_ROOT_TRANS_TAG for this root so we don't try to update the root at commit time */
+        spin_lock(&root->fs_info->fs_roots_radix_lock);
+        radix_tree_tag_clear(&root->fs_info->fs_roots_radix,
+                             (unsigned long)root->root_key.objectid,  /* radix index is the root's objectid */
+                             BTRFS_ROOT_TRANS_TAG);
+        spin_unlock(&root->fs_info->fs_roots_radix_lock);
+}
 int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root)
 {
@@ -525,6 +557,7 @@ again:
        h->delayed_ref_elem.seq = 0;
        h->type = type;
        h->allocating_chunk = false;
+        h->can_flush_pending_bgs = true;
        h->reloc_reserved = false;
        h->sync = false;
        INIT_LIST_HEAD(&h->qgroup_ref_list);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index edc2fbc262d7..a994bb097ee5 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -65,6 +65,7 @@ struct btrfs_transaction {
        struct list_head switch_commits;
        struct list_head dirty_bgs;
        struct list_head io_bgs;
+        struct list_head dropped_roots;
        u64 num_dirty_bgs;
        /*
@@ -76,6 +77,7 @@ struct btrfs_transaction {
        spinlock_t dirty_bgs_lock;
        struct list_head deleted_bgs;
        spinlock_t deleted_bgs_lock;
+        spinlock_t dropped_roots_lock;
        struct btrfs_delayed_ref_root delayed_refs;
        int aborted;
        int dirty_bg_run;
@@ -116,6 +118,7 @@ struct btrfs_trans_handle {
        short aborted;
        short adding_csums;
        bool allocating_chunk;
+        bool can_flush_pending_bgs;
        bool reloc_reserved;
        bool sync;
        unsigned int type;
@@ -216,5 +219,6 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info);
 int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
 void btrfs_put_transaction(struct btrfs_transaction *transaction);
 void btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info);
+void btrfs_add_dropped_root(struct btrfs_trans_handle *trans,
+                            struct btrfs_root *root);
 #endif
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 2ca784a14e84..595279a8b99f 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -376,6 +376,14 @@ struct map_lookup {
 #define BTRFS_BALANCE_ARGS_VRANGE       (1ULL << 4)
 #define BTRFS_BALANCE_ARGS_LIMIT        (1ULL << 5)
+#define BTRFS_BALANCE_ARGS_MASK                 \
+        (BTRFS_BALANCE_ARGS_PROFILES |          \
+         BTRFS_BALANCE_ARGS_USAGE |             \
+         BTRFS_BALANCE_ARGS_DEVID |             \
+         BTRFS_BALANCE_ARGS_DRANGE |            \
+         BTRFS_BALANCE_ARGS_VRANGE |            \
+         BTRFS_BALANCE_ARGS_LIMIT)
 /*
 * Profile changing flags.  When SOFT is set we won't relocate chunk if
 * it already has the target profile (even though it may be
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index aa0dc2573374..afa09fce8151 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -444,6 +444,48 @@ find_domain_name(struct cifs_ses *ses, const struct nls_table *nls_cp)
        return 0;
 }
+/* Scan the AV pairs (target info) the server sent in its type 2 challenge,
+ * held in ses->auth_key.response, for an NTLMSSP_AV_TIMESTAMP entry to use
+ * in ntlmv2 authentication. Returns the entry's 8-byte value as stored in
+ * the blob, the local current time when no usable entry is found, or 0
+ * when the session holds no blob at all.
+ */
+static __le64
+find_timestamp(struct cifs_ses *ses)
+{
+        unsigned int attrsize;
+        unsigned int type;
+        unsigned int onesize = sizeof(struct ntlmssp2_name);
+        unsigned char *blobptr;
+        unsigned char *blobend;
+        struct ntlmssp2_name *attrptr;
+        if (!ses->auth_key.len || !ses->auth_key.response)
+                return 0; /* no blob stored on this session */
+        blobptr = ses->auth_key.response;
+        blobend = blobptr + ses->auth_key.len;
+        while (blobptr + onesize < blobend) {
+                attrptr = (struct ntlmssp2_name *) blobptr;
+                type = le16_to_cpu(attrptr->type);
+                if (type == NTLMSSP_AV_EOL)
+                        break;
+                blobptr += 2; /* step over the 2-byte type field */
+                attrsize = le16_to_cpu(attrptr->length);
+                blobptr += 2; /* step over the 2-byte length field */
+                if (blobptr + attrsize > blobend)
+                        break; /* value would run past the end of the blob */
+                if (type == NTLMSSP_AV_TIMESTAMP) {
+                        if (attrsize == sizeof(u64)) /* other sizes are skipped */
+                                return *((__le64 *)blobptr);
+                }
+                blobptr += attrsize; /* step over the value to the next entry */
+        }
+        return cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); /* fallback */
+}
 static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
                            const struct nls_table *nls_cp)
 {
@@ -641,6 +683,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
        struct ntlmv2_resp *ntlmv2;
        char ntlmv2_hash[16];
        unsigned char *tiblob = NULL; /* target info blob */
+        __le64 rsp_timestamp;
        if (ses->server->negflavor == CIFS_NEGFLAVOR_EXTENDED) {
                if (!ses->domainName) {
@@ -659,6 +702,12 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
                }
        }
+        /* Must be within 5 minutes of the server (or in range +/-2h
+         * in case of Mac OS X), so simply carry over server timestamp
+         * (as Windows 7 does)
+         */
+        rsp_timestamp = find_timestamp(ses);
        baselen = CIFS_SESS_KEY_SIZE + sizeof(struct ntlmv2_resp);
        tilen = ses->auth_key.len;
        tiblob = ses->auth_key.response;
@@ -675,8 +724,8 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
                        (ses->auth_key.response + CIFS_SESS_KEY_SIZE);
        ntlmv2->blob_signature = cpu_to_le32(0x00000101);
        ntlmv2->reserved = 0;
-        /* Must be within 5 minutes of the server */
+        ntlmv2->time = rsp_timestamp;
-        ntlmv2->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
        get_random_bytes(&ntlmv2->client_chal, sizeof(ntlmv2->client_chal));
        ntlmv2->reserved2 = 0;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 6a1119e87fbb..e739950ca084 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -325,8 +325,11 @@ cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server)
 static void
 cifs_show_security(struct seq_file *s, struct cifs_ses *ses)
 {
-        if (ses->sectype == Unspecified)
+        if (ses->sectype == Unspecified) {
+                if (ses->user_name == NULL)
+                        seq_puts(s, ",sec=none");
                return;
+        }
        seq_puts(s, ",sec=");
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 27aea110e923..c3cc1609025f 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -136,5 +136,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 extern const struct export_operations cifs_export_ops;
 #endif /* CONFIG_CIFS_NFSD_EXPORT */
-#define CIFS_VERSION   "2.07"
+#define CIFS_VERSION   "2.08"
 #endif                          /* _CIFSFS_H */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index e2a6af1508af..62203c387db4 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3380,6 +3380,7 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
        struct page *page, *tpage;
        unsigned int expected_index;
        int rc;
+        gfp_t gfp = GFP_KERNEL & mapping_gfp_mask(mapping);
        INIT_LIST_HEAD(tmplist);
@@ -3392,7 +3393,7 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
         */
        __set_page_locked(page);
        rc = add_to_page_cache_locked(page, mapping,
-                                      page->index, GFP_KERNEL);
+                                      page->index, gfp);
        /* give up if we can't stick it in the cache */
        if (rc) {
@@ -3418,8 +3419,7 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
                        break;
                __set_page_locked(page);
-                if (add_to_page_cache_locked(page, mapping, page->index,
+                if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
-                                                                GFP_KERNEL)) {
                        __clear_page_locked(page);
                        break;
                }
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index f621b44cb800..6b66dd5d1540 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -2034,7 +2034,6 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
        struct tcon_link *tlink = NULL;
        struct cifs_tcon *tcon = NULL;
        struct TCP_Server_Info *server;
-        struct cifs_io_parms io_parms;
        /*
         * To avoid spurious oplock breaks from server, in the case of
@@ -2056,18 +2055,6 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
                        rc = -ENOSYS;
                cifsFileInfo_put(open_file);
                cifs_dbg(FYI, "SetFSize for attrs rc = %d\n", rc);
-                if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
-                        unsigned int bytes_written;
-                        io_parms.netfid = open_file->fid.netfid;
-                        io_parms.pid = open_file->pid;
-                        io_parms.tcon = tcon;
-                        io_parms.offset = 0;
-                        io_parms.length = attrs->ia_size;
-                        rc = CIFSSMBWrite(xid, &io_parms, &bytes_written,
-                                          NULL, NULL, 1);
-                        cifs_dbg(FYI, "Wrt seteof rc %d\n", rc);
-                }
        } else
                rc = -EINVAL;
@@ -2093,28 +2080,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
        else
                rc = -ENOSYS;
        cifs_dbg(FYI, "SetEOF by path (setattrs) rc = %d\n", rc);
-        if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
-                __u16 netfid;
-                int oplock = 0;
-                rc = SMBLegacyOpen(xid, tcon, full_path, FILE_OPEN,
-                                   GENERIC_WRITE, CREATE_NOT_DIR, &netfid,
-                                   &oplock, NULL, cifs_sb->local_nls,
-                                   cifs_remap(cifs_sb));
-                if (rc == 0) {
-                        unsigned int bytes_written;
-                        io_parms.netfid = netfid;
-                        io_parms.pid = current->tgid;
-                        io_parms.tcon = tcon;
-                        io_parms.offset = 0;
-                        io_parms.length = attrs->ia_size;
-                        rc = CIFSSMBWrite(xid, &io_parms, &bytes_written, NULL,
-                                          NULL,  1);
-                        cifs_dbg(FYI, "wrt seteof rc %d\n", rc);
-                        CIFSSMBClose(xid, tcon, netfid);
-                }
-        }
        if (tlink)
                cifs_put_tlink(tlink);
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index c63f5227b681..28a77bf1d559 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -67,6 +67,12 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
                goto out_drop_write;
        }
+        if (src_file.file->f_op->unlocked_ioctl != cifs_ioctl) {
+                rc = -EBADF;
+                cifs_dbg(VFS, "src file seems to be from a different filesystem type\n");
+                goto out_fput;
+        }
        if ((!src_file.file->private_data) || (!dst_file->private_data)) {
                rc = -EBADF;
                cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n");
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index df91bcf56d67..18da19f4f811 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -50,9 +50,13 @@ change_conf(struct TCP_Server_Info *server)
                break;
        default:
                server->echoes = true;
-                server->oplocks = true;
+                if (enable_oplocks) {
+                        server->oplocks = true;
+                        server->oplock_credits = 1;
+                } else
+                        server->oplocks = false;
                server->echo_credits = 1;
-                server->oplock_credits = 1;
        }
        server->credits -= server->echo_credits + server->oplock_credits;
        return 0;
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 070fb2ad85ce..597a417ba94d 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -46,6 +46,7 @@
 #include "smb2status.h"
 #include "smb2glob.h"
 #include "cifspdu.h"
+#include "cifs_spnego.h"
 /*
 *  The following table defines the expected "StructureSize" of SMB2 requests
@@ -486,19 +487,15 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
                cifs_dbg(FYI, "missing security blob on negprot\n");
        rc = cifs_enable_signing(server, ses->sign);
-#ifdef CONFIG_SMB2_ASN1  /* BB REMOVEME when updated asn1.c ready */
        if (rc)
                goto neg_exit;
-        if (blob_length)
+        if (blob_length) {
                rc = decode_negTokenInit(security_blob, blob_length, server);
-        if (rc == 1)
+                if (rc == 1)
-                rc = 0;
+                        rc = 0;
-        else if (rc == 0) {
+                else if (rc == 0)
-                rc = -EIO;
+                        rc = -EIO;
-                goto neg_exit;
        }
-#endif
 neg_exit:
        free_rsp_buf(resp_buftype, rsp);
        return rc;
@@ -592,7 +589,8 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
        __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */
        struct TCP_Server_Info *server = ses->server;
        u16 blob_length = 0;
-        char *security_blob;
+        struct key *spnego_key = NULL;
+        char *security_blob = NULL;
        char *ntlmssp_blob = NULL;
        bool use_spnego = false; /* else use raw ntlmssp */
@@ -620,7 +618,8 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
        ses->ntlmssp->sesskey_per_smbsess = true;
        /* FIXME: allow for other auth types besides NTLMSSP (e.g. krb5) */
-        ses->sectype = RawNTLMSSP;
+        if (ses->sectype != Kerberos && ses->sectype != RawNTLMSSP)
+                ses->sectype = RawNTLMSSP;
 ssetup_ntlmssp_authenticate:
        if (phase == NtLmChallenge)
@@ -649,7 +648,48 @@ ssetup_ntlmssp_authenticate:
        iov[0].iov_base = (char *)req;
        /* 4 for rfc1002 length field and 1 for pad */
        iov[0].iov_len = get_rfc1002_length(req) + 4 - 1;
-        if (phase == NtLmNegotiate) {
+        if (ses->sectype == Kerberos) {
+#ifdef CONFIG_CIFS_UPCALL
+                struct cifs_spnego_msg *msg;
+                spnego_key = cifs_get_spnego_key(ses);
+                if (IS_ERR(spnego_key)) {
+                        rc = PTR_ERR(spnego_key);
+                        spnego_key = NULL;
+                        goto ssetup_exit;
+                }
+                msg = spnego_key->payload.data;
+                /*
+                 * check version field to make sure that cifs.upcall is
+                 * sending us a response in an expected form
+                 */
+                if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) {
+                        cifs_dbg(VFS,
+                                  "bad cifs.upcall version. Expected %d got %d",
+                                  CIFS_SPNEGO_UPCALL_VERSION, msg->version);
+                        rc = -EKEYREJECTED;
+                        goto ssetup_exit;
+                }
+                ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len,
+                                                 GFP_KERNEL);
+                if (!ses->auth_key.response) {
+                        cifs_dbg(VFS,
+                                "Kerberos can't allocate (%u bytes) memory",
+                                msg->sesskey_len);
+                        rc = -ENOMEM;
+                        goto ssetup_exit;
+                }
+                ses->auth_key.len = msg->sesskey_len;
+                blob_length = msg->secblob_len;
+                iov[1].iov_base = msg->data + msg->sesskey_len;
+                iov[1].iov_len = blob_length;
+#else
+                rc = -EOPNOTSUPP;
+                goto ssetup_exit;
+#endif /* CONFIG_CIFS_UPCALL */
+        } else if (phase == NtLmNegotiate) { /* if not krb5 must be ntlmssp */
                ntlmssp_blob = kmalloc(sizeof(struct _NEGOTIATE_MESSAGE),
                                       GFP_KERNEL);
                if (ntlmssp_blob == NULL) {
@@ -672,6 +712,8 @@ ssetup_ntlmssp_authenticate:
                        /* with raw NTLMSSP we don't encapsulate in SPNEGO */
                        security_blob = ntlmssp_blob;
                }
+                iov[1].iov_base = security_blob;
+                iov[1].iov_len = blob_length;
        } else if (phase == NtLmAuthenticate) {
                req->hdr.SessionId = ses->Suid;
                ntlmssp_blob = kzalloc(sizeof(struct _NEGOTIATE_MESSAGE) + 500,
@@ -699,6 +741,8 @@ ssetup_ntlmssp_authenticate:
                } else {
                        security_blob = ntlmssp_blob;
                }
+                iov[1].iov_base = security_blob;
+                iov[1].iov_len = blob_length;
        } else {
                cifs_dbg(VFS, "illegal ntlmssp phase\n");
                rc = -EIO;
@@ -710,8 +754,6 @@ ssetup_ntlmssp_authenticate:
                                cpu_to_le16(sizeof(struct smb2_sess_setup_req) -
                                            1 /* pad */ - 4 /* rfc1001 len */);
        req->SecurityBufferLength = cpu_to_le16(blob_length);
-        iov[1].iov_base = security_blob;
-        iov[1].iov_len = blob_length;
        inc_rfc1001_len(req, blob_length - 1 /* pad */);
@@ -722,6 +764,7 @@ ssetup_ntlmssp_authenticate:
        kfree(security_blob);
        rsp = (struct smb2_sess_setup_rsp *)iov[0].iov_base;
+        ses->Suid = rsp->hdr.SessionId;
        if (resp_buftype != CIFS_NO_BUFFER &&
            rsp->hdr.Status == STATUS_MORE_PROCESSING_REQUIRED) {
                if (phase != NtLmNegotiate) {
@@ -739,7 +782,6 @@ ssetup_ntlmssp_authenticate:
                /* NTLMSSP Negotiate sent now processing challenge (response) */
                phase = NtLmChallenge; /* process ntlmssp challenge */
                rc = 0; /* MORE_PROCESSING is not an error here but expected */
-                ses->Suid = rsp->hdr.SessionId;
                rc = decode_ntlmssp_challenge(rsp->Buffer,
                                le16_to_cpu(rsp->SecurityBufferLength), ses);
        }
@@ -796,6 +838,10 @@ keygen_exit:
                kfree(ses->auth_key.response);
                ses->auth_key.response = NULL;
        }
+        if (spnego_key) {
+                key_invalidate(spnego_key);
+                key_put(spnego_key);
+        }
        kfree(ses->ntlmssp);
        return rc;
@@ -876,6 +922,12 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
        if (tcon && tcon->bad_network_name)
                return -ENOENT;
+        if ((tcon && tcon->seal) &&
+            ((ses->server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION) == 0)) {
+                cifs_dbg(VFS, "encryption requested but no server support");
+                return -EOPNOTSUPP;
+        }
        unc_path = kmalloc(MAX_SHARENAME_LENGTH * 2, GFP_KERNEL);
        if (unc_path == NULL)
                return -ENOMEM;
@@ -955,6 +1007,8 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
            ((tcon->share_flags & SHI1005_FLAGS_DFS) == 0))
                cifs_dbg(VFS, "DFS capability contradicts DFS flag\n");
        init_copy_chunk_defaults(tcon);
+        if (tcon->share_flags & SHI1005_FLAGS_ENCRYPT_DATA)
+                cifs_dbg(VFS, "Encrypted shares not supported");
        if (tcon->ses->server->ops->validate_negotiate)
                rc = tcon->ses->server->ops->validate_negotiate(xid, tcon);
 tcon_exit:
diff --git a/fs/dax.c b/fs/dax.c
index 93bf2f990ace..a86d3cc2b389 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -119,7 +119,8 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
                size_t len;
                if (pos == max) {
                        unsigned blkbits = inode->i_blkbits;
-                        sector_t block = pos >> blkbits;
+                        long page = pos >> PAGE_SHIFT;
+                        sector_t block = page << (PAGE_SHIFT - blkbits);
                        unsigned first = pos - (block << blkbits);
                        long size;
@@ -284,6 +285,7 @@ static int copy_user_bh(struct page *to, struct buffer_head *bh,
 static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
                        struct vm_area_struct *vma, struct vm_fault *vmf)
 {
+        struct address_space *mapping = inode->i_mapping;
        sector_t sector = bh->b_blocknr << (inode->i_blkbits - 9);
        unsigned long vaddr = (unsigned long)vmf->virtual_address;
        void __pmem *addr;
@@ -291,6 +293,8 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
        pgoff_t size;
        int error;
+        i_mmap_lock_read(mapping);
        /*
         * Check truncate didn't happen while we were allocating a block.
         * If it did, this block may or may not be still allocated to the
@@ -320,6 +324,8 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
        error = vm_insert_mixed(vma, vaddr, pfn);
 out:
+        i_mmap_unlock_read(mapping);
        return error;
 }
@@ -381,17 +387,15 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                         * from a read fault and we've raced with a truncate
                         */
                        error = -EIO;
-                        goto unlock;
+                        goto unlock_page;
                }
-        } else {
-                i_mmap_lock_write(mapping);
        }
        error = get_block(inode, block, &bh, 0);
        if (!error && (bh.b_size < PAGE_SIZE))
                error = -EIO;           /* fs corruption? */
        if (error)
-                goto unlock;
+                goto unlock_page;
        if (!buffer_mapped(&bh) && !buffer_unwritten(&bh) && !vmf->cow_page) {
                if (vmf->flags & FAULT_FLAG_WRITE) {
@@ -402,9 +406,8 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                        if (!error && (bh.b_size < PAGE_SIZE))
                                error = -EIO;
                        if (error)
-                                goto unlock;
+                                goto unlock_page;
                } else {
-                        i_mmap_unlock_write(mapping);
                        return dax_load_hole(mapping, page, vmf);
                }
        }
@@ -416,15 +419,17 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                else
                        clear_user_highpage(new_page, vaddr);
                if (error)
-                        goto unlock;
+                        goto unlock_page;
                vmf->page = page;
                if (!page) {
+                        i_mmap_lock_read(mapping);
                        /* Check we didn't race with truncate */
                        size = (i_size_read(inode) + PAGE_SIZE - 1) >>
                                                                PAGE_SHIFT;
                        if (vmf->pgoff >= size) {
+                                i_mmap_unlock_read(mapping);
                                error = -EIO;
-                                goto unlock;
+                                goto out;
                        }
                }
                return VM_FAULT_LOCKED;
@@ -460,8 +465,6 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                        WARN_ON_ONCE(!(vmf->flags & FAULT_FLAG_WRITE));
        }
-        if (!page)
-                i_mmap_unlock_write(mapping);
 out:
        if (error == -ENOMEM)
                return VM_FAULT_OOM | major;
@@ -470,14 +473,11 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                return VM_FAULT_SIGBUS | major;
        return VM_FAULT_NOPAGE | major;
- unlock:
+ unlock_page:
        if (page) {
                unlock_page(page);
                page_cache_release(page);
-        } else {
-                i_mmap_unlock_write(mapping);
        }
        goto out;
 }
 EXPORT_SYMBOL(__dax_fault);
@@ -555,10 +555,10 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
        block = (sector_t)pgoff << (PAGE_SHIFT - blkbits);
        bh.b_size = PMD_SIZE;
-        i_mmap_lock_write(mapping);
        length = get_block(inode, block, &bh, write);
        if (length)
                return VM_FAULT_SIGBUS;
+        i_mmap_lock_read(mapping);
        /*
         * If the filesystem isn't willing to tell us the length of a hole,
@@ -568,24 +568,14 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
        if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE)
                goto fallback;
-        if (buffer_unwritten(&bh) || buffer_new(&bh)) {
-                int i;
-                for (i = 0; i < PTRS_PER_PMD; i++)
-                        clear_pmem(kaddr + i * PAGE_SIZE, PAGE_SIZE);
-                wmb_pmem();
-                count_vm_event(PGMAJFAULT);
-                mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
-                result |= VM_FAULT_MAJOR;
-        }
        /*
         * If we allocated new storage, make sure no process has any
         * zero pages covering this hole
         */
        if (buffer_new(&bh)) {
-                i_mmap_unlock_write(mapping);
+                i_mmap_unlock_read(mapping);
                unmap_mapping_range(mapping, pgoff << PAGE_SHIFT, PMD_SIZE, 0);
-                i_mmap_lock_write(mapping);
+                i_mmap_lock_read(mapping);
        }
        /*
@@ -632,15 +622,25 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
                if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR))
                        goto fallback;
+                if (buffer_unwritten(&bh) || buffer_new(&bh)) {
+                        int i;
+                        for (i = 0; i < PTRS_PER_PMD; i++)
+                                clear_pmem(kaddr + i * PAGE_SIZE, PAGE_SIZE);
+                        wmb_pmem();
+                        count_vm_event(PGMAJFAULT);
+                        mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
+                        result |= VM_FAULT_MAJOR;
+                }
                result |= vmf_insert_pfn_pmd(vma, address, pmd, pfn, write);
        }
 out:
+        i_mmap_unlock_read(mapping);
        if (buffer_unwritten(&bh))
                complete_unwritten(&bh, !(result & VM_FAULT_ERROR));
-        i_mmap_unlock_write(mapping);
        return result;
 fallback:
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index 47728da7702c..b46e9fc64196 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -63,7 +63,7 @@ config EXT4_FS
          If unsure, say N.
 config EXT4_USE_FOR_EXT2
-        bool "Use ext4 for ext2/ext3 file systems"
+        bool "Use ext4 for ext2 file systems"
        depends on EXT4_FS
        depends on EXT2_FS=n
        default y
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index e26803fb210d..560af0437704 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -165,8 +165,8 @@ int ext4_mpage_readpages(struct address_space *mapping,
                if (pages) {
                        page = list_entry(pages->prev, struct page, lru);
                        list_del(&page->lru);
-                        if (add_to_page_cache_lru(page, mapping,
+                        if (add_to_page_cache_lru(page, mapping, page->index,
-                                                  page->index, GFP_KERNEL))
+                                        GFP_KERNEL & mapping_gfp_mask(mapping)))
                                goto next_page;
                }
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 587ac08eabb6..091a36444972 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1481,6 +1481,21 @@ static long writeback_sb_inodes(struct super_block *sb,
                wbc_detach_inode(&wbc);
                work->nr_pages -= write_chunk - wbc.nr_to_write;
                wrote += write_chunk - wbc.nr_to_write;
+                if (need_resched()) {
+                        /*
+                         * We're trying to balance between building up a nice
+                         * long list of IOs to improve our merge rate, and
+                         * getting those IOs out quickly for anyone throttling
+                         * in balance_dirty_pages().  cond_resched() doesn't
+                         * unplug, so get our IOs out the door before we
+                         * give up the CPU.
+                         */
+                        blk_flush_plug(current);
+                        cond_resched();
+                }
                spin_lock(&wb->list_lock);
                spin_lock(&inode->i_lock);
                if (!(inode->i_state & I_DIRTY_ALL))
@@ -1488,7 +1503,7 @@ static long writeback_sb_inodes(struct super_block *sb,
                requeue_inode(inode, wb, &wbc);
                inode_sync_complete(inode);
                spin_unlock(&inode->i_lock);
-                cond_resched_lock(&wb->list_lock);
                /*
                 * bail out to wb_writeback() often enough to check
                 * background threshold and other termination conditions.
diff --git a/fs/mpage.c b/fs/mpage.c
index 778a4ddef77a..a7c34274f207 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -139,7 +139,8 @@ map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block)
 static struct bio *
 do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
                sector_t *last_block_in_bio, struct buffer_head *map_bh,
-                unsigned long *first_logical_block, get_block_t get_block)
+                unsigned long *first_logical_block, get_block_t get_block,
+                gfp_t gfp)
 {
        struct inode *inode = page->mapping->host;
        const unsigned blkbits = inode->i_blkbits;
@@ -277,8 +278,7 @@ alloc_new:
                                goto out;
                }
                bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
-                                min_t(int, nr_pages, BIO_MAX_PAGES),
+                                min_t(int, nr_pages, BIO_MAX_PAGES), gfp);
-                                GFP_KERNEL);
                if (bio == NULL)
                        goto confused;
        }
@@ -361,6 +361,7 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
        sector_t last_block_in_bio = 0;
        struct buffer_head map_bh;
        unsigned long first_logical_block = 0;
+        gfp_t gfp = GFP_KERNEL & mapping_gfp_mask(mapping);
        map_bh.b_state = 0;
        map_bh.b_size = 0;
@@ -370,12 +371,13 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
                prefetchw(&page->flags);
                list_del(&page->lru);
                if (!add_to_page_cache_lru(page, mapping,
-                                        page->index, GFP_KERNEL)) {
+                                        page->index,
+                                        gfp)) {
                        bio = do_mpage_readpage(bio, page,
                                        nr_pages - page_idx,
                                        &last_block_in_bio, &map_bh,
                                        &first_logical_block,
-                                        get_block);
+                                        get_block, gfp);
                }
                page_cache_release(page);
        }
@@ -395,11 +397,12 @@ int mpage_readpage(struct page *page, get_block_t get_block)
        sector_t last_block_in_bio = 0;
        struct buffer_head map_bh;
        unsigned long first_logical_block = 0;
+        gfp_t gfp = GFP_KERNEL & mapping_gfp_mask(page->mapping);
        map_bh.b_state = 0;
        map_bh.b_size = 0;
        bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio,
-                        &map_bh, &first_logical_block, get_block);
+                        &map_bh, &first_logical_block, get_block, gfp);
        if (bio)
                mpage_bio_submit(READ, bio);
        return 0;
diff --git a/fs/namei.c b/fs/namei.c
index 726d211db484..33e9495a3129 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1558,8 +1558,6 @@ static int lookup_fast(struct nameidata *nd,
                negative = d_is_negative(dentry);
                if (read_seqcount_retry(&dentry->d_seq, seq))
                        return -ECHILD;
-                if (negative)
-                        return -ENOENT;
                /*
                 * This sequence count validates that the parent had no
@@ -1580,6 +1578,12 @@ static int lookup_fast(struct nameidata *nd,
                                goto unlazy;
                        }
                }
+                /*
+                 * Note: do negative dentry check after revalidation in
+                 * case that drops it.
+                 */
+                if (negative)
+                        return -ENOENT;
                path->mnt = mnt;
                path->dentry = dentry;
                if (likely(__follow_mount_rcu(nd, path, inode, seqp)))
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 2714ef835bdd..be806ead7f4d 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -113,7 +113,8 @@ out:
        return status;
 }
-static int nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *stateid)
+static int nfs_delegation_claim_opens(struct inode *inode,
+                const nfs4_stateid *stateid, fmode_t type)
 {
        struct nfs_inode *nfsi = NFS_I(inode);
        struct nfs_open_context *ctx;
@@ -140,7 +141,7 @@ again:
                /* Block nfs4_proc_unlck */
                mutex_lock(&sp->so_delegreturn_mutex);
                seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
-                err = nfs4_open_delegation_recall(ctx, state, stateid);
+                err = nfs4_open_delegation_recall(ctx, state, stateid, type);
                if (!err)
                        err = nfs_delegation_claim_locks(ctx, state, stateid);
                if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
@@ -411,7 +412,8 @@ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation
        do {
                if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
                        break;
-                err = nfs_delegation_claim_opens(inode, &delegation->stateid);
+                err = nfs_delegation_claim_opens(inode, &delegation->stateid,
+                                delegation->type);
                if (!issync || err != -EAGAIN)
                        break;
                /*
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index a44829173e57..333063e032f0 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -54,7 +54,7 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
 /* NFSv4 delegation-related procedures */
 int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync);
-int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid);
+int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid, fmode_t type);
 int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid);
 bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_t flags);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 38678d9a5cc4..4b1d08f56aba 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -166,8 +166,11 @@ nfs_direct_select_verf(struct nfs_direct_req *dreq,
        struct nfs_writeverf *verfp = &dreq->verf;
 #ifdef CONFIG_NFS_V4_1
-        if (ds_clp) {
+        /*
-                /* pNFS is in use, use the DS verf */
+         * pNFS is in use: use the DS verf, except when commit_through_mds
+         * is set for the layout segment, in which case nbuckets is zero.
+         */
+        if (ds_clp && dreq->ds_cinfo.nbuckets > 0) {
                if (commit_idx >= 0 && commit_idx < dreq->ds_cinfo.nbuckets)
                        verfp = &dreq->ds_cinfo.buckets[commit_idx].direct_verf;
                else
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index b34f2e228601..02ec07973bc4 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -629,23 +629,18 @@ out_put:
        goto out;
 }
-static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl)
+static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
 {
        int i;
-        for (i = 0; i < fl->num_fh; i++) {
+        if (fl->fh_array) {
-                if (!fl->fh_array[i])
+                for (i = 0; i < fl->num_fh; i++) {
-                        break;
+                        if (!fl->fh_array[i])
-                kfree(fl->fh_array[i]);
+                                break;
+                        kfree(fl->fh_array[i]);
+                }
+                kfree(fl->fh_array);
        }
-        kfree(fl->fh_array);
-        fl->fh_array = NULL;
-}
-static void
-_filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
-{
-        filelayout_free_fh_array(fl);
        kfree(fl);
 }
@@ -716,21 +711,21 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
                /* Do we want to use a mempool here? */
                fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), gfp_flags);
                if (!fl->fh_array[i])
-                        goto out_err_free;
+                        goto out_err;
                p = xdr_inline_decode(&stream, 4);
                if (unlikely(!p))
-                        goto out_err_free;
+                        goto out_err;
                fl->fh_array[i]->size = be32_to_cpup(p++);
                if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) {
                        printk(KERN_ERR "NFS: Too big fh %d received %d\n",
                               i, fl->fh_array[i]->size);
-                        goto out_err_free;
+                        goto out_err;
                }
                p = xdr_inline_decode(&stream, fl->fh_array[i]->size);
                if (unlikely(!p))
-                        goto out_err_free;
+                        goto out_err;
                memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size);
                dprintk("DEBUG: %s: fh len %d\n", __func__,
                        fl->fh_array[i]->size);
@@ -739,8 +734,6 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
        __free_page(scratch);
        return 0;
-out_err_free:
-        filelayout_free_fh_array(fl);
 out_err:
        __free_page(scratch);
        return -EIO;
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index d731bbf974aa..0f020e4d8421 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -175,10 +175,12 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
 {
        struct nfs_server *server = NFS_SERVER(file_inode(filep));
        struct nfs4_exception exception = { };
-        int err;
+        loff_t err;
        do {
                err = _nfs42_proc_llseek(filep, offset, whence);
+                if (err >= 0)
+                        break;
                if (err == -ENOTSUPP)
                        return -EOPNOTSUPP;
                err = nfs4_handle_exception(server, err, &exception);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 693b903b48bd..5133bb18830e 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1127,6 +1127,21 @@ static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
        return ret;
 }
+static bool nfs4_mode_match_open_stateid(struct nfs4_state *state,
+                fmode_t fmode)
+{
+        switch(fmode & (FMODE_READ|FMODE_WRITE)) {
+        case FMODE_READ|FMODE_WRITE:
+                return state->n_rdwr != 0;
+        case FMODE_WRITE:
+                return state->n_wronly != 0;
+        case FMODE_READ:
+                return state->n_rdonly != 0;
+        }
+        WARN_ON_ONCE(1);
+        return false;
+}
 static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode)
 {
        int ret = 0;
@@ -1443,12 +1458,18 @@ nfs4_opendata_check_deleg(struct nfs4_opendata *data, struct nfs4_state *state)
        if (delegation)
                delegation_flags = delegation->flags;
        rcu_read_unlock();
-        if (data->o_arg.claim == NFS4_OPEN_CLAIM_DELEGATE_CUR) {
+        switch (data->o_arg.claim) {
+        default:
+                break;
+        case NFS4_OPEN_CLAIM_DELEGATE_CUR:
+        case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
                pr_err_ratelimited("NFS: Broken NFSv4 server %s is "
                                   "returning a delegation for "
                                   "OPEN(CLAIM_DELEGATE_CUR)\n",
                                   clp->cl_hostname);
-        } else if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0)
+                return;
+        }
+        if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0)
                nfs_inode_set_delegation(state->inode,
                                         data->owner->so_cred,
                                         &data->o_res);
@@ -1571,17 +1592,13 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context
        return opendata;
 }
-static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmode, struct nfs4_state **res)
+static int nfs4_open_recover_helper(struct nfs4_opendata *opendata,
+                fmode_t fmode)
 {
        struct nfs4_state *newstate;
        int ret;
-        if ((opendata->o_arg.claim == NFS4_OPEN_CLAIM_DELEGATE_CUR ||
+        if (!nfs4_mode_match_open_stateid(opendata->state, fmode))
-             opendata->o_arg.claim == NFS4_OPEN_CLAIM_DELEG_CUR_FH) &&
-            (opendata->o_arg.u.delegation_type & fmode) != fmode)
-                /* This mode can't have been delegated, so we must have
-                 * a valid open_stateid to cover it - not need to reclaim.
-                 */
                return 0;
        opendata->o_arg.open_flags = 0;
        opendata->o_arg.fmode = fmode;
@@ -1597,14 +1614,14 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmod
        newstate = nfs4_opendata_to_nfs4_state(opendata);
        if (IS_ERR(newstate))
                return PTR_ERR(newstate);
+        if (newstate != opendata->state)
+                ret = -ESTALE;
        nfs4_close_state(newstate, fmode);
-        *res = newstate;
+        return ret;
-        return 0;
 }
 static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *state)
 {
-        struct nfs4_state *newstate;
        int ret;
        /* Don't trigger recovery in nfs_test_and_clear_all_open_stateid */
@@ -1615,27 +1632,15 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
        clear_bit(NFS_DELEGATED_STATE, &state->flags);
        clear_bit(NFS_OPEN_STATE, &state->flags);
        smp_rmb();
-        if (state->n_rdwr != 0) {
+        ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE);
-                ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate);
+        if (ret != 0)
-                if (ret != 0)
+                return ret;
-                        return ret;
+        ret = nfs4_open_recover_helper(opendata, FMODE_WRITE);
-                if (newstate != state)
+        if (ret != 0)
-                        return -ESTALE;
+                return ret;
-        }
+        ret = nfs4_open_recover_helper(opendata, FMODE_READ);
-        if (state->n_wronly != 0) {
+        if (ret != 0)
-                ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate);
+                return ret;
-                if (ret != 0)
-                        return ret;
-                if (newstate != state)
-                        return -ESTALE;
-        }
-        if (state->n_rdonly != 0) {
-                ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate);
-                if (ret != 0)
-                        return ret;
-                if (newstate != state)
-                        return -ESTALE;
-        }
        /*
         * We may have performed cached opens for all three recoveries.
         * Check if we need to update the current stateid.
@@ -1759,18 +1764,35 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct
        return err;
 }
-int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
+int nfs4_open_delegation_recall(struct nfs_open_context *ctx,
+                struct nfs4_state *state, const nfs4_stateid *stateid,
+                fmode_t type)
 {
        struct nfs_server *server = NFS_SERVER(state->inode);
        struct nfs4_opendata *opendata;
-        int err;
+        int err = 0;
        opendata = nfs4_open_recoverdata_alloc(ctx, state,
                        NFS4_OPEN_CLAIM_DELEG_CUR_FH);
        if (IS_ERR(opendata))
                return PTR_ERR(opendata);
        nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid);
-        err = nfs4_open_recover(opendata, state);
+        write_seqlock(&state->seqlock);
+        nfs4_stateid_copy(&state->stateid, &state->open_stateid);
+        write_sequnlock(&state->seqlock);
+        clear_bit(NFS_DELEGATED_STATE, &state->flags);
+        switch (type & (FMODE_READ|FMODE_WRITE)) {
+        case FMODE_READ|FMODE_WRITE:
+        case FMODE_WRITE:
+                err = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE);
+                if (err)
+                        break;
+                err = nfs4_open_recover_helper(opendata, FMODE_WRITE);
+                if (err)
+                        break;
+        case FMODE_READ:
+                err = nfs4_open_recover_helper(opendata, FMODE_READ);
+        }
        nfs4_opendata_put(opendata);
        return nfs4_handle_delegation_recall_error(server, state, stateid, err);
 }
@@ -1850,6 +1872,8 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
        data->rpc_done = 0;
        data->rpc_status = 0;
        data->timestamp = jiffies;
+        if (data->is_recover)
+                nfs4_set_sequence_privileged(&data->c_arg.seq_args);
        task = rpc_run_task(&task_setup_data);
        if (IS_ERR(task))
                return PTR_ERR(task);
@@ -2645,6 +2669,15 @@ out:
        return err;
 }
+static bool
+nfs4_wait_on_layoutreturn(struct inode *inode, struct rpc_task *task)
+{
+        if (inode == NULL || !nfs_have_layout(inode))
+                return false;
+        return pnfs_wait_on_layoutreturn(inode, task);
+}
 struct nfs4_closedata {
        struct inode *inode;
        struct nfs4_state *state;
@@ -2763,6 +2796,11 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
                goto out_no_action;
        }
+        if (nfs4_wait_on_layoutreturn(inode, task)) {
+                nfs_release_seqid(calldata->arg.seqid);
+                goto out_wait;
+        }
        if (calldata->arg.fmode == 0)
                task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE];
        if (calldata->roc)
@@ -5308,6 +5346,9 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data)
        d_data = (struct nfs4_delegreturndata *)data;
+        if (nfs4_wait_on_layoutreturn(d_data->inode, task))
+                return;
        if (d_data->roc)
                pnfs_roc_get_barrier(d_data->inode, &d_data->roc_barrier);
@@ -7800,39 +7841,46 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
                        dprintk("%s: NFS4ERR_RECALLCONFLICT waiting %lu\n",
                                __func__, delay);
                        rpc_delay(task, delay);
-                        task->tk_status = 0;
+                        /* Do not call nfs4_async_handle_error() */
-                        rpc_restart_call_prepare(task);
+                        goto out_restart;
-                        goto out; /* Do not call nfs4_async_handle_error() */
                }
                break;
        case -NFS4ERR_EXPIRED:
        case -NFS4ERR_BAD_STATEID:
                spin_lock(&inode->i_lock);
-                lo = NFS_I(inode)->layout;
+                if (nfs4_stateid_match(&lgp->args.stateid,
-                if (!lo || list_empty(&lo->plh_segs)) {
+                                        &lgp->args.ctx->state->stateid)) {
                        spin_unlock(&inode->i_lock);
                        /* If the open stateid was bad, then recover it. */
                        state = lgp->args.ctx->state;
-                } else {
+                        break;
+                }
+                lo = NFS_I(inode)->layout;
+                if (lo && nfs4_stateid_match(&lgp->args.stateid,
+                                        &lo->plh_stateid)) {
                        LIST_HEAD(head);
                        /*
                         * Mark the bad layout state as invalid, then retry
                         * with the current stateid.
                         */
+                        set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
                        pnfs_mark_matching_lsegs_invalid(lo, &head, NULL);
                        spin_unlock(&inode->i_lock);
                        pnfs_free_lseg_list(&head);
-        
+                } else
-                        task->tk_status = 0;
+                        spin_unlock(&inode->i_lock);
-                        rpc_restart_call_prepare(task);
+                goto out_restart;
-                }
        }
        if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN)
-                rpc_restart_call_prepare(task);
+                goto out_restart;
 out:
        dprintk("<-- %s\n", __func__);
        return;
+out_restart:
+        task->tk_status = 0;
+        rpc_restart_call_prepare(task);
+        return;
 out_overflow:
        task->tk_status = -EOVERFLOW;
        goto out;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index da73bc443238..d854693a15b0 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1481,7 +1481,7 @@ restart:
                                        spin_unlock(&state->state_lock);
                                }
                                nfs4_put_open_state(state);
-                                clear_bit(NFS4CLNT_RECLAIM_NOGRACE,
+                                clear_bit(NFS_STATE_RECLAIM_NOGRACE,
                                        &state->flags);
                                spin_lock(&sp->so_lock);
                                goto restart;
@@ -1725,7 +1725,8 @@ restart:
                        if (!test_and_clear_bit(ops->owner_flag_bit,
                                                        &sp->so_flags))
                                continue;
-                        atomic_inc(&sp->so_count);
+                        if (!atomic_inc_not_zero(&sp->so_count))
+                                continue;
                        spin_unlock(&clp->cl_lock);
                        rcu_read_unlock();
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 28df12e525ba..671cf68fe56b 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -409,7 +409,7 @@ DECLARE_EVENT_CLASS(nfs4_open_event,
                        __entry->flags = flags;
                        __entry->fmode = (__force unsigned int)ctx->mode;
                        __entry->dev = ctx->dentry->d_sb->s_dev;
-                        if (!IS_ERR(state))
+                        if (!IS_ERR_OR_NULL(state))
                                inode = state->inode;
                        if (inode != NULL) {
                                __entry->fileid = NFS_FILEID(inode);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 7c5718ba625e..fe3ddd20ff89 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -508,7 +508,7 @@ size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
         * for it without upsetting the slab allocator.
         */
        if (((mirror->pg_count + req->wb_bytes) >> PAGE_SHIFT) *
-                        sizeof(struct page) > PAGE_SIZE)
+                        sizeof(struct page *) > PAGE_SIZE)
                return 0;
        return min(mirror->pg_bsize - mirror->pg_count, (size_t)req->wb_bytes);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index ba1246433794..8abe27165ad0 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1104,20 +1104,15 @@ bool pnfs_roc(struct inode *ino)
                        mark_lseg_invalid(lseg, &tmp_list);
                        found = true;
                }
-        /* pnfs_prepare_layoutreturn() grabs lo ref and it will be put
+        /* ROC in two conditions:
-         * in pnfs_roc_release(). We don't really send a layoutreturn but
-         * still want others to view us like we are sending one!
-         *
-         * If pnfs_prepare_layoutreturn() fails, it means someone else is doing
-         * LAYOUTRETURN, so we proceed like there are no layouts to return.
-         *
-         * ROC in three conditions:
         * 1. there are ROC lsegs
         * 2. we don't send layoutreturn
-         * 3. no others are sending layoutreturn
         */
-        if (found && !layoutreturn && pnfs_prepare_layoutreturn(lo))
+        if (found && !layoutreturn) {
+                /* lo ref dropped in pnfs_roc_release() */
+                pnfs_get_layout_hdr(lo);
                roc = true;
+        }
 out_noroc:
        spin_unlock(&ino->i_lock);
@@ -1172,6 +1167,26 @@ void pnfs_roc_get_barrier(struct inode *ino, u32 *barrier)
        spin_unlock(&ino->i_lock);
 }
+bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task)
+{
+        struct nfs_inode *nfsi = NFS_I(ino);
+        struct pnfs_layout_hdr *lo;
+        bool sleep = false;
+        /* We might not hold a reference on lo, so it must be looked up
+         * and tested under i_lock. */
+        spin_lock(&ino->i_lock);
+        lo = nfsi->layout;
+        if (lo && test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
+                sleep = true;
+        spin_unlock(&ino->i_lock);
+        if (sleep)
+                rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);
+        return sleep;
+}
 /*
 * Compare two layout segments for sorting into layout cache.
 * We want to preferentially return RW over RO layouts, so ensure those
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 78c9351ff117..d1990e90e7a0 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -270,6 +270,7 @@ bool pnfs_roc(struct inode *ino);
 void pnfs_roc_release(struct inode *ino);
 void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
 void pnfs_roc_get_barrier(struct inode *ino, u32 *barrier);
+bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task);
 void pnfs_set_layoutcommit(struct inode *, struct pnfs_layout_segment *, loff_t);
 void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
 int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
@@ -639,6 +640,12 @@ pnfs_roc_get_barrier(struct inode *ino, u32 *barrier)
 {
 }
+static inline bool
+pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task)
+{
+        return false;
+}
 static inline void set_pnfs_layoutdriver(struct nfs_server *s,
                                         const struct nfs_fh *mntfh, u32 id)
 {
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index ae0ff7a11b40..01b8cc8e8cfc 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -72,6 +72,9 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
 {
        struct nfs_pgio_mirror *mirror;
+        if (pgio->pg_ops && pgio->pg_ops->pg_cleanup)
+                pgio->pg_ops->pg_cleanup(pgio);
        pgio->pg_ops = &nfs_pgio_rw_ops;
        /* read path should never have more than one mirror */
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 388f48079c43..75ab7622e0cc 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -569,19 +569,17 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
        if (!nfs_pageio_add_request(pgio, req)) {
                nfs_redirty_request(req);
                ret = pgio->pg_error;
-        }
+        } else
+                nfs_add_stats(page_file_mapping(page)->host,
+                                NFSIOS_WRITEPAGES, 1);
 out:
        return ret;
 }
 static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio)
 {
-        struct inode *inode = page_file_mapping(page)->host;
        int ret;
-        nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
-        nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
        nfs_pageio_cond_complete(pgio, page_file_index(page));
        ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE);
        if (ret == -EAGAIN) {
@@ -597,9 +595,11 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, st
 static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc)
 {
        struct nfs_pageio_descriptor pgio;
+        struct inode *inode = page_file_mapping(page)->host;
        int err;
-        nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc),
+        nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
+        nfs_pageio_init_write(&pgio, inode, wb_priority(wbc),
                                false, &nfs_async_write_completion_ops);
        err = nfs_do_writepage(page, wbc, &pgio);
        nfs_pageio_complete(&pgio);
@@ -1223,7 +1223,7 @@ static int nfs_can_extend_write(struct file *file, struct page *page, struct ino
                return 1;
        if (!flctx || (list_empty_careful(&flctx->flc_flock) &&
                       list_empty_careful(&flctx->flc_posix)))
-                return 0;
+                return 1;
        /* Check to see if there are whole file write locks */
        ret = 0;
@@ -1351,6 +1351,9 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
 {
        struct nfs_pgio_mirror *mirror;
+        if (pgio->pg_ops && pgio->pg_ops->pg_cleanup)
+                pgio->pg_ops->pg_cleanup(pgio);
        pgio->pg_ops = &nfs_pgio_rw_ops;
        nfs_pageio_stop_mirroring(pgio);
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index cdefaa331a07..c29d9421bd5e 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -56,14 +56,6 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
        u32 device_generation = 0;
        int error;
-        /*
-         * We do not attempt to support I/O smaller than the fs block size,
-         * or not aligned to it.
-         */
-        if (args->lg_minlength < block_size) {
-                dprintk("pnfsd: I/O too small\n");
-                goto out_layoutunavailable;
-        }
        if (seg->offset & (block_size - 1)) {
                dprintk("pnfsd: I/O misaligned\n");
                goto out_layoutunavailable;
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 46b8b2bbc95a..ee5aa4daaea0 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -1439,6 +1439,7 @@ int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data,
        int found, ret;
        int set_maybe;
        int dispatch_assert = 0;
+        int dispatched = 0;
        if (!dlm_grab(dlm))
                return DLM_MASTER_RESP_NO;
@@ -1658,15 +1659,18 @@ send_response:
                        mlog(ML_ERROR, "failed to dispatch assert master work\n");
                        response = DLM_MASTER_RESP_ERROR;
                        dlm_lockres_put(res);
-                } else
+                } else {
+                        dispatched = 1;
                        __dlm_lockres_grab_inflight_worker(dlm, res);
+                }
                spin_unlock(&res->spinlock);
        } else {
                if (res)
                        dlm_lockres_put(res);
        }
-        dlm_put(dlm);
+        if (!dispatched)
+                dlm_put(dlm);
        return response;
 }
@@ -2090,7 +2094,6 @@ int dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
        /* queue up work for dlm_assert_master_worker */
-        dlm_grab(dlm);  /* get an extra ref for the work item */
        dlm_init_work_item(dlm, item, dlm_assert_master_worker, NULL);
        item->u.am.lockres = res; /* already have a ref */
        /* can optionally ignore node numbers higher than this node */
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index ce12e0b1a31f..3d90ad7ff91f 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1694,6 +1694,7 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
        unsigned int hash;
        int master = DLM_LOCK_RES_OWNER_UNKNOWN;
        u32 flags = DLM_ASSERT_MASTER_REQUERY;
+        int dispatched = 0;
        if (!dlm_grab(dlm)) {
                /* since the domain has gone away on this
@@ -1719,8 +1720,10 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
                                dlm_put(dlm);
                                /* sender will take care of this and retry */
                                return ret;
-                        } else
+                        } else {
+                                dispatched = 1;
                                __dlm_lockres_grab_inflight_worker(dlm, res);
+                        }
                        spin_unlock(&res->spinlock);
                } else {
                        /* put.. incase we are not the master */
@@ -1730,7 +1733,8 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
        }
        spin_unlock(&dlm->spinlock);
-        dlm_put(dlm);
+        if (!dispatched)
+                dlm_put(dlm);
        return master;
 }
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index ba1323a94924..a586467f6ff6 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -70,6 +70,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
        unsigned order;
        void *data;
        int ret;
+        gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
        /* make various checks */
        order = get_order(newsize);
@@ -84,7 +85,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
        /* allocate enough contiguous pages to be able to satisfy the
         * request */
-        pages = alloc_pages(mapping_gfp_mask(inode->i_mapping), order);
+        pages = alloc_pages(gfp, order);
        if (!pages)
                return -ENOMEM;
@@ -108,7 +109,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
                struct page *page = pages + loop;
                ret = add_to_page_cache_lru(page, inode->i_mapping, loop,
-                                        GFP_KERNEL);
+                                        gfp);
                if (ret < 0)
                        goto add_error;
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 96f3448b6eb4..fd65b3f1923c 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -652,11 +652,8 @@ int ubifs_init_security(struct inode *dentry, struct inode *inode,
 {
        int err;
-        mutex_lock(&inode->i_mutex);
        err = security_inode_init_security(inode, dentry, qstr,
                                           &init_xattrs, 0);
-        mutex_unlock(&inode->i_mutex);
        if (err) {
                struct ubifs_info *c = dentry->i_sb->s_fs_info;
                ubifs_err(c, "cannot initialize security for inode %lu, error %d",
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 634e676072cb..50311703135b 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -467,8 +467,8 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
         * the fault_*wqh.
         */
        spin_lock(&ctx->fault_pending_wqh.lock);
-        __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, 0, &range);
+        __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, &range);
-        __wake_up_locked_key(&ctx->fault_wqh, TASK_NORMAL, 0, &range);
+        __wake_up_locked_key(&ctx->fault_wqh, TASK_NORMAL, &range);
        spin_unlock(&ctx->fault_pending_wqh.lock);
        wake_up_poll(&ctx->fd_wqh, POLLHUP);
@@ -650,10 +650,10 @@ static void __wake_userfault(struct userfaultfd_ctx *ctx,
        spin_lock(&ctx->fault_pending_wqh.lock);
        /* wake all in the range and autoremove */
        if (waitqueue_active(&ctx->fault_pending_wqh))
-                __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, 0,
+                __wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL,
                                     range);
        if (waitqueue_active(&ctx->fault_wqh))
-                __wake_up_locked_key(&ctx->fault_wqh, TASK_NORMAL, 0, range);
+                __wake_up_locked_key(&ctx->fault_wqh, TASK_NORMAL, range);
        spin_unlock(&ctx->fault_pending_wqh.lock);
 }
@@ -1287,8 +1287,10 @@ static struct file *userfaultfd_file_create(int flags)
        file = anon_inode_getfile("[userfaultfd]", &userfaultfd_fops, ctx,
                                  O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS));
-        if (IS_ERR(file))
+        if (IS_ERR(file)) {
+                mmput(ctx->mm);
                kmem_cache_free(userfaultfd_ctx_cachep, ctx);
+        }
 out:
        return file;
 }