66 files changed, 1199 insertions, 291 deletions
diff --git a/fs/affs/file.c b/fs/affs/file.c
index d2468bf95669..a91795e01a7f 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -699,8 +699,10 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
        boff = tmp % bsize;
        if (boff) {
                bh = affs_bread_ino(inode, bidx, 0);
-                if (IS_ERR(bh))
+                if (IS_ERR(bh)) {
-                        return PTR_ERR(bh);
+                        written = PTR_ERR(bh);
+                        goto err_first_bh;
+                }
                tmp = min(bsize - boff, to - from);
                BUG_ON(boff + tmp > bsize || tmp > bsize);
                memcpy(AFFS_DATA(bh) + boff, data + from, tmp);
@@ -712,14 +714,16 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
                bidx++;
        } else if (bidx) {
                bh = affs_bread_ino(inode, bidx - 1, 0);
-                if (IS_ERR(bh))
+                if (IS_ERR(bh)) {
-                        return PTR_ERR(bh);
+                        written = PTR_ERR(bh);
+                        goto err_first_bh;
+                }
        }
        while (from + bsize <= to) {
                prev_bh = bh;
                bh = affs_getemptyblk_ino(inode, bidx);
                if (IS_ERR(bh))
-                        goto out;
+                        goto err_bh;
                memcpy(AFFS_DATA(bh), data + from, bsize);
                if (buffer_new(bh)) {
                        AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA);
@@ -751,7 +755,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
                prev_bh = bh;
                bh = affs_bread_ino(inode, bidx, 1);
                if (IS_ERR(bh))
-                        goto out;
+                        goto err_bh;
                tmp = min(bsize, to - from);
                BUG_ON(tmp > bsize);
                memcpy(AFFS_DATA(bh), data + from, tmp);
@@ -790,12 +794,13 @@ done:
        if (tmp > inode->i_size)
                inode->i_size = AFFS_I(inode)->mmu_private = tmp;
+err_first_bh:
        unlock_page(page);
        page_cache_release(page);
        return written;
-out:
+err_bh:
        bh = prev_bh;
        if (!written)
                written = PTR_ERR(bh);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 993642199326..6d67f32e648d 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1645,14 +1645,14 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
        parent_nritems = btrfs_header_nritems(parent);
        blocksize = root->nodesize;
-        end_slot = parent_nritems;
+        end_slot = parent_nritems - 1;
-        if (parent_nritems == 1)
+        if (parent_nritems <= 1)
                return 0;
        btrfs_set_lock_blocking(parent);
-        for (i = start_slot; i < end_slot; i++) {
+        for (i = start_slot; i <= end_slot; i++) {
                int close = 1;
                btrfs_node_key(parent, &disk_key, i);
@@ -1669,7 +1669,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
                        other = btrfs_node_blockptr(parent, i - 1);
                        close = close_blocks(blocknr, other, blocksize);
                }
-                if (!close && i < end_slot - 2) {
+                if (!close && i < end_slot) {
                        other = btrfs_node_blockptr(parent, i + 1);
                        close = close_blocks(blocknr, other, blocksize);
                }
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 84c3b00f3de8..f9c89cae39ee 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3387,6 +3387,8 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                                    struct btrfs_root *root);
+int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
+                            struct btrfs_root *root);
 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr);
 int btrfs_free_block_groups(struct btrfs_fs_info *info);
 int btrfs_read_block_groups(struct btrfs_root *root);
@@ -3909,6 +3911,9 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
                                    loff_t actual_len, u64 *alloc_hint);
 int btrfs_inode_check_errors(struct inode *inode);
 extern const struct dentry_operations btrfs_dentry_operations;
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+void btrfs_test_inode_set_ops(struct inode *inode);
+#endif
 /* ioctl.c */
 long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f79f38542a73..639f2663ed3f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3921,7 +3921,7 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
        }
        if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
                        + sizeof(struct btrfs_chunk)) {
-                printk(KERN_ERR "BTRFS: system chunk array too small %u < %lu\n",
+                printk(KERN_ERR "BTRFS: system chunk array too small %u < %zu\n",
                                btrfs_super_sys_array_size(sb),
                                sizeof(struct btrfs_disk_key)
                                + sizeof(struct btrfs_chunk));
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 571f402d3fc4..8b353ad02f03 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3208,6 +3208,8 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
                return 0;
        }
+        if (trans->aborted)
+                return 0;
 again:
        inode = lookup_free_space_inode(root, block_group, path);
        if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
@@ -3243,6 +3245,20 @@ again:
         */
        BTRFS_I(inode)->generation = 0;
        ret = btrfs_update_inode(trans, root, inode);
+        if (ret) {
+                /*
+                 * So theoretically we could recover from this, simply set the
+                 * super cache generation to 0 so we know to invalidate the
+                 * cache, but then we'd have to keep track of the block groups
+                 * that fail this way so we know we _have_ to reset this cache
+                 * before the next commit or risk reading stale cache.  So to
+                 * limit our exposure to horrible edge cases let's just abort the
+                 * transaction, this only happens in really bad situations
+                 * anyway.
+                 */
+                btrfs_abort_transaction(trans, root, ret);
+                goto out_put;
+        }
        WARN_ON(ret);
        if (i_size_read(inode) > 0) {
@@ -3309,6 +3325,32 @@ out:
        return ret;
 }
+int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
+                            struct btrfs_root *root)
+{
+        struct btrfs_block_group_cache *cache, *tmp;
+        struct btrfs_transaction *cur_trans = trans->transaction;
+        struct btrfs_path *path;
+        if (list_empty(&cur_trans->dirty_bgs) ||
+            !btrfs_test_opt(root, SPACE_CACHE))
+                return 0;
+        path = btrfs_alloc_path();
+        if (!path)
+                return -ENOMEM;
+        /* Could add new block groups, use _safe just in case */
+        list_for_each_entry_safe(cache, tmp, &cur_trans->dirty_bgs,
+                                 dirty_list) {
+                if (cache->disk_cache_state == BTRFS_DC_CLEAR)
+                        cache_save_setup(cache, trans, path);
+        }
+        btrfs_free_path(path);
+        return 0;
+}
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root)
 {
@@ -5094,7 +5136,11 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
        num_bytes = ALIGN(num_bytes, root->sectorsize);
        spin_lock(&BTRFS_I(inode)->lock);
-        BTRFS_I(inode)->outstanding_extents++;
+        nr_extents = (unsigned)div64_u64(num_bytes +
+                                         BTRFS_MAX_EXTENT_SIZE - 1,
+                                         BTRFS_MAX_EXTENT_SIZE);
+        BTRFS_I(inode)->outstanding_extents += nr_extents;
+        nr_extents = 0;
        if (BTRFS_I(inode)->outstanding_extents >
            BTRFS_I(inode)->reserved_extents)
@@ -5239,6 +5285,9 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
        if (dropped > 0)
                to_free += btrfs_calc_trans_metadata_size(root, dropped);
+        if (btrfs_test_is_dummy_root(root))
+                return;
        trace_btrfs_space_reservation(root->fs_info, "delalloc",
                                      btrfs_ino(inode), to_free, 0);
        if (root->fs_info->quota_enabled) {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c7233ff1d533..d688cfe5d496 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4968,6 +4968,12 @@ static int release_extent_buffer(struct extent_buffer *eb)
                /* Should be safe to release our pages at this point */
                btrfs_release_extent_buffer_page(eb);
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+                if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))) {
+                        __free_extent_buffer(eb);
+                        return 1;
+                }
+#endif
                call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
                return 1;
        }
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index b78bbbac900d..30982bbd31c3 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1811,22 +1811,10 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
        mutex_unlock(&inode->i_mutex);
        /*
-         * we want to make sure fsync finds this change
-         * but we haven't joined a transaction running right now.
-         *
-         * Later on, someone is sure to update the inode and get the
-         * real transid recorded.
-         *
-         * We set last_trans now to the fs_info generation + 1,
-         * this will either be one more than the running transaction
-         * or the generation used for the next transaction if there isn't
-         * one running right now.
-         *
         * We also have to set last_sub_trans to the current log transid,
         * otherwise subsequent syncs to a file that's been synced in this
         * transaction will appear to have already occured.
         */
-        BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
        BTRFS_I(inode)->last_sub_trans = root->log_transid;
        if (num_written > 0) {
                err = generic_write_sync(file, pos, num_written);
@@ -1959,25 +1947,37 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
        atomic_inc(&root->log_batch);
        /*
-         * check the transaction that last modified this inode
+         * If the last transaction that changed this file was before the current
-         * and see if its already been committed
+         * transaction and we have the full sync flag set in our inode, we can
-         */
+         * bail out now without any syncing.
-        if (!BTRFS_I(inode)->last_trans) {
+         *
-                mutex_unlock(&inode->i_mutex);
+         * Note that we can't bail out if the full sync flag isn't set. This is
-                goto out;
+         * because when the full sync flag is set we start all ordered extents
-        }
+         * and wait for them to fully complete - when they complete they update
+         * the inode's last_trans field through:
-        /*
+         *
-         * if the last transaction that changed this file was before
+         *     btrfs_finish_ordered_io() ->
-         * the current transaction, we can bail out now without any
+         *         btrfs_update_inode_fallback() ->
-         * syncing
+         *             btrfs_update_inode() ->
+         *                 btrfs_set_inode_last_trans()
+         *
+         * So we are sure that last_trans is up to date and can do this check to
+         * bail out safely. For the fast path, when the full sync flag is not
+         * set in our inode, we can not do it because we start only our ordered
+         * extents and don't wait for them to complete (that is when
+         * btrfs_finish_ordered_io runs), so here at this point their last_trans
+         * value might be less than or equal to fs_info->last_trans_committed,
+         * and setting a speculative last_trans for an inode when a buffered
+         * write is made (such as fs_info->generation + 1 for example) would not
+         * be reliable since after setting the value and before fsync is called
+         * any number of transactions can start and commit (transaction kthread
+         * commits the current transaction periodically), and a transaction
+         * commit neither starts nor waits for ordered extents to complete.
         */
        smp_mb();
        if (btrfs_inode_in_log(inode, root->fs_info->generation) ||
-            BTRFS_I(inode)->last_trans <=
+            (full_sync && BTRFS_I(inode)->last_trans <=
-            root->fs_info->last_trans_committed) {
+             root->fs_info->last_trans_committed)) {
-                BTRFS_I(inode)->last_trans = 0;
                /*
                 * We'v had everything committed since the last time we were
                 * modified so clear this flag in case it was set for whatever
@@ -2275,6 +2275,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
        bool same_page;
        bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
        u64 ino_size;
+        bool truncated_page = false;
+        bool updated_inode = false;
        ret = btrfs_wait_ordered_range(inode, offset, len);
        if (ret)
@@ -2306,13 +2308,18 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
         * entire page.
         */
        if (same_page && len < PAGE_CACHE_SIZE) {
-                if (offset < ino_size)
+                if (offset < ino_size) {
+                        truncated_page = true;
                        ret = btrfs_truncate_page(inode, offset, len, 0);
+                } else {
+                        ret = 0;
+                }
                goto out_only_mutex;
        }
        /* zero back part of the first page */
        if (offset < ino_size) {
+                truncated_page = true;
                ret = btrfs_truncate_page(inode, offset, 0, 0);
                if (ret) {
                        mutex_unlock(&inode->i_mutex);
@@ -2348,6 +2355,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
                if (!ret) {
                        /* zero the front end of the last page */
                        if (tail_start + tail_len < ino_size) {
+                                truncated_page = true;
                                ret = btrfs_truncate_page(inode,
                                                tail_start + tail_len, 0, 1);
                                if (ret)
@@ -2357,8 +2365,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
        }
        if (lockend < lockstart) {
-                mutex_unlock(&inode->i_mutex);
+                ret = 0;
-                return 0;
+                goto out_only_mutex;
        }
        while (1) {
@@ -2506,6 +2514,7 @@ out_trans:
        trans->block_rsv = &root->fs_info->trans_block_rsv;
        ret = btrfs_update_inode(trans, root, inode);
+        updated_inode = true;
        btrfs_end_transaction(trans, root);
        btrfs_btree_balance_dirty(root);
 out_free:
@@ -2515,6 +2524,22 @@ out:
        unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
                             &cached_state, GFP_NOFS);
 out_only_mutex:
+        if (!updated_inode && truncated_page && !ret && !err) {
+                /*
+                 * If we only end up zeroing part of a page, we still need to
+                 * update the inode item, so that all the time fields are
+                 * updated as well as the necessary btrfs inode in memory fields
+                 * for detecting, at fsync time, if the inode isn't yet in the
+                 * log tree or it's there but not up to date.
+                 */
+                trans = btrfs_start_transaction(root, 1);
+                if (IS_ERR(trans)) {
+                        err = PTR_ERR(trans);
+                } else {
+                        err = btrfs_update_inode(trans, root, inode);
+                        ret = btrfs_end_transaction(trans, root);
+                }
+        }
        mutex_unlock(&inode->i_mutex);
        if (ret && !err)
                err = ret;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a85c23dfcddb..d2e732d7af52 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -108,6 +108,13 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
 static int btrfs_dirty_inode(struct inode *inode);
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+void btrfs_test_inode_set_ops(struct inode *inode)
+{
+        BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
+}
+#endif
 static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
                                     struct inode *inode,  struct inode *dir,
                                     const struct qstr *qstr)
@@ -1542,30 +1549,17 @@ static void btrfs_split_extent_hook(struct inode *inode,
                u64 new_size;
                /*
-                 * We need the largest size of the remaining extent to see if we
+                 * See the explanation in btrfs_merge_extent_hook, the same
-                 * need to add a new outstanding extent.  Think of the following
+                 * applies here, just in reverse.
-                 * case
-                 *
-                 * [MEAX_EXTENT_SIZEx2 - 4k][4k]
-                 *
-                 * The new_size would just be 4k and we'd think we had enough
-                 * outstanding extents for this if we only took one side of the
-                 * split, same goes for the other direction.  We need to see if
-                 * the larger size still is the same amount of extents as the
-                 * original size, because if it is we need to add a new
-                 * outstanding extent.  But if we split up and the larger size
-                 * is less than the original then we are good to go since we've
-                 * already accounted for the extra extent in our original
-                 * accounting.
                 */
                new_size = orig->end - split + 1;
-                if ((split - orig->start) > new_size)
+                num_extents = div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
-                        new_size = split - orig->start;
-                num_extents = div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
                                        BTRFS_MAX_EXTENT_SIZE);
-                if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
+                new_size = split - orig->start;
-                              BTRFS_MAX_EXTENT_SIZE) < num_extents)
+                num_extents += div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
+                                        BTRFS_MAX_EXTENT_SIZE);
+                if (div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
+                              BTRFS_MAX_EXTENT_SIZE) >= num_extents)
                        return;
        }
@@ -1591,8 +1585,10 @@ static void btrfs_merge_extent_hook(struct inode *inode,
        if (!(other->state & EXTENT_DELALLOC))
                return;
-        old_size = other->end - other->start + 1;
+        if (new->start > other->start)
-        new_size = old_size + (new->end - new->start + 1);
+                new_size = new->end - other->start + 1;
+        else
+                new_size = other->end - new->start + 1;
        /* we're not bigger than the max, unreserve the space and go */
        if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
@@ -1603,13 +1599,32 @@ static void btrfs_merge_extent_hook(struct inode *inode,
        }
        /*
-         * If we grew by another max_extent, just return, we want to keep that
+         * We have to add up either side to figure out how many extents were
-         * reserved amount.
+         * accounted for before we merged into one big extent.  If the number of
+         * extents we accounted for is <= the amount we need for the new range
+         * then we can return, otherwise drop.  Think of it like this
+         *
+         * [ 4k][MAX_SIZE]
+         *
+         * So we've grown the extent by a MAX_SIZE extent, this would mean we
+         * need 2 outstanding extents, on one side we have 1 and the other side
+         * we have 1 so they are == and we can return.  But in this case
+         *
+         * [MAX_SIZE+4k][MAX_SIZE+4k]
+         *
+         * Each range on their own accounts for 2 extents, but merged together
+         * they are only 3 extents worth of accounting, so we need to drop in
+         * this case.
         */
+        old_size = other->end - other->start + 1;
        num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
                                BTRFS_MAX_EXTENT_SIZE);
+        old_size = new->end - new->start + 1;
+        num_extents += div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
+                                 BTRFS_MAX_EXTENT_SIZE);
        if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
-                      BTRFS_MAX_EXTENT_SIZE) > num_extents)
+                      BTRFS_MAX_EXTENT_SIZE) >= num_extents)
                return;
        spin_lock(&BTRFS_I(inode)->lock);
@@ -1686,6 +1701,10 @@ static void btrfs_set_bit_hook(struct inode *inode,
                        spin_unlock(&BTRFS_I(inode)->lock);
                }
+                /* For sanity tests */
+                if (btrfs_test_is_dummy_root(root))
+                        return;
                __percpu_counter_add(&root->fs_info->delalloc_bytes, len,
                                     root->fs_info->delalloc_batch);
                spin_lock(&BTRFS_I(inode)->lock);
@@ -1741,6 +1760,10 @@ static void btrfs_clear_bit_hook(struct inode *inode,
                    root != root->fs_info->tree_root)
                        btrfs_delalloc_release_metadata(inode, len);
+                /* For sanity tests. */
+                if (btrfs_test_is_dummy_root(root))
+                        return;
                if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
                    && do_list && !(state->state & EXTENT_NORESERVE))
                        btrfs_free_reserved_data_space(inode, len);
@@ -7213,7 +7236,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
        u64 start = iblock << inode->i_blkbits;
        u64 lockstart, lockend;
        u64 len = bh_result->b_size;
-        u64 orig_len = len;
+        u64 *outstanding_extents = NULL;
        int unlock_bits = EXTENT_LOCKED;
        int ret = 0;
@@ -7225,6 +7248,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
        lockstart = start;
        lockend = start + len - 1;
+        if (current->journal_info) {
+                /*
+                 * Need to pull our outstanding extents and set journal_info to NULL so
+                 * that anything that needs to check if there's a transaction doesn't get
+                 * confused.
+                 */
+                outstanding_extents = current->journal_info;
+                current->journal_info = NULL;
+        }
        /*
         * If this errors out it's because we couldn't invalidate pagecache for
         * this range and we need to fallback to buffered.
@@ -7285,7 +7318,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
            ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
             em->block_start != EXTENT_MAP_HOLE)) {
                int type;
-                int ret;
                u64 block_start, orig_start, orig_block_len, ram_bytes;
                if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
@@ -7349,11 +7381,20 @@ unlock:
                if (start + len > i_size_read(inode))
                        i_size_write(inode, start + len);
-                if (len < orig_len) {
+                /*
+                 * If we have an outstanding_extents count still set then we're
+                 * within our reservation, otherwise we need to adjust our inode
+                 * counter appropriately.
+                 */
+                if (*outstanding_extents) {
+                        (*outstanding_extents)--;
+                } else {
                        spin_lock(&BTRFS_I(inode)->lock);
                        BTRFS_I(inode)->outstanding_extents++;
                        spin_unlock(&BTRFS_I(inode)->lock);
                }
+                current->journal_info = outstanding_extents;
                btrfs_free_reserved_data_space(inode, len);
        }
@@ -7377,6 +7418,8 @@ unlock:
 unlock_err:
        clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
                         unlock_bits, 1, 0, &cached_state, GFP_NOFS);
+        if (outstanding_extents)
+                current->journal_info = outstanding_extents;
        return ret;
 }
@@ -8076,6 +8119,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
+        u64 outstanding_extents = 0;
        size_t count = 0;
        int flags = 0;
        bool wakeup = true;
@@ -8113,6 +8157,16 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
                ret = btrfs_delalloc_reserve_space(inode, count);
                if (ret)
                        goto out;
+                outstanding_extents = div64_u64(count +
+                                                BTRFS_MAX_EXTENT_SIZE - 1,
+                                                BTRFS_MAX_EXTENT_SIZE);
+                /*
+                 * We need to know how many extents we reserved so that we can
+                 * do the accounting properly if we go over the number we
+                 * originally calculated.  Abuse current->journal_info for this.
+                 */
+                current->journal_info = &outstanding_extents;
        } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
                                     &BTRFS_I(inode)->runtime_flags)) {
                inode_dio_done(inode);
@@ -8125,6 +8179,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
                        iter, offset, btrfs_get_blocks_direct, NULL,
                        btrfs_submit_direct, flags);
        if (rw & WRITE) {
+                current->journal_info = NULL;
                if (ret < 0 && ret != -EIOCBQUEUED)
                        btrfs_delalloc_release_space(inode, count);
                else if (ret >= 0 && (size_t)ret < count)
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 534544e08f76..157cc54fc634 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -452,9 +452,7 @@ void btrfs_get_logged_extents(struct inode *inode,
                        continue;
                if (entry_end(ordered) <= start)
                        break;
-                if (!list_empty(&ordered->log_list))
+                if (test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
-                        continue;
-                if (test_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
                        continue;
                list_add(&ordered->log_list, logged_list);
                atomic_inc(&ordered->refs);
@@ -511,8 +509,7 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
                wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
                                                   &ordered->flags));
-                if (!test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
+                list_add_tail(&ordered->trans_list, &trans->ordered);
-                        list_add_tail(&ordered->trans_list, &trans->ordered);
                spin_lock_irq(&log->log_extents_lock[index]);
        }
        spin_unlock_irq(&log->log_extents_lock[index]);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 97159a8e91d4..058c79eecbfb 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1259,7 +1259,7 @@ static int comp_oper(struct btrfs_qgroup_operation *oper1,
        if (oper1->seq < oper2->seq)
                return -1;
        if (oper1->seq > oper2->seq)
-                return -1;
+                return 1;
        if (oper1->ref_root < oper2->ref_root)
                return -1;
        if (oper1->ref_root > oper2->ref_root)
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index fe5857223515..d6033f540cc7 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -230,6 +230,7 @@ struct pending_dir_move {
        u64 parent_ino;
        u64 ino;
        u64 gen;
+        bool is_orphan;
        struct list_head update_refs;
 };
@@ -2984,7 +2985,8 @@ static int add_pending_dir_move(struct send_ctx *sctx,
                                u64 ino_gen,
                                u64 parent_ino,
                                struct list_head *new_refs,
-                                struct list_head *deleted_refs)
+                                struct list_head *deleted_refs,
+                                const bool is_orphan)
 {
        struct rb_node **p = &sctx->pending_dir_moves.rb_node;
        struct rb_node *parent = NULL;
@@ -2999,6 +3001,7 @@ static int add_pending_dir_move(struct send_ctx *sctx,
        pm->parent_ino = parent_ino;
        pm->ino = ino;
        pm->gen = ino_gen;
+        pm->is_orphan = is_orphan;
        INIT_LIST_HEAD(&pm->list);
        INIT_LIST_HEAD(&pm->update_refs);
        RB_CLEAR_NODE(&pm->node);
@@ -3131,16 +3134,20 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
        rmdir_ino = dm->rmdir_ino;
        free_waiting_dir_move(sctx, dm);
-        ret = get_first_ref(sctx->parent_root, pm->ino,
+        if (pm->is_orphan) {
-                            &parent_ino, &parent_gen, name);
+                ret = gen_unique_name(sctx, pm->ino,
-        if (ret < 0)
+                                      pm->gen, from_path);
-                goto out;
+        } else {
+                ret = get_first_ref(sctx->parent_root, pm->ino,
-        ret = get_cur_path(sctx, parent_ino, parent_gen,
+                                    &parent_ino, &parent_gen, name);
-                           from_path);
+                if (ret < 0)
-        if (ret < 0)
+                        goto out;
-                goto out;
+                ret = get_cur_path(sctx, parent_ino, parent_gen,
-        ret = fs_path_add_path(from_path, name);
+                                   from_path);
+                if (ret < 0)
+                        goto out;
+                ret = fs_path_add_path(from_path, name);
+        }
        if (ret < 0)
                goto out;
@@ -3150,7 +3157,8 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
                LIST_HEAD(deleted_refs);
                ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
                ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
-                                           &pm->update_refs, &deleted_refs);
+                                           &pm->update_refs, &deleted_refs,
+                                           pm->is_orphan);
                if (ret < 0)
                        goto out;
                if (rmdir_ino) {
@@ -3283,6 +3291,127 @@ out:
        return ret;
 }
+/*
+ * Decide whether the rename of the directory currently being processed
+ * (sctx->cur_ino) has to be postponed because of the name it is moving to.
+ *
+ * A directory rename may need to be delayed even when no ancestor directory
+ * (in the send root) with a higher inode number than ours was renamed. That
+ * is the case when the new name is the old name (the name in the parent root)
+ * of some other, unrelated directory whose own rename is still delayed because
+ * an ancestor of it with a higher inode number got renamed.
+ *
+ * Example:
+ *
+ * Parent snapshot:
+ * .                                       (ino 256)
+ * |---- a/                                (ino 257)
+ * |     |---- file                        (ino 260)
+ * |
+ * |---- b/                                (ino 258)
+ * |---- c/                                (ino 259)
+ *
+ * Send snapshot:
+ * .                                       (ino 256)
+ * |---- a/                                (ino 258)
+ * |---- x/                                (ino 259)
+ *       |---- y/                          (ino 257)
+ *             |----- file                 (ino 260)
+ *
+ * Inode 258 can not be renamed from 'b' to 'a' before inode 257 has been
+ * renamed from 'a' to 'x/y', and that rename in turn has to wait for inode
+ * 259 to be renamed from 'c' to 'x'. The send stream therefore has to issue
+ * the rename commands in this order:
+ *
+ * 1 - rename 259 from 'c' to 'x'
+ * 2 - rename 257 from 'a' to 'x/y'
+ * 3 - rename 258 from 'b' to 'a'
+ *
+ * Returns 1 if the rename of sctx->cur_ino needs to be delayed, 0 if it can
+ * be done right away and < 0 on error.
+ */
+static int wait_for_dest_dir_move(struct send_ctx *sctx,
+                                  struct recorded_ref *parent_ref,
+                                  const bool is_orphan)
+{
+        struct btrfs_path *path;
+        struct btrfs_dir_item *dentry;
+        struct btrfs_key lookup_key;
+        struct btrfs_key target_key;
+        u64 parent_root_gen;
+        u64 send_root_gen;
+        int ret;
+        /* No directory move is pending at all, so nothing can hold us up. */
+        if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves))
+                return 0;
+        path = alloc_path_for_send();
+        if (!path)
+                return -ENOMEM;
+        /* Search the parent root for a dir item matching the new name. */
+        lookup_key.objectid = parent_ref->dir;
+        lookup_key.type = BTRFS_DIR_ITEM_KEY;
+        lookup_key.offset = btrfs_name_hash(parent_ref->name,
+                                            parent_ref->name_len);
+        ret = btrfs_search_slot(NULL, sctx->parent_root, &lookup_key, path,
+                                0, 0);
+        if (ret != 0) {
+                /* A positive result is not an error: no delay is needed. */
+                if (ret > 0)
+                        ret = 0;
+                goto out;
+        }
+        dentry = btrfs_match_dir_item_name(sctx->parent_root, path,
+                                           parent_ref->name,
+                                           parent_ref->name_len);
+        /* ret is still 0 from the successful search above. */
+        if (!dentry)
+                goto out;
+        /*
+         * target_key.objectid is the number of the inode that has a dentry in
+         * the parent directory with the same name that sctx->cur_ino is being
+         * renamed to. If that inode also exists in the send root and is
+         * currently marked as having a pending rename, the rename of
+         * sctx->cur_ino has to be delayed as well, so that it happens after
+         * that other inode is renamed.
+         */
+        btrfs_dir_item_key_to_cpu(path->nodes[0], dentry, &target_key);
+        if (target_key.type != BTRFS_INODE_ITEM_KEY)
+                goto out;
+        ret = get_inode_info(sctx->parent_root, target_key.objectid, NULL,
+                             &parent_root_gen, NULL, NULL, NULL, NULL);
+        if (ret < 0)
+                goto out;
+        ret = get_inode_info(sctx->send_root, target_key.objectid, NULL,
+                             &send_root_gen, NULL, NULL, NULL, NULL);
+        /* The inode not existing in the send root is not a failure. */
+        if (ret == -ENOENT) {
+                ret = 0;
+                goto out;
+        }
+        if (ret < 0)
+                goto out;
+        /* Different inode, no need to delay the rename of sctx->cur_ino */
+        if (send_root_gen != parent_root_gen) {
+                ret = 0;
+                goto out;
+        }
+        if (!is_waiting_for_move(sctx, target_key.objectid))
+                goto out;
+        /* Queue our own rename behind the one of the inode owning the name. */
+        ret = add_pending_dir_move(sctx, sctx->cur_ino, sctx->cur_inode_gen,
+                                   target_key.objectid, &sctx->new_refs,
+                                   &sctx->deleted_refs, is_orphan);
+        if (ret == 0)
+                ret = 1;
+out:
+        btrfs_free_path(path);
+        return ret;
+}
 static int wait_for_parent_move(struct send_ctx *sctx,
                                struct recorded_ref *parent_ref)
 {
@@ -3349,7 +3478,8 @@ out:
                                           sctx->cur_inode_gen,
                                           ino,
                                           &sctx->new_refs,
-                                           &sctx->deleted_refs);
+                                           &sctx->deleted_refs,
+                                           false);
                if (!ret)
                        ret = 1;
        }
@@ -3372,6 +3502,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
        int did_overwrite = 0;
        int is_orphan = 0;
        u64 last_dir_ino_rm = 0;
+        bool can_rename = true;
 verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
@@ -3490,12 +3621,22 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
                        }
                }
+                if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) {
+                        ret = wait_for_dest_dir_move(sctx, cur, is_orphan);
+                        if (ret < 0)
+                                goto out;
+                        if (ret == 1) {
+                                can_rename = false;
+                                *pending_move = 1;
+                        }
+                }
                /*
                 * link/move the ref to the new place. If we have an orphan
                 * inode, move it and update valid_path. If not, link or move
                 * it depending on the inode mode.
                 */
-                if (is_orphan) {
+                if (is_orphan && can_rename) {
                        ret = send_rename(sctx, valid_path, cur->full_path);
                        if (ret < 0)
                                goto out;
@@ -3503,7 +3644,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
                        ret = fs_path_copy(valid_path, cur->full_path);
                        if (ret < 0)
                                goto out;
-                } else {
+                } else if (can_rename) {
                        if (S_ISDIR(sctx->cur_inode_mode)) {
                                /*
                                 * Dirs can't be linked, so move it. For moved
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index a116b55ce788..054fc0d97131 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -911,6 +911,197 @@ out:
        return ret;
 }
+/*
+ * Sanity test for the outstanding_extents counter of a btrfs inode.
+ *
+ * Builds a test inode attached to a dummy root, then alternates between
+ * btrfs_set_extent_delalloc() and clear_extent_bit() over ranges around
+ * BTRFS_MAX_EXTENT_SIZE, comparing BTRFS_I(inode)->outstanding_extents with
+ * the expected value after every step.
+ *
+ * Returns 0 if every check passes, -ENOMEM if the inode, the dummy root or
+ * the dummy fs_info can not be allocated, -EINVAL on a miscount, or the
+ * non-zero value returned by a failing btrfs_set_extent_delalloc() or
+ * clear_extent_bit() call.
+ */
+static int test_extent_accounting(void)
+{
+        struct inode *inode = NULL;
+        struct btrfs_root *root = NULL;
+        int ret = -ENOMEM;
+        inode = btrfs_new_test_inode();
+        if (!inode) {
+                test_msg("Couldn't allocate inode\n");
+                return ret;
+        }
+        root = btrfs_alloc_dummy_root();
+        if (IS_ERR(root)) {
+                test_msg("Couldn't allocate root\n");
+                goto out;
+        }
+        root->fs_info = btrfs_alloc_dummy_fs_info();
+        if (!root->fs_info) {
+                test_msg("Couldn't allocate dummy fs info\n");
+                goto out;
+        }
+        BTRFS_I(inode)->root = root;
+        btrfs_test_inode_set_ops(inode);
+        /*
+         * [BTRFS_MAX_EXTENT_SIZE]
+         *
+         * Throughout this test the counter is bumped by hand before a range
+         * is marked delalloc, and the check that follows verifies the value
+         * left behind once the range has been set.
+         */
+        BTRFS_I(inode)->outstanding_extents++;
+        ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1,
+                                        NULL);
+        if (ret) {
+                test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+                goto out;
+        }
+        if (BTRFS_I(inode)->outstanding_extents != 1) {
+                ret = -EINVAL;
+                test_msg("Miscount, wanted 1, got %u\n",
+                         BTRFS_I(inode)->outstanding_extents);
+                goto out;
+        }
+        /* [BTRFS_MAX_EXTENT_SIZE][4k] */
+        BTRFS_I(inode)->outstanding_extents++;
+        ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE,
+                                        BTRFS_MAX_EXTENT_SIZE + 4095, NULL);
+        if (ret) {
+                test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+                goto out;
+        }
+        if (BTRFS_I(inode)->outstanding_extents != 2) {
+                ret = -EINVAL;
+                test_msg("Miscount, wanted 2, got %u\n",
+                         BTRFS_I(inode)->outstanding_extents);
+                goto out;
+        }
+        /* [BTRFS_MAX_EXTENT_SIZE/2][4K HOLE][the rest] */
+        ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
+                               BTRFS_MAX_EXTENT_SIZE >> 1,
+                               (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
+                               EXTENT_DELALLOC | EXTENT_DIRTY |
+                               EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0,
+                               NULL, GFP_NOFS);
+        if (ret) {
+                test_msg("clear_extent_bit returned %d\n", ret);
+                goto out;
+        }
+        if (BTRFS_I(inode)->outstanding_extents != 2) {
+                ret = -EINVAL;
+                test_msg("Miscount, wanted 2, got %u\n",
+                         BTRFS_I(inode)->outstanding_extents);
+                goto out;
+        }
+        /* [BTRFS_MAX_EXTENT_SIZE][4K] */
+        BTRFS_I(inode)->outstanding_extents++;
+        ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1,
+                                        (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
+                                        NULL);
+        if (ret) {
+                test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+                goto out;
+        }
+        if (BTRFS_I(inode)->outstanding_extents != 2) {
+                ret = -EINVAL;
+                test_msg("Miscount, wanted 2, got %u\n",
+                         BTRFS_I(inode)->outstanding_extents);
+                goto out;
+        }
+        /*
+         * [BTRFS_MAX_EXTENT_SIZE+4K][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4K]
+         *
+         * I'm artificially adding 2 to outstanding_extents because in the
+         * buffered IO case we'd add things up as we go, but I don't feel like
+         * doing that here, this isn't the interesting case we want to test.
+         */
+        BTRFS_I(inode)->outstanding_extents += 2;
+        ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE + 8192,
+                                        (BTRFS_MAX_EXTENT_SIZE << 1) + 12287,
+                                        NULL);
+        if (ret) {
+                test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+                goto out;
+        }
+        if (BTRFS_I(inode)->outstanding_extents != 4) {
+                ret = -EINVAL;
+                test_msg("Miscount, wanted 4, got %u\n",
+                         BTRFS_I(inode)->outstanding_extents);
+                goto out;
+        }
+        /* [BTRFS_MAX_EXTENT_SIZE+4k][4k][BTRFS_MAX_EXTENT_SIZE+4k] */
+        BTRFS_I(inode)->outstanding_extents++;
+        ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096,
+                                        BTRFS_MAX_EXTENT_SIZE+8191, NULL);
+        if (ret) {
+                test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+                goto out;
+        }
+        if (BTRFS_I(inode)->outstanding_extents != 3) {
+                ret = -EINVAL;
+                test_msg("Miscount, wanted 3, got %u\n",
+                         BTRFS_I(inode)->outstanding_extents);
+                goto out;
+        }
+        /* [BTRFS_MAX_EXTENT_SIZE+4k][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4k] */
+        ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
+                               BTRFS_MAX_EXTENT_SIZE+4096,
+                               BTRFS_MAX_EXTENT_SIZE+8191,
+                               EXTENT_DIRTY | EXTENT_DELALLOC |
+                               EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
+                               NULL, GFP_NOFS);
+        if (ret) {
+                test_msg("clear_extent_bit returned %d\n", ret);
+                goto out;
+        }
+        if (BTRFS_I(inode)->outstanding_extents != 4) {
+                ret = -EINVAL;
+                test_msg("Miscount, wanted 4, got %u\n",
+                         BTRFS_I(inode)->outstanding_extents);
+                goto out;
+        }
+        /*
+         * Refill the hole again just for good measure, because I thought it
+         * might fail and I'd rather satisfy my paranoia at this point.
+         */
+        BTRFS_I(inode)->outstanding_extents++;
+        ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096,
+                                        BTRFS_MAX_EXTENT_SIZE+8191, NULL);
+        if (ret) {
+                test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+                goto out;
+        }
+        if (BTRFS_I(inode)->outstanding_extents != 3) {
+                ret = -EINVAL;
+                test_msg("Miscount, wanted 3, got %u\n",
+                         BTRFS_I(inode)->outstanding_extents);
+                goto out;
+        }
+        /* Empty */
+        ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
+                               EXTENT_DIRTY | EXTENT_DELALLOC |
+                               EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
+                               NULL, GFP_NOFS);
+        if (ret) {
+                test_msg("clear_extent_bit returned %d\n", ret);
+                goto out;
+        }
+        if (BTRFS_I(inode)->outstanding_extents) {
+                ret = -EINVAL;
+                test_msg("Miscount, wanted 0, got %u\n",
+                         BTRFS_I(inode)->outstanding_extents);
+                goto out;
+        }
+        ret = 0;
+out:
+        /* On failure, clear whatever range state the steps above left set. */
+        if (ret)
+                clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
+                                 EXTENT_DIRTY | EXTENT_DELALLOC |
+                                 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
+                                 NULL, GFP_NOFS);
+        iput(inode);
+        /*
+         * A failed btrfs_alloc_dummy_root() leaves an ERR_PTR in root. That
+         * value is an encoded errno, not a root, so it must never be handed
+         * to btrfs_free_dummy_root() as if it were one.
+         */
+        if (!IS_ERR(root))
+                btrfs_free_dummy_root(root);
+        return ret;
+}
 int btrfs_test_inodes(void)
 {
        int ret;
@@ -924,5 +1115,9 @@ int btrfs_test_inodes(void)
        if (ret)
                return ret;
        test_msg("Running hole first btrfs_get_extent test\n");
-        return test_hole_first();
+        ret = test_hole_first();
+        if (ret)
+                return ret;
+        test_msg("Running outstanding_extents tests\n");
+        return test_extent_accounting();
 }
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 7e80f32550a6..8be4278e25e8 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1023,17 +1023,13 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
        u64 old_root_bytenr;
        u64 old_root_used;
        struct btrfs_root *tree_root = root->fs_info->tree_root;
-        bool extent_root = (root->objectid == BTRFS_EXTENT_TREE_OBJECTID);
        old_root_used = btrfs_root_used(&root->root_item);
-        btrfs_write_dirty_block_groups(trans, root);
        while (1) {
                old_root_bytenr = btrfs_root_bytenr(&root->root_item);
                if (old_root_bytenr == root->node->start &&
-                    old_root_used == btrfs_root_used(&root->root_item) &&
+                    old_root_used == btrfs_root_used(&root->root_item))
-                    (!extent_root ||
-                     list_empty(&trans->transaction->dirty_bgs)))
                        break;
                btrfs_set_root_node(&root->root_item, root->node);
@@ -1044,17 +1040,6 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
                        return ret;
                old_root_used = btrfs_root_used(&root->root_item);
-                if (extent_root) {
-                        ret = btrfs_write_dirty_block_groups(trans, root);
-                        if (ret)
-                                return ret;
-                }
-                ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
-                if (ret)
-                        return ret;
-                ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
-                if (ret)
-                        return ret;
        }
        return 0;
@@ -1071,6 +1056,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
                                         struct btrfs_root *root)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
+        struct list_head *dirty_bgs = &trans->transaction->dirty_bgs;
        struct list_head *next;
        struct extent_buffer *eb;
        int ret;
@@ -1098,11 +1084,15 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
        if (ret)
                return ret;
+        ret = btrfs_setup_space_cache(trans, root);
+        if (ret)
+                return ret;
        /* run_qgroups might have added some more refs */
        ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
        if (ret)
                return ret;
+again:
        while (!list_empty(&fs_info->dirty_cowonly_roots)) {
                next = fs_info->dirty_cowonly_roots.next;
                list_del_init(next);
@@ -1115,8 +1105,23 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
                ret = update_cowonly_root(trans, root);
                if (ret)
                        return ret;
+                ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
+                if (ret)
+                        return ret;
        }
+        while (!list_empty(dirty_bgs)) {
+                ret = btrfs_write_dirty_block_groups(trans, root);
+                if (ret)
+                        return ret;
+                ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
+                if (ret)
+                        return ret;
+        }
+        if (!list_empty(&fs_info->dirty_cowonly_roots))
+                goto again;
        list_add_tail(&fs_info->extent_root->dirty_list,
                      &trans->transaction->switch_commits);
        btrfs_after_dev_replace_commit(fs_info);
@@ -1814,6 +1819,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                wait_for_commit(root, cur_trans);
+                if (unlikely(cur_trans->aborted))
+                        ret = cur_trans->aborted;
                btrfs_put_transaction(cur_trans);
                return ret;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9a37f8b39bae..c5b8ba37f88e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1012,7 +1012,7 @@ again:
                base = btrfs_item_ptr_offset(leaf, path->slots[0]);
                while (cur_offset < item_size) {
-                        extref = (struct btrfs_inode_extref *)base + cur_offset;
+                        extref = (struct btrfs_inode_extref *)(base + cur_offset);
                        victim_name_len = btrfs_inode_extref_name_len(leaf, extref);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index cd4d1315aaa9..8222f6f74147 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4903,10 +4903,17 @@ static void sort_parity_stripes(struct btrfs_bio *bbio, int num_stripes)
 static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes)
 {
        struct btrfs_bio *bbio = kzalloc(
+                 /* the size of the btrfs_bio */
                sizeof(struct btrfs_bio) +
+                /* plus the variable array for the stripes */
                sizeof(struct btrfs_bio_stripe) * (total_stripes) +
+                /* plus the variable array for the tgt dev */
                sizeof(int) * (real_stripes) +
-                sizeof(u64) * (real_stripes),
+                /*
+                 * plus the raid_map, which includes both the tgt dev
+                 * and the stripes
+                 */
+                sizeof(u64) * (total_stripes),
                GFP_NOFS);
        if (!bbio)
                return NULL;
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 47b19465f0dc..883b93623bc5 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -111,6 +111,8 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
                                        name, name_len, -1);
                if (!di && (flags & XATTR_REPLACE))
                        ret = -ENODATA;
+                else if (IS_ERR(di))
+                        ret = PTR_ERR(di);
                else if (di)
                        ret = btrfs_delete_one_dir_name(trans, root, path, di);
                goto out;
@@ -127,10 +129,12 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
                ASSERT(mutex_is_locked(&inode->i_mutex));
                di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode),
                                        name, name_len, 0);
-                if (!di) {
+                if (!di)
                        ret = -ENODATA;
+                else if (IS_ERR(di))
+                        ret = PTR_ERR(di);
+                if (ret)
                        goto out;
-                }
                btrfs_release_path(path);
                di = NULL;
        }
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 4ac7445e6ec7..aa0dc2573374 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -1,6 +1,9 @@
 /*
 *   fs/cifs/cifsencrypt.c
 *
+ *   Encryption and hashing operations relating to NTLM and NTLMv2.  See
+ *   MS-NLMP for more detailed information.
+ *
 *   Copyright (C) International Business Machines  Corp., 2005,2013
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *
@@ -515,7 +518,8 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
                                 __func__);
                        return rc;
                }
-        } else if (ses->serverName) {
+        } else {
+                /* We use ses->serverName if no domain name available */
                len = strlen(ses->serverName);
                server = kmalloc(2 + (len * 2), GFP_KERNEL);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index d3aa999ab785..480cf9c81d50 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1599,6 +1599,8 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
                                pr_warn("CIFS: username too long\n");
                                goto cifs_parse_mount_err;
                        }
+                        kfree(vol->username);
                        vol->username = kstrdup(string, GFP_KERNEL);
                        if (!vol->username)
                                goto cifs_parse_mount_err;
@@ -1700,6 +1702,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
                                goto cifs_parse_mount_err;
                        }
+                        kfree(vol->domainname);
                        vol->domainname = kstrdup(string, GFP_KERNEL);
                        if (!vol->domainname) {
                                pr_warn("CIFS: no memory for domainname\n");
@@ -1731,6 +1734,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
                        }
                         if (strncasecmp(string, "default", 7) != 0) {
+                                kfree(vol->iocharset);
                                vol->iocharset = kstrdup(string,
                                                         GFP_KERNEL);
                                if (!vol->iocharset) {
@@ -2913,8 +2917,7 @@ ip_rfc1001_connect(struct TCP_Server_Info *server)
                 * calling name ends in null (byte 16) from old smb
                 * convention.
                 */
-                if (server->workstation_RFC1001_name &&
+                if (server->workstation_RFC1001_name[0] != 0)
-                    server->workstation_RFC1001_name[0] != 0)
                        rfc1002mangle(ses_init_buf->trailer.
                                      session_req.calling_name,
                                      server->workstation_RFC1001_name,
@@ -3692,6 +3695,12 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
 #endif /* CIFS_WEAK_PW_HASH */
                rc = SMBNTencrypt(tcon->password, ses->server->cryptkey,
                                        bcc_ptr, nls_codepage);
+                if (rc) {
+                        cifs_dbg(FYI, "%s Can't generate NTLM rsp. Error: %d\n",
+                                 __func__, rc);
+                        cifs_buf_release(smb_buffer);
+                        return rc;
+                }
                bcc_ptr += CIFS_AUTH_RESP_SIZE;
                if (ses->capabilities & CAP_UNICODE) {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index a94b3e673182..ca30c391a894 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1823,6 +1823,7 @@ refind_writable:
                        cifsFileInfo_put(inv_file);
                        spin_lock(&cifs_file_list_lock);
                        ++refind;
+                        inv_file = NULL;
                        goto refind_writable;
                }
        }
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 2d4f37235ed0..3e126d7bb2ea 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -771,6 +771,8 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
                                cifs_buf_release(srchinf->ntwrk_buf_start);
                        }
                        kfree(srchinf);
+                        if (rc)
+                                goto cgii_exit;
        } else
                goto cgii_exit;
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 689f035915cf..22dfdf17d065 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -322,7 +322,7 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr)
        /* return pointer to beginning of data area, ie offset from SMB start */
        if ((*off != 0) && (*len != 0))
-                return hdr->ProtocolId + *off;
+                return (char *)(&hdr->ProtocolId[0]) + *off;
        else
                return NULL;
 }
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 96b5d40a2ece..eab05e1aa587 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -684,7 +684,8 @@ smb2_clone_range(const unsigned int xid,
                        /* No need to change MaxChunks since already set to 1 */
                        chunk_sizes_updated = true;
-                }
+                } else
+                        goto cchunk_out;
        }
 cchunk_out:
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 3417340bf89e..65cd7a84c8bc 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1218,7 +1218,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
        struct smb2_ioctl_req *req;
        struct smb2_ioctl_rsp *rsp;
        struct TCP_Server_Info *server;
-        struct cifs_ses *ses = tcon->ses;
+        struct cifs_ses *ses;
        struct kvec iov[2];
        int resp_buftype;
        int num_iovecs;
@@ -1233,6 +1233,11 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
        if (plen)
                *plen = 0;
+        if (tcon)
+                ses = tcon->ses;
+        else
+                return -EIO;
        if (ses && (ses->server))
                server = ses->server;
        else
@@ -1296,14 +1301,12 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
        rsp = (struct smb2_ioctl_rsp *)iov[0].iov_base;
        if ((rc != 0) && (rc != -EINVAL)) {
-                if (tcon)
+                cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
-                        cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
                goto ioctl_exit;
        } else if (rc == -EINVAL) {
                if ((opcode != FSCTL_SRV_COPYCHUNK_WRITE) &&
                    (opcode != FSCTL_SRV_COPYCHUNK)) {
-                        if (tcon)
+                        cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
-                                cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
                        goto ioctl_exit;
                }
        }
@@ -1629,7 +1632,7 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
        rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, 0);
-        if ((rc != 0) && tcon)
+        if (rc != 0)
                cifs_stats_fail_inc(tcon, SMB2_FLUSH_HE);
        free_rsp_buf(resp_buftype, iov[0].iov_base);
@@ -2114,7 +2117,7 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
        struct kvec iov[2];
        int rc = 0;
        int len;
-        int resp_buftype;
+        int resp_buftype = CIFS_NO_BUFFER;
        unsigned char *bufptr;
        struct TCP_Server_Info *server;
        struct cifs_ses *ses = tcon->ses;
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 90d1882b306f..5ba029e627cc 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -124,7 +124,7 @@ ecryptfs_get_key_payload_data(struct key *key)
 }
 #define ECRYPTFS_MAX_KEYSET_SIZE 1024
-#define ECRYPTFS_MAX_CIPHER_NAME_SIZE 32
+#define ECRYPTFS_MAX_CIPHER_NAME_SIZE 31
 #define ECRYPTFS_MAX_NUM_ENC_KEYS 64
 #define ECRYPTFS_MAX_IV_BYTES 16        /* 128 bits */
 #define ECRYPTFS_SALT_BYTES 2
@@ -237,7 +237,7 @@ struct ecryptfs_crypt_stat {
        struct crypto_ablkcipher *tfm;
        struct crypto_hash *hash_tfm; /* Crypto context for generating
                                       * the initialization vectors */
-        unsigned char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE];
+        unsigned char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1];
        unsigned char key[ECRYPTFS_MAX_KEY_BYTES];
        unsigned char root_iv[ECRYPTFS_MAX_IV_BYTES];
        struct list_head keysig_list;
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index b07731e68c0b..fd39bad6f1bd 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -303,9 +303,22 @@ ecryptfs_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        struct file *lower_file = ecryptfs_file_to_lower(file);
        long rc = -ENOTTY;
-        if (lower_file->f_op->unlocked_ioctl)
+        if (!lower_file->f_op->unlocked_ioctl)
+                return rc;
+        switch (cmd) {
+        case FITRIM:
+        case FS_IOC_GETFLAGS:
+        case FS_IOC_SETFLAGS:
+        case FS_IOC_GETVERSION:
+        case FS_IOC_SETVERSION:
                rc = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg);
-        return rc;
+                fsstack_copy_attr_all(file_inode(file), file_inode(lower_file));
+                return rc;
+        default:
+                return rc;
+        }
 }
 #ifdef CONFIG_COMPAT
@@ -315,9 +328,22 @@ ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        struct file *lower_file = ecryptfs_file_to_lower(file);
        long rc = -ENOIOCTLCMD;
-        if (lower_file->f_op->compat_ioctl)
+        if (!lower_file->f_op->compat_ioctl)
+                return rc;
+        switch (cmd) {
+        case FITRIM:
+        case FS_IOC32_GETFLAGS:
+        case FS_IOC32_SETFLAGS:
+        case FS_IOC32_GETVERSION:
+        case FS_IOC32_SETVERSION:
                rc = lower_file->f_op->compat_ioctl(lower_file, cmd, arg);
-        return rc;
+                fsstack_copy_attr_all(file_inode(file), file_inode(lower_file));
+                return rc;
+        default:
+                return rc;
+        }
 }
 #endif
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 917bd5c9776a..6bd67e2011f0 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -891,7 +891,7 @@ struct ecryptfs_parse_tag_70_packet_silly_stack {
        struct blkcipher_desc desc;
        char fnek_sig_hex[ECRYPTFS_SIG_SIZE_HEX + 1];
        char iv[ECRYPTFS_MAX_IV_BYTES];
-        char cipher_string[ECRYPTFS_MAX_CIPHER_NAME_SIZE];
+        char cipher_string[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1];
 };
 /**
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 1895d60f4122..c095d3264259 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -407,7 +407,7 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options,
        if (!cipher_name_set) {
                int cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER);
-                BUG_ON(cipher_name_len >= ECRYPTFS_MAX_CIPHER_NAME_SIZE);
+                BUG_ON(cipher_name_len > ECRYPTFS_MAX_CIPHER_NAME_SIZE);
                strcpy(mount_crypt_stat->global_default_cipher_name,
                       ECRYPTFS_DEFAULT_CIPHER);
        }
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index e907052eeadb..32a8bbd7a9ad 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -53,6 +53,18 @@ struct wb_writeback_work {
        struct completion *done;        /* set if the caller waits */
 };
+/*
+ * If an inode is constantly having its pages dirtied, but then the
+ * updates stop dirtytime_expire_interval seconds in the past, it's
+ * possible for the worst case time between when an inode has its
+ * timestamps updated and when they finally get written out to be two
+ * dirtytime_expire_intervals.  We set the default to 12 hours (in
+ * seconds), which means most of the time inodes will have their
+ * timestamps written to disk after 12 hours, but in the worst case a
+ * few inodes might not have their timestamps updated for 24 hours.
+ */
+unsigned int dirtytime_expire_interval = 12 * 60 * 60;
 /**
 * writeback_in_progress - determine whether there is writeback in progress
 * @bdi: the device's backing_dev_info structure.
@@ -275,8 +287,8 @@ static int move_expired_inodes(struct list_head *delaying_queue,
        if ((flags & EXPIRE_DIRTY_ATIME) == 0)
                older_than_this = work->older_than_this;
-        else if ((work->reason == WB_REASON_SYNC) == 0) {
+        else if (!work->for_sync) {
-                expire_time = jiffies - (HZ * 86400);
+                expire_time = jiffies - (dirtytime_expire_interval * HZ);
                older_than_this = &expire_time;
        }
        while (!list_empty(delaying_queue)) {
@@ -458,6 +470,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
                 */
                redirty_tail(inode, wb);
        } else if (inode->i_state & I_DIRTY_TIME) {
+                inode->dirtied_when = jiffies;
                list_move(&inode->i_wb_list, &wb->b_dirty_time);
        } else {
                /* The inode is clean. Remove from writeback lists. */
@@ -505,12 +518,17 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
        spin_lock(&inode->i_lock);
        dirty = inode->i_state & I_DIRTY;
-        if (((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) &&
+        if (inode->i_state & I_DIRTY_TIME) {
-             (inode->i_state & I_DIRTY_TIME)) ||
+                if ((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) ||
-            (inode->i_state & I_DIRTY_TIME_EXPIRED)) {
+                    unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) ||
-                dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED;
+                    unlikely(time_after(jiffies,
-                trace_writeback_lazytime(inode);
+                                        (inode->dirtied_time_when +
-        }
+                                         dirtytime_expire_interval * HZ)))) {
+                        dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED;
+                        trace_writeback_lazytime(inode);
+                }
+        } else
+                inode->i_state &= ~I_DIRTY_TIME_EXPIRED;
        inode->i_state &= ~dirty;
        /*
@@ -1131,6 +1149,56 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
        rcu_read_unlock();
 }
+/*
+ * Wake up bdi's periodically to make sure dirtytime inodes get
+ * written back periodically.  We deliberately do *not* check the
+ * b_dirty_time list in wb_has_dirty_io(), since this would cause the
+ * kernel to be constantly waking up once there are any dirtytime
+ * inodes on the system.  So instead we define a separate delayed work
+ * function which gets called much more rarely.  (By default, only
+ * once every 12 hours.)
+ *
+ * If there is any other write activity going on in the file system,
+ * this function won't be necessary.  But if the only thing that has
+ * happened on the file system is a dirtytime inode caused by an atime
+ * update, we need this infrastructure below to make sure that inode
+ * eventually gets pushed out to disk.
+ */
+static void wakeup_dirtytime_writeback(struct work_struct *w);
+static DECLARE_DELAYED_WORK(dirtytime_work, wakeup_dirtytime_writeback);
+static void wakeup_dirtytime_writeback(struct work_struct *w)
+{
+        struct backing_dev_info *bdi;
+        rcu_read_lock();
+        list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
+                if (list_empty(&bdi->wb.b_dirty_time))
+                        continue;
+                bdi_wakeup_thread(bdi);
+        }
+        rcu_read_unlock();
+        schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
+}
+static int __init start_dirtytime_writeback(void)
+{
+        schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
+        return 0;
+}
+__initcall(start_dirtytime_writeback);
+int dirtytime_interval_handler(struct ctl_table *table, int write,
+                               void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+        int ret;
+        ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+        if (ret == 0 && write)
+                mod_delayed_work(system_wq, &dirtytime_work, 0);
+        return ret;
+}
 static noinline void block_dump___mark_inode_dirty(struct inode *inode)
 {
        if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
@@ -1269,8 +1337,13 @@ void __mark_inode_dirty(struct inode *inode, int flags)
                        }
                        inode->dirtied_when = jiffies;
-                        list_move(&inode->i_wb_list, dirtytime ?
+                        if (dirtytime)
-                                  &bdi->wb.b_dirty_time : &bdi->wb.b_dirty);
+                                inode->dirtied_time_when = jiffies;
+                        if (inode->i_state & (I_DIRTY_INODE | I_DIRTY_PAGES))
+                                list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
+                        else
+                                list_move(&inode->i_wb_list,
+                                          &bdi->wb.b_dirty_time);
                        spin_unlock(&bdi->wb.list_lock);
                        trace_writeback_dirty_inode_enqueue(inode);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ed19a7d622fa..39706c57ad3c 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -890,8 +890,8 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
        newpage = buf->page;
-        if (WARN_ON(!PageUptodate(newpage)))
+        if (!PageUptodate(newpage))
-                return -EIO;
+                SetPageUptodate(newpage);
        ClearPageMappedToDisk(newpage);
@@ -1353,6 +1353,17 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file,
        return err;
 }
+static int fuse_dev_open(struct inode *inode, struct file *file)
+{
+        /*
+         * The fuse device's file's private_data is used to hold
+         * the fuse_conn(ection) when it is mounted, and is used to
+         * keep track of whether the file has been mounted already.
+         */
+        file->private_data = NULL;
+        return 0;
+}
 static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
                              unsigned long nr_segs, loff_t pos)
 {
@@ -1797,6 +1808,9 @@ copy_finish:
 static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
                       unsigned int size, struct fuse_copy_state *cs)
 {
+        /* Don't try to move pages (yet) */
+        cs->move_pages = 0;
        switch (code) {
        case FUSE_NOTIFY_POLL:
                return fuse_notify_poll(fc, size, cs);
@@ -2217,6 +2231,7 @@ static int fuse_dev_fasync(int fd, struct file *file, int on)
 const struct file_operations fuse_dev_operations = {
        .owner          = THIS_MODULE,
+        .open           = fuse_dev_open,
        .llseek         = no_llseek,
        .read           = do_sync_read,
        .aio_read       = fuse_dev_read,
diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c
index 6e560d56094b..754fdf8c6356 100644
--- a/fs/hfsplus/brec.c
+++ b/fs/hfsplus/brec.c
@@ -131,13 +131,16 @@ skip:
        hfs_bnode_write(node, entry, data_off + key_len, entry_len);
        hfs_bnode_dump(node);
-        if (new_node) {
+        /*
-                /* update parent key if we inserted a key
+         * update parent key if we inserted a key
-                 * at the start of the first node
+         * at the start of the node and it is not the new node
-                 */
+         */
-                if (!rec && new_node != node)
+        if (!rec && new_node != node) {
-                        hfs_brec_update_parent(fd);
+                hfs_bnode_read_key(node, fd->search_key, data_off + size);
+                hfs_brec_update_parent(fd);
+        }
+        if (new_node) {
                hfs_bnode_put(fd->bnode);
                if (!new_node->parent) {
                        hfs_btree_inc_height(tree);
@@ -168,9 +171,6 @@ skip:
                goto again;
        }
-        if (!rec)
-                hfs_brec_update_parent(fd);
        return 0;
 }
@@ -370,6 +370,8 @@ again:
        if (IS_ERR(parent))
                return PTR_ERR(parent);
        __hfs_brec_find(parent, fd, hfs_find_rec_by_key);
+        if (fd->record < 0)
+                return -ENOENT;
        hfs_bnode_dump(parent);
        rec = fd->record;
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index b684e8a132e6..2bacb9988566 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -207,6 +207,7 @@ static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of,
                goto out_free;
        }
+        of->event = atomic_read(&of->kn->attr.open->event);
        ops = kernfs_ops(of->kn);
        if (ops->read)
                len = ops->read(of, buf, len, *ppos);
diff --git a/fs/locks.c b/fs/locks.c
index 365c82e1b3a9..40bc384728c0 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1388,9 +1388,8 @@ any_leases_conflict(struct inode *inode, struct file_lock *breaker)
 int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
 {
        int error = 0;
-        struct file_lock *new_fl;
        struct file_lock_context *ctx = inode->i_flctx;
-        struct file_lock *fl;
+        struct file_lock *new_fl, *fl, *tmp;
        unsigned long break_time;
        int want_write = (mode & O_ACCMODE) != O_RDONLY;
        LIST_HEAD(dispose);
@@ -1420,7 +1419,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
                        break_time++;   /* so that 0 means no break time */
        }
-        list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
+        list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) {
                if (!leases_conflict(fl, new_fl))
                        continue;
                if (want_write) {
@@ -1665,7 +1664,8 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
        }
        if (my_fl != NULL) {
-                error = lease->fl_lmops->lm_change(my_fl, arg, &dispose);
+                lease = my_fl;
+                error = lease->fl_lmops->lm_change(lease, arg, &dispose);
                if (error)
                        goto out;
                goto out_setup;
@@ -1727,7 +1727,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
                        break;
                }
        }
-        trace_generic_delete_lease(inode, fl);
+        trace_generic_delete_lease(inode, victim);
        if (victim)
                error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose);
        spin_unlock(&ctx->flc_lock);
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index f9f4845db989..19874151e95c 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -433,7 +433,7 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
 static bool nfs_client_init_is_complete(const struct nfs_client *clp)
 {
-        return clp->cl_cons_state != NFS_CS_INITING;
+        return clp->cl_cons_state <= NFS_CS_READY;
 }
 int nfs_wait_client_init_complete(const struct nfs_client *clp)
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index a1f0685b42ff..a6ad68865880 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -181,8 +181,8 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred,
                        clear_bit(NFS_DELEGATION_NEED_RECLAIM,
                                  &delegation->flags);
                        spin_unlock(&delegation->lock);
-                        put_rpccred(oldcred);
                        rcu_read_unlock();
+                        put_rpccred(oldcred);
                        trace_nfs4_reclaim_delegation(inode, res->delegation_type);
                } else {
                        /* We appear to have raced with a delegation return. */
@@ -370,7 +370,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
                        delegation = NULL;
                        goto out;
                }
-                freeme = nfs_detach_delegation_locked(nfsi, 
+                if (test_and_set_bit(NFS_DELEGATION_RETURNING,
+                                        &old_delegation->flags))
+                        goto out;
+                freeme = nfs_detach_delegation_locked(nfsi,
                                old_delegation, clp);
                if (freeme == NULL)
                        goto out;
@@ -433,6 +436,8 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation)
 {
        bool ret = false;
+        if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
+                goto out;
        if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags))
                ret = true;
        if (test_and_clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) && !ret) {
@@ -444,6 +449,7 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation)
                        ret = true;
                spin_unlock(&delegation->lock);
        }
+out:
        return ret;
 }
@@ -471,14 +477,20 @@ restart:
                                                                super_list) {
                        if (!nfs_delegation_need_return(delegation))
                                continue;
-                        inode = nfs_delegation_grab_inode(delegation);
+                        if (!nfs_sb_active(server->super))
-                        if (inode == NULL)
                                continue;
+                        inode = nfs_delegation_grab_inode(delegation);
+                        if (inode == NULL) {
+                                rcu_read_unlock();
+                                nfs_sb_deactive(server->super);
+                                goto restart;
+                        }
                        delegation = nfs_start_delegation_return_locked(NFS_I(inode));
                        rcu_read_unlock();
                        err = nfs_end_delegation_return(inode, delegation, 0);
                        iput(inode);
+                        nfs_sb_deactive(server->super);
                        if (!err)
                                goto restart;
                        set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
@@ -809,19 +821,30 @@ restart:
        list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
                list_for_each_entry_rcu(delegation, &server->delegations,
                                                                super_list) {
+                        if (test_bit(NFS_DELEGATION_RETURNING,
+                                                &delegation->flags))
+                                continue;
                        if (test_bit(NFS_DELEGATION_NEED_RECLAIM,
                                                &delegation->flags) == 0)
                                continue;
-                        inode = nfs_delegation_grab_inode(delegation);
+                        if (!nfs_sb_active(server->super))
-                        if (inode == NULL)
                                continue;
-                        delegation = nfs_detach_delegation(NFS_I(inode),
+                        inode = nfs_delegation_grab_inode(delegation);
-                                        delegation, server);
+                        if (inode == NULL) {
+                                rcu_read_unlock();
+                                nfs_sb_deactive(server->super);
+                                goto restart;
+                        }
+                        delegation = nfs_start_delegation_return_locked(NFS_I(inode));
                        rcu_read_unlock();
+                        if (delegation != NULL) {
-                        if (delegation != NULL)
+                                delegation = nfs_detach_delegation(NFS_I(inode),
-                                nfs_free_delegation(delegation);
+                                        delegation, server);
+                                if (delegation != NULL)
+                                        nfs_free_delegation(delegation);
+                        }
                        iput(inode);
+                        nfs_sb_deactive(server->super);
                        goto restart;
                }
        }
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 9b0c55cb2a2e..c19e16f0b2d0 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -408,14 +408,22 @@ static int xdr_decode(nfs_readdir_descriptor_t *desc,
        return 0;
 }
+/* Match file and dirent using either filehandle or fileid
+ * Note: caller is responsible for checking the fsid
+ */
 static
 int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
 {
+        struct nfs_inode *nfsi;
        if (dentry->d_inode == NULL)
                goto different;
-        if (nfs_compare_fh(entry->fh, NFS_FH(dentry->d_inode)) != 0)
-                goto different;
+        nfsi = NFS_I(dentry->d_inode);
-        return 1;
+        if (entry->fattr->fileid == nfsi->fileid)
+                return 1;
+        if (nfs_compare_fh(entry->fh, &nfsi->fh) == 0)
+                return 1;
 different:
        return 0;
 }
@@ -469,6 +477,10 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
        struct inode *inode;
        int status;
+        if (!(entry->fattr->valid & NFS_ATTR_FATTR_FILEID))
+                return;
+        if (!(entry->fattr->valid & NFS_ATTR_FATTR_FSID))
+                return;
        if (filename.name[0] == '.') {
                if (filename.len == 1)
                        return;
@@ -479,6 +491,10 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
        dentry = d_lookup(parent, &filename);
        if (dentry != NULL) {
+                /* Is there a mountpoint here? If so, just exit */
+                if (!nfs_fsid_equal(&NFS_SB(dentry->d_sb)->fsid,
+                                        &entry->fattr->fsid))
+                        goto out;
                if (nfs_same_file(dentry, entry)) {
                        nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
                        status = nfs_refresh_inode(dentry->d_inode, entry->fattr);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 94712fc781fa..e679d24c39d3 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -178,7 +178,7 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
                iocb->ki_filp,
                iov_iter_count(to), (unsigned long) iocb->ki_pos);
-        result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
+        result = nfs_revalidate_mapping_protected(inode, iocb->ki_filp->f_mapping);
        if (!result) {
                result = generic_file_read_iter(iocb, to);
                if (result > 0)
@@ -199,7 +199,7 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos,
        dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n",
                filp, (unsigned long) count, (unsigned long long) *ppos);
-        res = nfs_revalidate_mapping(inode, filp->f_mapping);
+        res = nfs_revalidate_mapping_protected(inode, filp->f_mapping);
        if (!res) {
                res = generic_file_splice_read(filp, ppos, pipe, count, flags);
                if (res > 0)
@@ -372,6 +372,10 @@ start:
                                 nfs_wait_bit_killable, TASK_KILLABLE);
        if (ret)
                return ret;
+        /*
+         * Wait for O_DIRECT to complete
+         */
+        nfs_inode_dio_wait(mapping->host);
        page = grab_cache_page_write_begin(mapping, index, flags);
        if (!page)
@@ -619,6 +623,9 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        /* make sure the cache has finished storing the page */
        nfs_fscache_wait_on_page_write(NFS_I(inode), page);
+        wait_on_bit_action(&NFS_I(inode)->flags, NFS_INO_INVALIDATING,
+                        nfs_wait_bit_killable, TASK_KILLABLE);
        lock_page(page);
        mapping = page_file_mapping(page);
        if (mapping != inode->i_mapping)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 83107be3dd01..d42dff6d5e98 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -556,6 +556,7 @@ EXPORT_SYMBOL_GPL(nfs_setattr);
 * This is a copy of the common vmtruncate, but with the locking
 * corrected to take into account the fact that NFS requires
 * inode->i_size to be updated under the inode->i_lock.
+ * Note: must be called with inode->i_lock held!
 */
 static int nfs_vmtruncate(struct inode * inode, loff_t offset)
 {
@@ -565,14 +566,14 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset)
        if (err)
                goto out;
-        spin_lock(&inode->i_lock);
        i_size_write(inode, offset);
        /* Optimisation */
        if (offset == 0)
                NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_DATA;
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&inode->i_lock);
        truncate_pagecache(inode, offset);
+        spin_lock(&inode->i_lock);
 out:
        return err;
 }
@@ -585,10 +586,15 @@ out:
 * Note: we do this in the *proc.c in order to ensure that
 *       it works for things like exclusive creates too.
 */
-void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
+void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
+                struct nfs_fattr *fattr)
 {
+        /* Barrier: bump the attribute generation count. */
+        nfs_fattr_set_barrier(fattr);
+        spin_lock(&inode->i_lock);
+        NFS_I(inode)->attr_gencount = fattr->gencount;
        if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) {
-                spin_lock(&inode->i_lock);
                if ((attr->ia_valid & ATTR_MODE) != 0) {
                        int mode = attr->ia_mode & S_IALLUGO;
                        mode |= inode->i_mode & ~S_IALLUGO;
@@ -600,12 +606,13 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
                        inode->i_gid = attr->ia_gid;
                nfs_set_cache_invalid(inode, NFS_INO_INVALID_ACCESS
                                | NFS_INO_INVALID_ACL);
-                spin_unlock(&inode->i_lock);
        }
        if ((attr->ia_valid & ATTR_SIZE) != 0) {
                nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
                nfs_vmtruncate(inode, attr->ia_size);
        }
+        nfs_update_inode(inode, fattr);
+        spin_unlock(&inode->i_lock);
 }
 EXPORT_SYMBOL_GPL(nfs_setattr_update_inode);
@@ -1028,6 +1035,7 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
        if (mapping->nrpages != 0) {
                if (S_ISREG(inode->i_mode)) {
+                        unmap_mapping_range(mapping, 0, 0, 0);
                        ret = nfs_sync_mapping(mapping);
                        if (ret < 0)
                                return ret;
@@ -1060,11 +1068,14 @@ static bool nfs_mapping_need_revalidate_inode(struct inode *inode)
 }
 /**
- * nfs_revalidate_mapping - Revalidate the pagecache
+ * __nfs_revalidate_mapping - Revalidate the pagecache
 * @inode - pointer to host inode
 * @mapping - pointer to mapping
+ * @may_lock - take inode->i_mutex?
 */
-int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
+static int __nfs_revalidate_mapping(struct inode *inode,
+                struct address_space *mapping,
+                bool may_lock)
 {
        struct nfs_inode *nfsi = NFS_I(inode);
        unsigned long *bitlock = &nfsi->flags;
@@ -1113,7 +1124,12 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
        nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;
        spin_unlock(&inode->i_lock);
        trace_nfs_invalidate_mapping_enter(inode);
-        ret = nfs_invalidate_mapping(inode, mapping);
+        if (may_lock) {
+                mutex_lock(&inode->i_mutex);
+                ret = nfs_invalidate_mapping(inode, mapping);
+                mutex_unlock(&inode->i_mutex);
+        } else
+                ret = nfs_invalidate_mapping(inode, mapping);
        trace_nfs_invalidate_mapping_exit(inode, ret);
        clear_bit_unlock(NFS_INO_INVALIDATING, bitlock);
@@ -1123,6 +1139,29 @@ out:
        return ret;
 }
+/**
+ * nfs_revalidate_mapping - Revalidate the pagecache
+ * @inode - pointer to host inode
+ * @mapping - pointer to mapping
+ */
+int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
+{
+        return __nfs_revalidate_mapping(inode, mapping, false);
+}
+/**
+ * nfs_revalidate_mapping_protected - Revalidate the pagecache
+ * @inode - pointer to host inode
+ * @mapping - pointer to mapping
+ *
+ * Differs from nfs_revalidate_mapping() in that it grabs the inode->i_mutex
+ * while invalidating the mapping.
+ */
+int nfs_revalidate_mapping_protected(struct inode *inode, struct address_space *mapping)
+{
+        return __nfs_revalidate_mapping(inode, mapping, true);
+}
 static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 {
        struct nfs_inode *nfsi = NFS_I(inode);
@@ -1231,13 +1270,6 @@ static int nfs_ctime_need_update(const struct inode *inode, const struct nfs_fat
        return timespec_compare(&fattr->ctime, &inode->i_ctime) > 0;
 }
-static int nfs_size_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
-{
-        if (!(fattr->valid & NFS_ATTR_FATTR_SIZE))
-                return 0;
-        return nfs_size_to_loff_t(fattr->size) > i_size_read(inode);
-}
 static atomic_long_t nfs_attr_generation_counter;
 static unsigned long nfs_read_attr_generation_counter(void)
@@ -1249,6 +1281,7 @@ unsigned long nfs_inc_attr_generation_counter(void)
 {
        return atomic_long_inc_return(&nfs_attr_generation_counter);
 }
+EXPORT_SYMBOL_GPL(nfs_inc_attr_generation_counter);
 void nfs_fattr_init(struct nfs_fattr *fattr)
 {
@@ -1260,6 +1293,22 @@ void nfs_fattr_init(struct nfs_fattr *fattr)
 }
 EXPORT_SYMBOL_GPL(nfs_fattr_init);
+/**
+ * nfs_fattr_set_barrier
+ * @fattr: attributes
+ *
+ * Used to set a barrier after an attribute was updated. This
+ * barrier ensures that older attributes from RPC calls that may
+ * have raced with our update cannot clobber these new values.
+ * Note that you are still responsible for ensuring that other
+ * operations which change the attribute on the server do not
+ * collide.
+ */
+void nfs_fattr_set_barrier(struct nfs_fattr *fattr)
+{
+        fattr->gencount = nfs_inc_attr_generation_counter();
+}
 struct nfs_fattr *nfs_alloc_fattr(void)
 {
        struct nfs_fattr *fattr;
@@ -1370,7 +1419,6 @@ static int nfs_inode_attrs_need_update(const struct inode *inode, const struct n
        return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 ||
                nfs_ctime_need_update(inode, fattr) ||
-                nfs_size_need_update(inode, fattr) ||
                ((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0);
 }
@@ -1460,6 +1508,7 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
        int status;
        spin_lock(&inode->i_lock);
+        nfs_fattr_set_barrier(fattr);
        status = nfs_post_op_update_inode_locked(inode, fattr);
        spin_unlock(&inode->i_lock);
@@ -1468,7 +1517,7 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 EXPORT_SYMBOL_GPL(nfs_post_op_update_inode);
 /**
- * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache
+ * nfs_post_op_update_inode_force_wcc_locked - update the inode attribute cache
 * @inode - pointer to inode
 * @fattr - updated attributes
 *
@@ -1478,11 +1527,10 @@ EXPORT_SYMBOL_GPL(nfs_post_op_update_inode);
 *
 * This function is mainly designed to be used by the ->write_done() functions.
 */
-int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr)
+int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr)
 {
        int status;
-        spin_lock(&inode->i_lock);
        /* Don't do a WCC update if these attributes are already stale */
        if ((fattr->valid & NFS_ATTR_FATTR) == 0 ||
                        !nfs_inode_attrs_need_update(inode, fattr)) {
@@ -1514,6 +1562,27 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa
        }
 out_noforce:
        status = nfs_post_op_update_inode_locked(inode, fattr);
+        return status;
+}
+/**
+ * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache
+ * @inode - pointer to inode
+ * @fattr - updated attributes
+ *
+ * After an operation that has changed the inode metadata, mark the
+ * attribute cache as being invalid, then try to update it. Fake up
+ * weak cache consistency data, if none exist.
+ *
+ * This function is mainly designed to be used by the ->write_done() functions.
+ */
+int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr)
+{
+        int status;
+        spin_lock(&inode->i_lock);
+        nfs_fattr_set_barrier(fattr);
+        status = nfs_post_op_update_inode_force_wcc_locked(inode, fattr);
        spin_unlock(&inode->i_lock);
        return status;
 }
@@ -1715,6 +1784,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
                nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
                nfsi->attrtimeo_timestamp = now;
+                /* Set barrier to be more recent than all outstanding updates */
                nfsi->attr_gencount = nfs_inc_attr_generation_counter();
        } else {
                if (!time_in_range_open(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) {
@@ -1722,6 +1792,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                                nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode);
                        nfsi->attrtimeo_timestamp = now;
                }
+                /* Set the barrier to be more recent than this fattr */
+                if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0)
+                        nfsi->attr_gencount = fattr->gencount;
        }
        invalid &= ~NFS_INO_INVALID_ATTR;
        /* Don't invalidate the data if we were to blame */
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index b802fb3a2d99..9e6475bc5ba2 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -459,6 +459,7 @@ void nfs_mark_request_commit(struct nfs_page *req,
                             struct nfs_commit_info *cinfo,
                             u32 ds_commit_idx);
 int nfs_write_need_commit(struct nfs_pgio_header *);
+void nfs_writeback_update_inode(struct nfs_pgio_header *hdr);
 int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
                            int how, struct nfs_commit_info *cinfo);
 void nfs_retry_commit(struct list_head *page_list,
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 78e557c3ab87..1f11d2533ee4 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -138,7 +138,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
        nfs_fattr_init(fattr);
        status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
        if (status == 0)
-                nfs_setattr_update_inode(inode, sattr);
+                nfs_setattr_update_inode(inode, sattr, fattr);
        dprintk("NFS reply setattr: %d\n", status);
        return status;
 }
@@ -834,7 +834,7 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
        if (nfs3_async_handle_jukebox(task, inode))
                return -EAGAIN;
        if (task->tk_status >= 0)
-                nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr);
+                nfs_writeback_update_inode(hdr);
        return 0;
 }
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 2a932fdc57cb..53852a4bd88b 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1987,6 +1987,11 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
                if (entry->fattr->valid & NFS_ATTR_FATTR_V3)
                        entry->d_type = nfs_umode_to_dtype(entry->fattr->mode);
+                if (entry->fattr->fileid != entry->ino) {
+                        entry->fattr->mounted_on_fileid = entry->ino;
+                        entry->fattr->valid |= NFS_ATTR_FATTR_MOUNTED_ON_FILEID;
+                }
                /* In fact, a post_op_fh3: */
                p = xdr_inline_decode(xdr, 4);
                if (unlikely(p == NULL))
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 8646af9b11d2..86d6214ea022 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -621,6 +621,9 @@ int nfs41_walk_client_list(struct nfs_client *new,
        spin_lock(&nn->nfs_client_lock);
        list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) {
+                if (pos == new)
+                        goto found;
                if (pos->rpc_ops != new->rpc_ops)
                        continue;
@@ -639,10 +642,6 @@ int nfs41_walk_client_list(struct nfs_client *new,
                        prev = pos;
                        status = nfs_wait_client_init_complete(pos);
-                        if (pos->cl_cons_state == NFS_CS_SESSION_INITING) {
-                                nfs4_schedule_lease_recovery(pos);
-                                status = nfs4_wait_clnt_recover(pos);
-                        }
                        spin_lock(&nn->nfs_client_lock);
                        if (status < 0)
                                break;
@@ -668,7 +667,7 @@ int nfs41_walk_client_list(struct nfs_client *new,
                 */
                if (!nfs4_match_client_owner_id(pos, new))
                        continue;
+found:
                atomic_inc(&pos->cl_count);
                *result = pos;
                status = 0;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 88180ac5ea0e..627f37c44456 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -901,6 +901,7 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
        if (!cinfo->atomic || cinfo->before != dir->i_version)
                nfs_force_lookup_revalidate(dir);
        dir->i_version = cinfo->after;
+        nfsi->attr_gencount = nfs_inc_attr_generation_counter();
        nfs_fscache_invalidate(dir);
        spin_unlock(&dir->i_lock);
 }
@@ -1552,6 +1553,9 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmod
        opendata->o_arg.open_flags = 0;
        opendata->o_arg.fmode = fmode;
+        opendata->o_arg.share_access = nfs4_map_atomic_open_share(
+                        NFS_SB(opendata->dentry->d_sb),
+                        fmode, 0);
        memset(&opendata->o_res, 0, sizeof(opendata->o_res));
        memset(&opendata->c_res, 0, sizeof(opendata->c_res));
        nfs4_init_opendata_res(opendata);
@@ -2413,8 +2417,8 @@ static int _nfs4_do_open(struct inode *dir,
                                opendata->o_res.f_attr, sattr,
                                state, label, olabel);
                if (status == 0) {
-                        nfs_setattr_update_inode(state->inode, sattr);
+                        nfs_setattr_update_inode(state->inode, sattr,
-                        nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr);
+                                        opendata->o_res.f_attr);
                        nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel);
                }
        }
@@ -2651,7 +2655,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
                case -NFS4ERR_BAD_STATEID:
                case -NFS4ERR_EXPIRED:
                        if (!nfs4_stateid_match(&calldata->arg.stateid,
-                                                &state->stateid)) {
+                                                &state->open_stateid)) {
                                rpc_restart_call_prepare(task);
                                goto out_release;
                        }
@@ -2687,7 +2691,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
        is_rdwr = test_bit(NFS_O_RDWR_STATE, &state->flags);
        is_rdonly = test_bit(NFS_O_RDONLY_STATE, &state->flags);
        is_wronly = test_bit(NFS_O_WRONLY_STATE, &state->flags);
-        nfs4_stateid_copy(&calldata->arg.stateid, &state->stateid);
+        nfs4_stateid_copy(&calldata->arg.stateid, &state->open_stateid);
        /* Calculate the change in open mode */
        calldata->arg.fmode = 0;
        if (state->n_rdwr == 0) {
@@ -3288,7 +3292,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
        status = nfs4_do_setattr(inode, cred, fattr, sattr, state, NULL, label);
        if (status == 0) {
-                nfs_setattr_update_inode(inode, sattr);
+                nfs_setattr_update_inode(inode, sattr, fattr);
                nfs_setsecurity(inode, fattr, label);
        }
        nfs4_label_free(label);
@@ -4234,7 +4238,7 @@ static int nfs4_write_done_cb(struct rpc_task *task,
        }
        if (task->tk_status >= 0) {
                renew_lease(NFS_SERVER(inode), hdr->timestamp);
-                nfs_post_op_update_inode_force_wcc(inode, &hdr->fattr);
+                nfs_writeback_update_inode(hdr);
        }
        return 0;
 }
@@ -6893,9 +6897,13 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
        if (status == 0) {
                clp->cl_clientid = res.clientid;
-                clp->cl_exchange_flags = (res.flags & ~EXCHGID4_FLAG_CONFIRMED_R);
+                clp->cl_exchange_flags = res.flags;
-                if (!(res.flags & EXCHGID4_FLAG_CONFIRMED_R))
+                /* Client ID is not confirmed */
+                if (!(res.flags & EXCHGID4_FLAG_CONFIRMED_R)) {
+                        clear_bit(NFS4_SESSION_ESTABLISHED,
+                                        &clp->cl_session->session_state);
                        clp->cl_seqid = res.seqid;
+                }
                kfree(clp->cl_serverowner);
                clp->cl_serverowner = res.server_owner;
@@ -7227,6 +7235,9 @@ static void nfs4_update_session(struct nfs4_session *session,
                struct nfs41_create_session_res *res)
 {
        nfs4_copy_sessionid(&session->sess_id, &res->sessionid);
+        /* Mark client id and session as being confirmed */
+        session->clp->cl_exchange_flags |= EXCHGID4_FLAG_CONFIRMED_R;
+        set_bit(NFS4_SESSION_ESTABLISHED, &session->session_state);
        session->flags = res->flags;
        memcpy(&session->fc_attrs, &res->fc_attrs, sizeof(session->fc_attrs));
        if (res->flags & SESSION4_BACK_CHAN)
@@ -7322,8 +7333,8 @@ int nfs4_proc_destroy_session(struct nfs4_session *session,
        dprintk("--> nfs4_proc_destroy_session\n");
        /* session is still being setup */
-        if (session->clp->cl_cons_state != NFS_CS_READY)
+        if (!test_and_clear_bit(NFS4_SESSION_ESTABLISHED, &session->session_state))
-                return status;
+                return 0;
        status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
        trace_nfs4_destroy_session(session->clp, status);
diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h
index fc46c7455898..e3ea2c5324d6 100644
--- a/fs/nfs/nfs4session.h
+++ b/fs/nfs/nfs4session.h
@@ -70,6 +70,7 @@ struct nfs4_session {
 enum nfs4_session_state {
        NFS4_SESSION_INITING,
+        NFS4_SESSION_ESTABLISHED,
 };
 extern int nfs4_setup_slot_table(struct nfs4_slot_table *tbl,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 5ad908e9ce9c..f95e3b58bbc3 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -346,9 +346,23 @@ int nfs41_discover_server_trunking(struct nfs_client *clp,
        status = nfs4_proc_exchange_id(clp, cred);
        if (status != NFS4_OK)
                return status;
-        set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
-        return nfs41_walk_client_list(clp, result, cred);
+        status = nfs41_walk_client_list(clp, result, cred);
+        if (status < 0)
+                return status;
+        if (clp != *result)
+                return 0;
+        /* Purge state if the client id was established in a prior instance */
+        if (clp->cl_exchange_flags & EXCHGID4_FLAG_CONFIRMED_R)
+                set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
+        else
+                set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
+        nfs4_schedule_state_manager(clp);
+        status = nfs_wait_client_init_complete(clp);
+        if (status < 0)
+                nfs_put_client(clp);
+        return status;
 }
 #endif /* CONFIG_NFS_V4_1 */
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index b09cc23d6f43..c63189acd052 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -139,7 +139,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
        nfs_fattr_init(fattr);
        status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
        if (status == 0)
-                nfs_setattr_update_inode(inode, sattr);
+                nfs_setattr_update_inode(inode, sattr, fattr);
        dprintk("NFS reply setattr: %d\n", status);
        return status;
 }
@@ -609,10 +609,8 @@ static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task,
 static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
 {
-        struct inode *inode = hdr->inode;
        if (task->tk_status >= 0)
-                nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr);
+                nfs_writeback_update_inode(hdr);
        return 0;
 }
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 595d81e354d1..849ed784d6ac 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1377,6 +1377,36 @@ static int nfs_should_remove_suid(const struct inode *inode)
        return 0;
 }
+/*
+ * Set an attribute barrier on @fattr, but only when the reply carries a
+ * valid size, that size equals the end of this write (args.offset +
+ * res.count), and it is not smaller than the inode's current i_size.
+ */
+static void nfs_writeback_check_extend(struct nfs_pgio_header *hdr,
+                struct nfs_fattr *fattr)
+{
+        if ((fattr->valid & NFS_ATTR_FATTR_SIZE) &&
+            hdr->args.offset + hdr->res.count == fattr->size &&
+            nfs_size_to_loff_t(fattr->size) >= i_size_read(hdr->inode))
+                nfs_fattr_set_barrier(fattr);
+}
+/*
+ * Refresh the inode attribute cache from the attributes returned with a
+ * write reply (hdr->res.fattr). Nothing is done when no attributes were
+ * attached. The barrier check and the cache update both run under
+ * inode->i_lock.
+ */
+void nfs_writeback_update_inode(struct nfs_pgio_header *hdr)
+{
+        struct inode *ino = hdr->inode;
+        struct nfs_fattr *post_attrs = hdr->res.fattr;
+        if (post_attrs != NULL) {
+                spin_lock(&ino->i_lock);
+                nfs_writeback_check_extend(hdr, post_attrs);
+                nfs_post_op_update_inode_force_wcc_locked(ino, post_attrs);
+                spin_unlock(&ino->i_lock);
+        }
+}
+EXPORT_SYMBOL_GPL(nfs_writeback_update_inode);
 /*
 * This function is called when the WRITE call is complete.
 */
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index cdbc78c72542..03d647bf195d 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -137,7 +137,7 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
        seg->offset = iomap.offset;
        seg->length = iomap.length;
-        dprintk("GET: %lld:%lld %d\n", bex->foff, bex->len, bex->es);
+        dprintk("GET: 0x%llx:0x%llx %d\n", bex->foff, bex->len, bex->es);
        return 0;
 out_error:
diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c
index 9da89fddab33..9aa2796da90d 100644
--- a/fs/nfsd/blocklayoutxdr.c
+++ b/fs/nfsd/blocklayoutxdr.c
@@ -122,19 +122,19 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
                 p = xdr_decode_hyper(p, &bex.foff);
                 if (bex.foff & (block_size - 1)) {
-                        dprintk("%s: unaligned offset %lld\n",
+                        dprintk("%s: unaligned offset 0x%llx\n",
                                 __func__, bex.foff);
                         goto fail;
                 }
                 p = xdr_decode_hyper(p, &bex.len);
                 if (bex.len & (block_size - 1)) {
-                        dprintk("%s: unaligned length %lld\n",
-                                __func__, bex.foff);
+                        dprintk("%s: unaligned length 0x%llx\n",
+                                __func__, bex.len);
                         goto fail;
                 }
                 p = xdr_decode_hyper(p, &bex.soff);
                 if (bex.soff & (block_size - 1)) {
-                        dprintk("%s: unaligned disk offset %lld\n",
+                        dprintk("%s: unaligned disk offset 0x%llx\n",
                                 __func__, bex.soff);
                         goto fail;
                 }
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 3c1bfa155571..6904213a4363 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -118,7 +118,7 @@ void nfsd4_setup_layout_type(struct svc_export *exp)
 {
        struct super_block *sb = exp->ex_path.mnt->mnt_sb;
-        if (exp->ex_flags & NFSEXP_NOPNFS)
+        if (!(exp->ex_flags & NFSEXP_PNFS))
                return;
        if (sb->s_export_op->get_uuid &&
@@ -440,15 +440,14 @@ nfsd4_return_file_layout(struct nfs4_layout *lp, struct nfsd4_layout_seg *seg,
                        list_move_tail(&lp->lo_perstate, reaplist);
                        return;
                }
-                end = seg->offset;
+                lo->offset = layout_end(seg);
        } else {
                /* retain the whole layout segment on a split. */
                if (layout_end(seg) < end) {
                        dprintk("%s: split not supported\n", __func__);
                        return;
                }
+                end = seg->offset;
-                lo->offset = layout_end(seg);
        }
        layout_update_len(lo, end);
@@ -513,6 +512,9 @@ nfsd4_return_client_layouts(struct svc_rqst *rqstp,
        spin_lock(&clp->cl_lock);
        list_for_each_entry_safe(ls, n, &clp->cl_lo_states, ls_perclnt) {
+                if (ls->ls_layout_type != lrp->lr_layout_type)
+                        continue;
                if (lrp->lr_return_type == RETURN_FSID &&
                    !fh_fsid_match(&ls->ls_stid.sc_file->fi_fhandle,
                                   &cstate->current_fh.fh_handle))
@@ -587,7 +589,7 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
        rpc_ntop((struct sockaddr *)&clp->cl_addr, addr_str, sizeof(addr_str));
-        nfsd4_cb_layout_fail(ls);
+        trace_layout_recall_fail(&ls->ls_stid.sc_stateid);
        printk(KERN_WARNING
                "nfsd: client %s failed to respond to layout recall. "
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index d30bea8d0277..92b9d97aff4f 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1237,8 +1237,8 @@ nfsd4_getdeviceinfo(struct svc_rqst *rqstp,
                nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, gdp);
        gdp->gd_notify_types &= ops->notify_types;
-        exp_put(exp);
 out:
+        exp_put(exp);
        return nfserr;
 }
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f6b2a09f793f..8ba1d888f1e6 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1638,7 +1638,7 @@ __destroy_client(struct nfs4_client *clp)
                nfs4_put_stid(&dp->dl_stid);
        }
        while (!list_empty(&clp->cl_revoked)) {
-                dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
+                dp = list_entry(clp->cl_revoked.next, struct nfs4_delegation, dl_recall_lru);
                list_del_init(&dp->dl_recall_lru);
                nfs4_put_stid(&dp->dl_stid);
        }
@@ -3221,7 +3221,7 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
        } else
                nfs4_free_openowner(&oo->oo_owner);
        spin_unlock(&clp->cl_lock);
-        return oo;
+        return ret;
 }
 static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) {
@@ -5062,7 +5062,7 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp,
        } else
                nfs4_free_lockowner(&lo->lo_owner);
        spin_unlock(&clp->cl_lock);
-        return lo;
+        return ret;
 }
 static void
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index df5e66caf100..5fb7e78169a6 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1562,7 +1562,11 @@ nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp,
        p = xdr_decode_hyper(p, &lgp->lg_seg.offset);
        p = xdr_decode_hyper(p, &lgp->lg_seg.length);
        p = xdr_decode_hyper(p, &lgp->lg_minlength);
-        nfsd4_decode_stateid(argp, &lgp->lg_sid);
+        status = nfsd4_decode_stateid(argp, &lgp->lg_sid);
+        if (status)
+                return status;
        READ_BUF(4);
        lgp->lg_maxcount = be32_to_cpup(p++);
@@ -1580,7 +1584,11 @@ nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp,
        p = xdr_decode_hyper(p, &lcp->lc_seg.offset);
        p = xdr_decode_hyper(p, &lcp->lc_seg.length);
        lcp->lc_reclaim = be32_to_cpup(p++);
-        nfsd4_decode_stateid(argp, &lcp->lc_sid);
+        status = nfsd4_decode_stateid(argp, &lcp->lc_sid);
+        if (status)
+                return status;
        READ_BUF(4);
        lcp->lc_newoffset = be32_to_cpup(p++);
        if (lcp->lc_newoffset) {
@@ -1628,7 +1636,11 @@ nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp,
                READ_BUF(16);
                p = xdr_decode_hyper(p, &lrp->lr_seg.offset);
                p = xdr_decode_hyper(p, &lrp->lr_seg.length);
-                nfsd4_decode_stateid(argp, &lrp->lr_sid);
+                status = nfsd4_decode_stateid(argp, &lrp->lr_sid);
+                if (status)
+                        return status;
                READ_BUF(4);
                lrp->lrf_body_len = be32_to_cpup(p++);
                if (lrp->lrf_body_len > 0) {
@@ -4123,7 +4135,7 @@ nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr,
                return nfserr_resource;
        *p++ = cpu_to_be32(lrp->lrs_present);
        if (lrp->lrs_present)
-                nfsd4_encode_stateid(xdr, &lrp->lr_sid);
+                return nfsd4_encode_stateid(xdr, &lrp->lr_sid);
        return nfs_ok;
 }
 #endif /* CONFIG_NFSD_PNFS */
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 83a9694ec485..46ec934f5dee 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -165,13 +165,17 @@ int nfsd_reply_cache_init(void)
 {
        unsigned int hashsize;
        unsigned int i;
+        int status = 0;
        max_drc_entries = nfsd_cache_size_limit();
        atomic_set(&num_drc_entries, 0);
        hashsize = nfsd_hashsize(max_drc_entries);
        maskbits = ilog2(hashsize);
-        register_shrinker(&nfsd_reply_cache_shrinker);
+        status = register_shrinker(&nfsd_reply_cache_shrinker);
+        if (status)
+                return status;
        drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep),
                                        0, 0, NULL);
        if (!drc_slab)
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index b2e3ff347620..ecdbae19a766 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -31,6 +31,8 @@
 #include "alloc.h"
 #include "dat.h"
+static void __nilfs_btree_init(struct nilfs_bmap *bmap);
 static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
 {
        struct nilfs_btree_path *path;
@@ -368,6 +370,34 @@ static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
        return ret;
 }
+/**
+ * nilfs_btree_root_broken - sanity-check the root node of a btree
+ * @node: btree root node to validate
+ * @ino: inode number, used only in the diagnostic message
+ *
+ * The root is rejected when its level lies outside
+ * [NILFS_BTREE_LEVEL_NODE_MIN, NILFS_BTREE_LEVEL_MAX] or when its child
+ * count lies outside [0, NILFS_BTREE_ROOT_NCHILDREN_MAX]. A rejected root
+ * is reported through pr_crit().
+ *
+ * Return Value: 1 if the root node is broken, 0 otherwise.
+ */
+static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
+                                   unsigned long ino)
+{
+        int level = nilfs_btree_node_get_level(node);
+        int flags = nilfs_btree_node_get_flags(node);
+        int nchildren = nilfs_btree_node_get_nchildren(node);
+        /* Common case: both level and child count are within bounds. */
+        if (likely(level >= NILFS_BTREE_LEVEL_NODE_MIN &&
+                   level <= NILFS_BTREE_LEVEL_MAX &&
+                   nchildren >= 0 &&
+                   nchildren <= NILFS_BTREE_ROOT_NCHILDREN_MAX))
+                return 0;
+        pr_crit("NILFS: bad btree root (inode number=%lu): level = %d, flags = 0x%x, nchildren = %d\n",
+                ino, level, flags, nchildren);
+        return 1;
+}
 int nilfs_btree_broken_node_block(struct buffer_head *bh)
 {
        int ret;
@@ -1713,7 +1743,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
        /* convert and insert */
        dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
-        nilfs_btree_init(btree);
+        __nilfs_btree_init(btree);
        if (nreq != NULL) {
                nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
                nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
@@ -2294,12 +2324,23 @@ static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
        .bop_gather_data        =       NULL,
 };
-int nilfs_btree_init(struct nilfs_bmap *bmap)
+static void __nilfs_btree_init(struct nilfs_bmap *bmap) /* no root check */
 {
         bmap->b_ops = &nilfs_btree_ops;
         bmap->b_nchildren_per_block =
                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
-        return 0;
+}
+int nilfs_btree_init(struct nilfs_bmap *bmap) /* 0, or -EIO on a bad root */
+{
+        int ret = 0;
+        __nilfs_btree_init(bmap);       /* set b_ops and b_nchildren_per_block */
+        if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap),
+                                    bmap->b_inode->i_ino))
+                ret = -EIO;     /* root node failed the sanity check */
+        return ret;
 }
 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 469086b9f99b..0c3f303baf32 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1907,6 +1907,7 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
                                             struct the_nilfs *nilfs)
 {
        struct nilfs_inode_info *ii, *n;
+        int during_mount = !(sci->sc_super->s_flags & MS_ACTIVE);
        int defer_iput = false;
        spin_lock(&nilfs->ns_inode_lock);
@@ -1919,10 +1920,10 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
                brelse(ii->i_bh);
                ii->i_bh = NULL;
                list_del_init(&ii->i_dirty);
-                if (!ii->vfs_inode.i_nlink) {
+                if (!ii->vfs_inode.i_nlink || during_mount) {
                        /*
-                         * Defer calling iput() to avoid a deadlock
+                         * Defer calling iput() to avoid deadlocks if
-                         * over I_SYNC flag for inodes with i_nlink == 0
+                         * i_nlink == 0 or mount is not yet finished.
                         */
                        list_add_tail(&ii->i_dirty, &sci->sc_iput_queue);
                        defer_iput = true;
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 9a66ff79ff27..d2f97ecca6a5 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -143,7 +143,8 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark,
            !(marks_mask & FS_ISDIR & ~marks_ignored_mask))
                return false;
-        if (event_mask & marks_mask & ~marks_ignored_mask)
+        if (event_mask & FAN_ALL_OUTGOING_EVENTS & marks_mask &
+                                 ~marks_ignored_mask)
                return true;
        return false;
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 8490c64d34fe..460c6c37e683 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -502,7 +502,7 @@ static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb)
 static inline int ocfs2_supports_append_dio(struct ocfs2_super *osb)
 {
-        if (osb->s_feature_ro_compat & OCFS2_FEATURE_RO_COMPAT_APPEND_DIO)
+        if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_APPEND_DIO)
                return 1;
        return 0;
 }
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 20e37a3ed26f..db64ce2d4667 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -102,11 +102,11 @@
                                         | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \
                                         | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \
                                         | OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG  \
-                                         | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO)
+                                         | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO \
+                                         | OCFS2_FEATURE_INCOMPAT_APPEND_DIO)
 #define OCFS2_FEATURE_RO_COMPAT_SUPP    (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
                                         | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
-                                         | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA \
+                                         | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
-                                         | OCFS2_FEATURE_RO_COMPAT_APPEND_DIO)
 /*
 * Heartbeat-only devices are missing journals and other files.  The
@@ -179,6 +179,11 @@
 #define OCFS2_FEATURE_INCOMPAT_CLUSTERINFO      0x4000
 /*
+ * Append Direct IO support
+ */
+#define OCFS2_FEATURE_INCOMPAT_APPEND_DIO       0x8000
+/*
 * backup superblock flag is used to indicate that this volume
 * has backup superblocks.
 */
@@ -200,10 +205,6 @@
 #define OCFS2_FEATURE_RO_COMPAT_USRQUOTA        0x0002
 #define OCFS2_FEATURE_RO_COMPAT_GRPQUOTA        0x0004
-/*
- * Append Direct IO support
- */
-#define OCFS2_FEATURE_RO_COMPAT_APPEND_DIO      0x0008
 /* The byte offset of the first backup block will be 1G.
 * The following will be 4G, 16G, 64G, 256G and 1T.
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index b90952f528b1..5f0d1993e6e3 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -529,8 +529,7 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data)
 {
        struct ovl_fs *ufs = sb->s_fs_info;
-        if (!(*flags & MS_RDONLY) &&
+        if (!(*flags & MS_RDONLY) && !ufs->upper_mnt)
-            (!ufs->upper_mnt || (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY)))
                return -EROFS;
        return 0;
@@ -615,9 +614,19 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
                        break;
                default:
+                        pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
                        return -EINVAL;
                }
        }
+        /* Workdir is useless in non-upper mount */
+        if (!config->upperdir && config->workdir) {
+                pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
+                        config->workdir);
+                kfree(config->workdir);
+                config->workdir = NULL;
+        }
        return 0;
 }
@@ -837,7 +846,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
        sb->s_stack_depth = 0;
        if (ufs->config.upperdir) {
-                /* FIXME: workdir is not needed for a R/O mount */
                if (!ufs->config.workdir) {
                        pr_err("overlayfs: missing 'workdir'\n");
                        goto out_free_config;
@@ -847,6 +855,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
                if (err)
                        goto out_free_config;
+                /* Upper fs should not be r/o */
+                if (upperpath.mnt->mnt_sb->s_flags & MS_RDONLY) {
+                        pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n");
+                        err = -EINVAL;
+                        goto out_put_upperpath;
+                }
                err = ovl_mount_dir(ufs->config.workdir, &workpath);
                if (err)
                        goto out_put_upperpath;
@@ -869,8 +884,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
         err = -EINVAL;
         stacklen = ovl_split_lowerdirs(lowertmp);
-        if (stacklen > OVL_MAX_STACK)
+        if (stacklen > OVL_MAX_STACK) {
+                pr_err("overlayfs: too many lower directories, limit is %d\n",
+                       OVL_MAX_STACK);
                 goto out_free_lowertmp;
+        } else if (!ufs->config.upperdir && stacklen == 1) {
+                pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n");
+                goto out_free_lowertmp;
+        }
         stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
         if (!stack)
@@ -932,8 +953,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
                ufs->numlower++;
        }
-        /* If the upper fs is r/o or nonexistent, we mark overlayfs r/o too */
+        /* If the upper fs is nonexistent, we mark overlayfs r/o too */
-        if (!ufs->upper_mnt || (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY))
+        if (!ufs->upper_mnt)
                sb->s_flags |= MS_RDONLY;
        sb->s_d_op = &ovl_dentry_operations;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 956b75d61809..6dee68d013ff 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1325,6 +1325,9 @@ out:
 static int pagemap_open(struct inode *inode, struct file *file)
 {
+        /* Do not disclose physical addresses to callers without CAP_SYS_ADMIN: attack vector */
+        if (!capable(CAP_SYS_ADMIN))
+                return -EPERM;
        pr_warn_once("Bits 55-60 of /proc/PID/pagemap entries are about "
                        "to stop being page-shift some time soon. See the "
                        "linux/Documentation/vm/pagemap.txt for details.\n");
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index ce615d12fb44..a2e1cb8a568b 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -397,7 +397,8 @@ STATIC int				/* error (positive) */
 xfs_zero_last_block(
        struct xfs_inode        *ip,
        xfs_fsize_t             offset,
-        xfs_fsize_t             isize)
+        xfs_fsize_t             isize,
+        bool                    *did_zeroing)
 {
        struct xfs_mount        *mp = ip->i_mount;
        xfs_fileoff_t           last_fsb = XFS_B_TO_FSBT(mp, isize);
@@ -425,6 +426,7 @@ xfs_zero_last_block(
        zero_len = mp->m_sb.sb_blocksize - zero_offset;
        if (isize + zero_len > offset)
                zero_len = offset - isize;
+        *did_zeroing = true;
        return xfs_iozero(ip, isize, zero_len);
 }
@@ -443,7 +445,8 @@ int					/* error (positive) */
 xfs_zero_eof(
        struct xfs_inode        *ip,
        xfs_off_t               offset,         /* starting I/O offset */
-        xfs_fsize_t             isize)          /* current inode size */
+        xfs_fsize_t             isize,          /* current inode size */
+        bool                    *did_zeroing)
 {
        struct xfs_mount        *mp = ip->i_mount;
        xfs_fileoff_t           start_zero_fsb;
@@ -465,7 +468,7 @@ xfs_zero_eof(
         * We only zero a part of that block so it is handled specially.
         */
        if (XFS_B_FSB_OFFSET(mp, isize) != 0) {
-                error = xfs_zero_last_block(ip, offset, isize);
+                error = xfs_zero_last_block(ip, offset, isize, did_zeroing);
                if (error)
                        return error;
        }
@@ -525,6 +528,7 @@ xfs_zero_eof(
                if (error)
                        return error;
+                *did_zeroing = true;
                start_zero_fsb = imap.br_startoff + imap.br_blockcount;
                ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
        }
@@ -567,13 +571,15 @@ restart:
         * having to redo all checks before.
         */
        if (*pos > i_size_read(inode)) {
+                bool    zero = false;
                if (*iolock == XFS_IOLOCK_SHARED) {
                        xfs_rw_iunlock(ip, *iolock);
                        *iolock = XFS_IOLOCK_EXCL;
                        xfs_rw_ilock(ip, *iolock);
                        goto restart;
                }
-                error = xfs_zero_eof(ip, *pos, i_size_read(inode));
+                error = xfs_zero_eof(ip, *pos, i_size_read(inode), &zero);
                if (error)
                        return error;
        }
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index daafa1f6d260..6163767aa856 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2867,6 +2867,10 @@ xfs_rename(
         * Handle RENAME_EXCHANGE flags
         */
        if (flags & RENAME_EXCHANGE) {
+                if (target_ip == NULL) {
+                        error = -EINVAL;
+                        goto error_return;
+                }
                error = xfs_cross_rename(tp, src_dp, src_name, src_ip,
                                         target_dp, target_name, target_ip,
                                         &free_list, &first_block, spaceres);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 86cd6b39bed7..a1cd55f3f351 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -384,10 +384,11 @@ enum xfs_prealloc_flags {
        XFS_PREALLOC_INVISIBLE  = (1 << 4),
 };
-int             xfs_update_prealloc_flags(struct xfs_inode *,
+int     xfs_update_prealloc_flags(struct xfs_inode *ip,
-                        enum xfs_prealloc_flags);
+                                  enum xfs_prealloc_flags flags);
-int             xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t);
+int     xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
-int             xfs_iozero(struct xfs_inode *, loff_t, size_t);
+                     xfs_fsize_t isize, bool *did_zeroing);
+int     xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count);
 #define IHOLD(ip) \
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index d919ad7b16bf..e53a90331422 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -751,6 +751,7 @@ xfs_setattr_size(
        int                     error;
        uint                    lock_flags = 0;
        uint                    commit_flags = 0;
+        bool                    did_zeroing = false;
        trace_xfs_setattr(ip);
@@ -794,20 +795,16 @@ xfs_setattr_size(
                return error;
        /*
-         * Now we can make the changes.  Before we join the inode to the
+         * File data changes must be complete before we start the transaction to
-         * transaction, take care of the part of the truncation that must be
+         * modify the inode.  This needs to be done before joining the inode to
-         * done without the inode lock.  This needs to be done before joining
+         * the transaction because the inode cannot be unlocked once it is a
-         * the inode to the transaction, because the inode cannot be unlocked
+         * part of the transaction.
-         * once it is a part of the transaction.
+         *
+         * Start with zeroing any data block beyond EOF that we may expose on
+         * file extension.
         */
        if (newsize > oldsize) {
-                /*
+                error = xfs_zero_eof(ip, newsize, oldsize, &did_zeroing);
-                 * Do the first part of growing a file: zero any data in the
-                 * last block that is beyond the old EOF.  We need to do this
-                 * before the inode is joined to the transaction to modify
-                 * i_size.
-                 */
-                error = xfs_zero_eof(ip, newsize, oldsize);
                if (error)
                        return error;
        }
@@ -817,23 +814,18 @@ xfs_setattr_size(
         * any previous writes that are beyond the on disk EOF and the new
         * EOF that have not been written out need to be written here.  If we
         * do not write the data out, we expose ourselves to the null files
-         * problem.
+         * problem. Note that this includes any block zeroing we did above;
-         *
+         * otherwise those blocks may not be zeroed after a crash.
-         * Only flush from the on disk size to the smaller of the in memory
-         * file size or the new size as that's the range we really care about
-         * here and prevents waiting for other data not within the range we
-         * care about here.
         */
-        if (oldsize != ip->i_d.di_size && newsize > ip->i_d.di_size) {
+        if (newsize > ip->i_d.di_size &&
+            (oldsize != ip->i_d.di_size || did_zeroing)) {
                error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
                                                      ip->i_d.di_size, newsize);
                if (error)
                        return error;
        }
-        /*
+        /* Now wait for all direct I/O to complete. */
-         * Wait for all direct I/O to complete.
-         */
        inode_dio_wait(inode);
        /*
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index 4b33ef112400..365dd57ea760 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -300,8 +300,10 @@ xfs_fs_commit_blocks(
        tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
-        if (error)
+        if (error) {
+                xfs_trans_cancel(tp, 0);
                goto out_drop_iolock;
+        }
        xfs_ilock(ip, XFS_ILOCK_EXCL);
        xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 53cc2aaf8d2b..fbbb9e62e274 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -836,6 +836,11 @@ xfs_qm_reset_dqcounts(
                 */
                xfs_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR,
                            "xfs_quotacheck");
+                /*
+                 * Reset type in case we are reusing group quota file for
+                 * project quotas or vice versa
+                 */
+                ddq->d_flags = type;
                ddq->d_bcount = 0;
                ddq->d_icount = 0;
                ddq->d_rtbcount = 0;