101 files changed, 1154 insertions, 541 deletions
diff --git a/fs/bio.c b/fs/bio.c
index 9298c65ad9c7..b96fc6ce4855 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -75,6 +75,7 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
        unsigned int sz = sizeof(struct bio) + extra_size;
        struct kmem_cache *slab = NULL;
        struct bio_slab *bslab, *new_bio_slabs;
+        unsigned int new_bio_slab_max;
        unsigned int i, entry = -1;
        mutex_lock(&bio_slab_lock);
@@ -97,12 +98,13 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
                goto out_unlock;
        if (bio_slab_nr == bio_slab_max && entry == -1) {
-                bio_slab_max <<= 1;
+                new_bio_slab_max = bio_slab_max << 1;
                new_bio_slabs = krealloc(bio_slabs,
-                                         bio_slab_max * sizeof(struct bio_slab),
+                                         new_bio_slab_max * sizeof(struct bio_slab),
                                         GFP_KERNEL);
                if (!new_bio_slabs)
                        goto out_unlock;
+                bio_slab_max = new_bio_slab_max;
                bio_slabs = new_bio_slabs;
        }
        if (entry == -1)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index b3c1d3dae77d..1a1e5e3b1eaf 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1661,6 +1661,39 @@ static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
        return ret;
 }
+static ssize_t blkdev_splice_read(struct file *file, loff_t *ppos,
+                                  struct pipe_inode_info *pipe, size_t len,
+                                  unsigned int flags)
+{ /* splice_read for block devices: generic_file_splice_read() run under bd_block_size_semaphore */
+        ssize_t ret;
+        struct block_device *bdev = I_BDEV(file->f_mapping->host); /* block device behind this file's mapping */
+        percpu_down_read(&bdev->bd_block_size_semaphore); /* read side, held for the whole splice; NOTE(review): presumably excludes block-size changes — confirm against the writer side */
+        ret = generic_file_splice_read(file, ppos, pipe, len, flags);
+        percpu_up_read(&bdev->bd_block_size_semaphore);
+        return ret; /* result of the generic helper, passed through unchanged */
+}
+static ssize_t blkdev_splice_write(struct pipe_inode_info *pipe,
+                                   struct file *file, loff_t *ppos, size_t len,
+                                   unsigned int flags)
+{ /* splice_write for block devices: generic_file_splice_write() run under bd_block_size_semaphore */
+        ssize_t ret;
+        struct block_device *bdev = I_BDEV(file->f_mapping->host); /* block device behind this file's mapping */
+        percpu_down_read(&bdev->bd_block_size_semaphore); /* read side, held for the whole splice; NOTE(review): presumably excludes block-size changes — confirm against the writer side */
+        ret = generic_file_splice_write(pipe, file, ppos, len, flags);
+        percpu_up_read(&bdev->bd_block_size_semaphore);
+        return ret; /* result of the generic helper, passed through unchanged */
+}
 /*
 * Try to release a page associated with block device when the system
 * is under memory pressure.
@@ -1699,8 +1732,8 @@ const struct file_operations def_blk_fops = {
 #ifdef CONFIG_COMPAT
        .compat_ioctl   = compat_blkdev_ioctl,
 #endif
-        .splice_read    = generic_file_splice_read,
+        .splice_read    = blkdev_splice_read,
-        .splice_write   = generic_file_splice_write,
+        .splice_write   = blkdev_splice_write,
 };
 int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index f3187938e081..208d8aa5b07e 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -283,9 +283,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
                goto out;
        }
-        rcu_read_lock();
+        root_level = btrfs_old_root_level(root, time_seq);
-        root_level = btrfs_header_level(root->node);
-        rcu_read_unlock();
        if (root_level + 1 == level)
                goto out;
@@ -1177,16 +1175,15 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
        return ret;
 }
-static char *ref_to_path(struct btrfs_root *fs_root,
+char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
-                         struct btrfs_path *path,
+                        u32 name_len, unsigned long name_off,
-                         u32 name_len, unsigned long name_off,
+                        struct extent_buffer *eb_in, u64 parent,
-                         struct extent_buffer *eb_in, u64 parent,
+                        char *dest, u32 size)
-                         char *dest, u32 size)
 {
        int slot;
        u64 next_inum;
        int ret;
-        s64 bytes_left = size - 1;
+        s64 bytes_left = ((s64)size) - 1;
        struct extent_buffer *eb = eb_in;
        struct btrfs_key found_key;
        int leave_spinning = path->leave_spinning;
@@ -1266,10 +1263,10 @@ char *btrfs_iref_to_path(struct btrfs_root *fs_root,
                         struct extent_buffer *eb_in, u64 parent,
                         char *dest, u32 size)
 {
-        return ref_to_path(fs_root, path,
+        return btrfs_ref_to_path(fs_root, path,
-                           btrfs_inode_ref_name_len(eb_in, iref),
+                                 btrfs_inode_ref_name_len(eb_in, iref),
-                           (unsigned long)(iref + 1),
+                                 (unsigned long)(iref + 1),
-                           eb_in, parent, dest, size);
+                                 eb_in, parent, dest, size);
 }
 /*
@@ -1715,9 +1712,8 @@ static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off,
                                        ipath->fspath->bytes_left - s_ptr : 0;
        fspath_min = (char *)ipath->fspath->val + (i + 1) * s_ptr;
-        fspath = ref_to_path(ipath->fs_root, ipath->btrfs_path, name_len,
+        fspath = btrfs_ref_to_path(ipath->fs_root, ipath->btrfs_path, name_len,
-                             name_off, eb, inum, fspath_min,
+                                   name_off, eb, inum, fspath_min, bytes_left);
-                             bytes_left);
        if (IS_ERR(fspath))
                return PTR_ERR(fspath);
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index e75533043a5f..d61feca79455 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -62,6 +62,10 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
 char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
                         struct btrfs_inode_ref *iref, struct extent_buffer *eb,
                         u64 parent, char *dest, u32 size);
+char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
+                        u32 name_len, unsigned long name_off,
+                        struct extent_buffer *eb_in, u64 parent,
+                        char *dest, u32 size);
 struct btrfs_data_container *init_data_container(u32 total_bytes);
 struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index b33436211000..cdfb4c49a806 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -596,6 +596,11 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
        if (tree_mod_dont_log(fs_info, eb))
                return 0;
+        /*
+         * When we override something during the move, we log these removals.
+         * This can only happen when we move towards the beginning of the
+         * buffer, i.e. dst_slot < src_slot.
+         */
        for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
                ret = tree_mod_log_insert_key_locked(fs_info, eb, i + dst_slot,
                                              MOD_LOG_KEY_REMOVE_WHILE_MOVING);
@@ -647,8 +652,6 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
        if (tree_mod_dont_log(fs_info, NULL))
                return 0;
-        __tree_mod_log_free_eb(fs_info, old_root);
        ret = tree_mod_alloc(fs_info, flags, &tm);
        if (ret < 0)
                goto out;
@@ -926,12 +929,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
                        ret = btrfs_dec_ref(trans, root, buf, 1, 1);
                        BUG_ON(ret); /* -ENOMEM */
                }
-                /*
+                tree_mod_log_free_eb(root->fs_info, buf);
-                 * don't log freeing in case we're freeing the root node, this
-                 * is done by tree_mod_log_set_root_pointer later
-                 */
-                if (buf != root->node && btrfs_header_level(buf) != 0)
-                        tree_mod_log_free_eb(root->fs_info, buf);
                clean_tree_block(trans, root, buf);
                *last_ref = 1;
        }
@@ -1225,6 +1223,8 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
        free_extent_buffer(eb);
        __tree_mod_log_rewind(eb_rewin, time_seq, tm);
+        WARN_ON(btrfs_header_nritems(eb_rewin) >
+                BTRFS_NODEPTRS_PER_BLOCK(fs_info->fs_root));
        return eb_rewin;
 }
@@ -1241,9 +1241,11 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
 {
        struct tree_mod_elem *tm;
        struct extent_buffer *eb;
+        struct extent_buffer *old;
        struct tree_mod_root *old_root = NULL;
        u64 old_generation = 0;
        u64 logical;
+        u32 blocksize;
        eb = btrfs_read_lock_root_node(root);
        tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq);
@@ -1259,14 +1261,33 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
        }
        tm = tree_mod_log_search(root->fs_info, logical, time_seq);
-        if (old_root)
+        if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) { /* a log entry exists for the old root block and it is not a REMOVE_WHILE_FREEING record: re-read that block */
+                btrfs_tree_read_unlock(root->node);
+                free_extent_buffer(root->node);
+                blocksize = btrfs_level_size(root, old_root->level);
+                old = read_tree_block(root, logical, blocksize, 0);
+                if (!old) {
+                        pr_warn("btrfs: failed to read tree block %llu from get_old_root\n",
+                                logical);
+                        WARN_ON(1);
+                        eb = NULL; /* the root node was unlocked and released above; bail out through the !eb check instead of re-locking and rewriting it */
+                } else {
+                        eb = btrfs_clone_extent_buffer(old); /* work on a private copy of the block just read */
+                        free_extent_buffer(old);
+                }
+        } else if (old_root) { /* old root known but not re-read: start from an empty dummy buffer */
+                btrfs_tree_read_unlock(root->node);
+                free_extent_buffer(root->node);
                eb = alloc_dummy_extent_buffer(logical, root->nodesize);
-        else
+        } else { /* old_root unset: work on a clone of the current root node */
                eb = btrfs_clone_extent_buffer(root->node);
-        btrfs_tree_read_unlock(root->node);
+                btrfs_tree_read_unlock(root->node); /* each branch now drops the root lock itself */
-        free_extent_buffer(root->node);
+                free_extent_buffer(root->node);
+        }
        if (!eb)
                return NULL;
+        extent_buffer_get(eb); /* extra reference, taken before the buffer is locked */
        btrfs_tree_read_lock(eb);
        if (old_root) {
                btrfs_set_header_bytenr(eb, eb->start);
@@ -1279,11 +1299,28 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
                __tree_mod_log_rewind(eb, time_seq, tm);
        else
                WARN_ON(btrfs_header_level(eb) != 0);
-        extent_buffer_get(eb);
+        WARN_ON(btrfs_header_nritems(eb) > BTRFS_NODEPTRS_PER_BLOCK(root));
        return eb;
 }
+int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq)
+{ /* returns the level recorded by a logged root replacement if one is found for time_seq, else the current root node's level */
+        struct tree_mod_elem *tm;
+        int level;
+        tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq);
+        if (tm && tm->op == MOD_LOG_ROOT_REPLACE) { /* root replacement logged: use the level stored in the log element */
+                level = tm->old_root.level;
+        } else { /* no such entry: read the level from the live root node's header */
+                rcu_read_lock(); /* root->node is only dereferenced inside this RCU read section */
+                level = btrfs_header_level(root->node);
+                rcu_read_unlock();
+        }
+        return level;
+}
 static inline int should_cow_block(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root,
                                   struct extent_buffer *buf)
@@ -1725,6 +1762,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                        goto enospc;
                }
+                tree_mod_log_free_eb(root->fs_info, root->node);
                tree_mod_log_set_root_pointer(root, child);
                rcu_assign_pointer(root->node, child);
@@ -2970,8 +3008,10 @@ static int push_node_left(struct btrfs_trans_handle *trans,
                           push_items * sizeof(struct btrfs_key_ptr));
        if (push_items < src_nritems) {
-                tree_mod_log_eb_move(root->fs_info, src, 0, push_items,
+                /*
-                                     src_nritems - push_items);
+                 * don't call tree_mod_log_eb_move here, key removal was already
+                 * fully logged by tree_mod_log_eb_copy above.
+                 */
                memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
                                      btrfs_node_key_ptr_offset(push_items),
                                      (src_nritems - push_items) *
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 926c9ffc66d9..c72ead869507 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3120,6 +3120,7 @@ static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
 {
        return atomic_inc_return(&fs_info->tree_mod_seq);
 }
+int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq);
 /* root-item.c */
 int btrfs_find_root_ref(struct btrfs_root *tree_root,
@@ -3338,6 +3339,8 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
 int btrfs_update_inode(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              struct inode *inode);
+int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
+                                struct btrfs_root *root, struct inode *inode);
 int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode);
 int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
 int btrfs_orphan_cleanup(struct btrfs_root *root);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 8036d3a84853..472873a94d96 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4110,8 +4110,8 @@ struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len)
        return eb;
 err:
-        for (i--; i >= 0; i--)
+        for (; i > 0; i--)
-                __free_page(eb->pages[i]);
+                __free_page(eb->pages[i - 1]);
        __free_extent_buffer(eb);
        return NULL;
 }
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 85a1e5053fe6..95542a1b3dfc 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -94,8 +94,6 @@ static noinline int cow_file_range(struct inode *inode,
                                   struct page *locked_page,
                                   u64 start, u64 end, int *page_started,
                                   unsigned long *nr_written, int unlock);
-static noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
-                                struct btrfs_root *root, struct inode *inode);
 static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
                                     struct inode *inode,  struct inode *dir,
@@ -2746,8 +2744,9 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
        return btrfs_update_inode_item(trans, root, inode);
 }
-static noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
+noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
-                                struct btrfs_root *root, struct inode *inode)
+                                         struct btrfs_root *root,
+                                         struct inode *inode)
 {
        int ret;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 61168805f175..8fcf9a59c28d 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -343,7 +343,8 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
                return -EOPNOTSUPP;
        if (copy_from_user(&range, arg, sizeof(range)))
                return -EFAULT;
-        if (range.start > total_bytes)
+        if (range.start > total_bytes ||
+            range.len < fs_info->sb->s_blocksize)
                return -EINVAL;
        range.len = min(range.len, total_bytes - range.start);
@@ -570,7 +571,8 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
                ret = btrfs_commit_transaction(trans,
                                               root->fs_info->extent_root);
        }
-        BUG_ON(ret);
+        if (ret)
+                goto fail;
        ret = pending_snapshot->error;
        if (ret)
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 5039686df6ae..fe9d02c45f8e 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -790,8 +790,10 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
        }
        path = btrfs_alloc_path();
-        if (!path)
+        if (!path) {
-                return -ENOMEM;
+                ret = -ENOMEM;
+                goto out_free_root;
+        }
        key.objectid = 0;
        key.type = BTRFS_QGROUP_STATUS_KEY;
@@ -800,7 +802,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
        ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
                                      sizeof(*ptr));
        if (ret)
-                goto out;
+                goto out_free_path;
        leaf = path->nodes[0];
        ptr = btrfs_item_ptr(leaf, path->slots[0],
@@ -818,8 +820,15 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
        fs_info->quota_root = quota_root;
        fs_info->pending_quota_state = 1;
        spin_unlock(&fs_info->qgroup_lock);
-out:
+out_free_path:
        btrfs_free_path(path);
+out_free_root:
+        if (ret) {
+                free_extent_buffer(quota_root->node);
+                free_extent_buffer(quota_root->commit_root);
+                kfree(quota_root);
+        }
+out:
        return ret;
 }
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index c7beb543a4a8..e78b297b0b00 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -745,31 +745,36 @@ typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index,
                                   void *ctx);
 /*
- * Helper function to iterate the entries in ONE btrfs_inode_ref.
+ * Helper function to iterate the entries in ONE btrfs_inode_ref or
+ * btrfs_inode_extref.
 * The iterate callback may return a non zero value to stop iteration. This can
 * be a negative value for error codes or 1 to simply stop it.
 *
- * path must point to the INODE_REF when called.
+ * path must point to the INODE_REF or INODE_EXTREF when called.
 */
 static int iterate_inode_ref(struct send_ctx *sctx,
                             struct btrfs_root *root, struct btrfs_path *path,
                             struct btrfs_key *found_key, int resolve,
                             iterate_inode_ref_t iterate, void *ctx)
 {
-        struct extent_buffer *eb;
+        struct extent_buffer *eb = path->nodes[0];
        struct btrfs_item *item;
        struct btrfs_inode_ref *iref;
+        struct btrfs_inode_extref *extref;
        struct btrfs_path *tmp_path;
        struct fs_path *p;
-        u32 cur;
+        u32 cur = 0;
-        u32 len;
        u32 total;
-        int slot;
+        int slot = path->slots[0];
        u32 name_len;
        char *start;
        int ret = 0;
-        int num;
+        int num = 0;
        int index;
+        u64 dir;
+        unsigned long name_off;
+        unsigned long elem_size;
+        unsigned long ptr;
        p = fs_path_alloc_reversed(sctx);
        if (!p)
@@ -781,24 +786,40 @@ static int iterate_inode_ref(struct send_ctx *sctx,
                return -ENOMEM;
        }
-        eb = path->nodes[0];
-        slot = path->slots[0];
-        item = btrfs_item_nr(eb, slot);
-        iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
-        cur = 0;
-        len = 0;
-        total = btrfs_item_size(eb, item);
-        num = 0;
+        if (found_key->type == BTRFS_INODE_REF_KEY) {
+                ptr = (unsigned long)btrfs_item_ptr(eb, slot,
+                                                    struct btrfs_inode_ref);
+                item = btrfs_item_nr(eb, slot);
+                total = btrfs_item_size(eb, item);
+                elem_size = sizeof(*iref);
+        } else {
+                ptr = btrfs_item_ptr_offset(eb, slot);
+                total = btrfs_item_size_nr(eb, slot);
+                elem_size = sizeof(*extref);
+        }
        while (cur < total) {
                fs_path_reset(p);
-                name_len = btrfs_inode_ref_name_len(eb, iref);
+                if (found_key->type == BTRFS_INODE_REF_KEY) {
-                index = btrfs_inode_ref_index(eb, iref);
+                        iref = (struct btrfs_inode_ref *)(ptr + cur);
+                        name_len = btrfs_inode_ref_name_len(eb, iref);
+                        name_off = (unsigned long)(iref + 1);
+                        index = btrfs_inode_ref_index(eb, iref);
+                        dir = found_key->offset;
+                } else {
+                        extref = (struct btrfs_inode_extref *)(ptr + cur);
+                        name_len = btrfs_inode_extref_name_len(eb, extref);
+                        name_off = (unsigned long)&extref->name;
+                        index = btrfs_inode_extref_index(eb, extref);
+                        dir = btrfs_inode_extref_parent(eb, extref);
+                }
                if (resolve) {
-                        start = btrfs_iref_to_path(root, tmp_path, iref, eb,
+                        start = btrfs_ref_to_path(root, tmp_path, name_len,
-                                                found_key->offset, p->buf,
+                                                  name_off, eb, dir,
-                                                p->buf_len);
+                                                  p->buf, p->buf_len);
                        if (IS_ERR(start)) {
                                ret = PTR_ERR(start);
                                goto out;
@@ -809,9 +830,10 @@ static int iterate_inode_ref(struct send_ctx *sctx,
                                                p->buf_len + p->buf - start);
                                if (ret < 0)
                                        goto out;
-                                start = btrfs_iref_to_path(root, tmp_path, iref,
+                                start = btrfs_ref_to_path(root, tmp_path,
-                                                eb, found_key->offset, p->buf,
+                                                          name_len, name_off,
-                                                p->buf_len);
+                                                          eb, dir,
+                                                          p->buf, p->buf_len);
                                if (IS_ERR(start)) {
                                        ret = PTR_ERR(start);
                                        goto out;
@@ -820,21 +842,16 @@ static int iterate_inode_ref(struct send_ctx *sctx,
                        }
                        p->start = start;
                } else {
-                        ret = fs_path_add_from_extent_buffer(p, eb,
+                        ret = fs_path_add_from_extent_buffer(p, eb, name_off,
-                                        (unsigned long)(iref + 1), name_len);
+                                                             name_len);
                        if (ret < 0)
                                goto out;
                }
+                cur += elem_size + name_len;
-                len = sizeof(*iref) + name_len;
+                ret = iterate(num, dir, index, p, ctx);
-                iref = (struct btrfs_inode_ref *)((char *)iref + len);
-                cur += len;
-                ret = iterate(num, found_key->offset, index, p, ctx);
                if (ret)
                        goto out;
                num++;
        }
@@ -998,7 +1015,8 @@ static int get_inode_path(struct send_ctx *sctx, struct btrfs_root *root,
        }
        btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]);
        if (found_key.objectid != ino ||
-                found_key.type != BTRFS_INODE_REF_KEY) {
+            (found_key.type != BTRFS_INODE_REF_KEY &&
+             found_key.type != BTRFS_INODE_EXTREF_KEY)) {
                ret = -ENOENT;
                goto out;
        }
@@ -1551,8 +1569,8 @@ static int get_first_ref(struct send_ctx *sctx,
        struct btrfs_key key;
        struct btrfs_key found_key;
        struct btrfs_path *path;
-        struct btrfs_inode_ref *iref;
        int len;
+        u64 parent_dir;
        path = alloc_path_for_send();
        if (!path)
@@ -1568,27 +1586,41 @@ static int get_first_ref(struct send_ctx *sctx,
        if (!ret)
                btrfs_item_key_to_cpu(path->nodes[0], &found_key,
                                path->slots[0]);
-        if (ret || found_key.objectid != key.objectid ||
+        if (ret || found_key.objectid != ino ||
-            found_key.type != key.type) {
+            (found_key.type != BTRFS_INODE_REF_KEY &&
+             found_key.type != BTRFS_INODE_EXTREF_KEY)) { /* the first ref of ino may be either an INODE_REF or an INODE_EXTREF item */
                ret = -ENOENT;
                goto out;
        }
-        iref = btrfs_item_ptr(path->nodes[0], path->slots[0],
+        if (found_key.type == BTRFS_INODE_REF_KEY) { /* choose the decoder from the item actually found, not from the search key */
-                        struct btrfs_inode_ref);
+                struct btrfs_inode_ref *iref;
-        len = btrfs_inode_ref_name_len(path->nodes[0], iref);
+                iref = btrfs_item_ptr(path->nodes[0], path->slots[0],
-        ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
+                                      struct btrfs_inode_ref);
-                        (unsigned long)(iref + 1), len);
+                len = btrfs_inode_ref_name_len(path->nodes[0], iref);
+                ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
+                                                     (unsigned long)(iref + 1),
+                                                     len);
+                parent_dir = found_key.offset; /* for INODE_REF the parent directory is the key offset */
+        } else {
+                struct btrfs_inode_extref *extref;
+                extref = btrfs_item_ptr(path->nodes[0], path->slots[0],
+                                        struct btrfs_inode_extref);
+                len = btrfs_inode_extref_name_len(path->nodes[0], extref);
+                ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
+                                        (unsigned long)&extref->name, len);
+                parent_dir = btrfs_inode_extref_parent(path->nodes[0], extref); /* for INODE_EXTREF the parent is read from the item itself */
+        }
        if (ret < 0)
                goto out;
        btrfs_release_path(path);
-        ret = get_inode_info(root, found_key.offset, NULL, dir_gen, NULL, NULL,
+        ret = get_inode_info(root, parent_dir, NULL, dir_gen, NULL, NULL,
                        NULL, NULL);
        if (ret < 0)
                goto out;
-        *dir = found_key.offset;
+        *dir = parent_dir; /* report the parent resolved above, whichever ref flavour supplied it */
 out:
        btrfs_free_path(path);
@@ -2430,7 +2462,8 @@ verbose_printk("btrfs: send_create_inode %llu\n", ino);
                TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p);
        } else if (S_ISCHR(mode) || S_ISBLK(mode) ||
                   S_ISFIFO(mode) || S_ISSOCK(mode)) {
-                TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, rdev);
+                TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, new_encode_dev(rdev));
+                TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode);
        }
        ret = send_cmd(sctx);
@@ -3226,7 +3259,8 @@ static int process_all_refs(struct send_ctx *sctx,
                btrfs_item_key_to_cpu(eb, &found_key, slot);
                if (found_key.objectid != key.objectid ||
-                    found_key.type != key.type)
+                    (found_key.type != BTRFS_INODE_REF_KEY &&
+                     found_key.type != BTRFS_INODE_EXTREF_KEY))
                        break;
                ret = iterate_inode_ref(sctx, root, path, &found_key, 0, cb,
@@ -3987,7 +4021,7 @@ static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end)
        if (sctx->cur_ino == 0)
                goto out;
        if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid &&
-            sctx->cmp_key->type <= BTRFS_INODE_REF_KEY)
+            sctx->cmp_key->type <= BTRFS_INODE_EXTREF_KEY)
                goto out;
        if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs))
                goto out;
@@ -4033,22 +4067,21 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
        if (ret < 0)
                goto out;
-        if (!S_ISLNK(sctx->cur_inode_mode)) {
+        if (!sctx->parent_root || sctx->cur_inode_new) {
-                if (!sctx->parent_root || sctx->cur_inode_new) {
+                need_chown = 1;
+                if (!S_ISLNK(sctx->cur_inode_mode))
                        need_chmod = 1;
-                        need_chown = 1;
+        } else {
-                } else {
+                ret = get_inode_info(sctx->parent_root, sctx->cur_ino,
-                        ret = get_inode_info(sctx->parent_root, sctx->cur_ino,
+                                NULL, NULL, &right_mode, &right_uid,
-                                        NULL, NULL, &right_mode, &right_uid,
+                                &right_gid, NULL);
-                                        &right_gid, NULL);
+                if (ret < 0)
-                        if (ret < 0)
+                        goto out;
-                                goto out;
-                        if (left_uid != right_uid || left_gid != right_gid)
+                if (left_uid != right_uid || left_gid != right_gid)
-                                need_chown = 1;
+                        need_chown = 1;
-                        if (left_mode != right_mode)
+                if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode)
-                                need_chmod = 1;
+                        need_chmod = 1;
-                }
        }
        if (S_ISREG(sctx->cur_inode_mode)) {
@@ -4335,7 +4368,8 @@ static int changed_cb(struct btrfs_root *left_root,
        if (key->type == BTRFS_INODE_ITEM_KEY)
                ret = changed_inode(sctx, result);
-        else if (key->type == BTRFS_INODE_REF_KEY)
+        else if (key->type == BTRFS_INODE_REF_KEY ||
+                 key->type == BTRFS_INODE_EXTREF_KEY)
                ret = changed_ref(sctx, result);
        else if (key->type == BTRFS_XATTR_ITEM_KEY)
                ret = changed_xattr(sctx, result);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 77db875b5116..04bbfb1052eb 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1200,7 +1200,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
        btrfs_i_size_write(parent_inode, parent_inode->i_size +
                                         dentry->d_name.len * 2);
        parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
-        ret = btrfs_update_inode(trans, parent_root, parent_inode);
+        ret = btrfs_update_inode_fallback(trans, parent_root, parent_inode);
        if (ret)
                btrfs_abort_transaction(trans, root, ret);
 fail:
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 029b903a4ae3..0f5ebb72a5ea 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1819,6 +1819,13 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
                                    "Failed to relocate sys chunks after "
                                    "device initialization. This can be fixed "
                                    "using the \"btrfs balance\" command.");
+                trans = btrfs_attach_transaction(root);
+                if (IS_ERR(trans)) {
+                        if (PTR_ERR(trans) == -ENOENT)
+                                return 0;
+                        return PTR_ERR(trans);
+                }
+                ret = btrfs_commit_transaction(trans, root);
        }
        return ret;
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 02ce90972d81..9349bb37a2fe 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -90,6 +90,8 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
                *max_len = handle_length;
                type = 255;
        }
+        if (dentry)
+                dput(dentry);
        return type;
 }
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 3f152b92a94a..afc2bb691780 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -471,9 +471,19 @@ static int exact_lock(dev_t dev, void *data)
 */
 int cdev_add(struct cdev *p, dev_t dev, unsigned count)
 {
+        int error;
         p->dev = dev;
         p->count = count;
-        return kobj_map(cdev_map, dev, count, NULL, exact_match, exact_lock, p);
+        error = kobj_map(cdev_map, dev, count, NULL,
+                         exact_match, exact_lock, p);
+        if (error)
+                return error;
+        /* Pin the parent kobject; the cdev release handlers drop it again. */
+        kobject_get(p->kobj.parent);
+        return 0;
 }
 static void cdev_unmap(dev_t dev, unsigned count)
@@ -498,14 +508,20 @@ void cdev_del(struct cdev *p)
 static void cdev_default_release(struct kobject *kobj)
 {
         struct cdev *p = container_of(kobj, struct cdev, kobj);
+        struct kobject *parent = kobj->parent;
         cdev_purge(p);
+        /* Pairs with the kobject_get() on the parent in cdev_add(). */
+        kobject_put(parent);
 }
 static void cdev_dynamic_release(struct kobject *kobj)
 {
         struct cdev *p = container_of(kobj, struct cdev, kobj);
+        /* kobj is embedded in *p, so fetch the parent before kfree(p). */
+        struct kobject *parent = kobj->parent;
         cdev_purge(p);
         kfree(p);
+        /* Pairs with the kobject_get() on the parent in cdev_add(). */
+        kobject_put(parent);
 }
 static struct kobj_type ktype_cdev_default = {
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index fc783e264420..0fb15bbbe43c 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -225,6 +225,13 @@ sid_to_str(struct cifs_sid *sidptr, char *sidstr)
 }
 static void
+cifs_copy_sid(struct cifs_sid *dst, const struct cifs_sid *src)
+{
+        /* Copy the whole structure first ... */
+        memcpy(dst, src, sizeof(*dst));
+        /*
+         * ... then cap the destination's sub-authority count at
+         * NUM_SUBAUTHS, whatever value the source carried.
+         * NOTE(review): presumably NUM_SUBAUTHS is the capacity of
+         * sub_auth[] in struct cifs_sid -- confirm against its definition.
+         */
+        dst->num_subauth = min_t(u8, src->num_subauth, NUM_SUBAUTHS);
+}
+static void
 id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr,
                struct cifs_sid_id **psidid, char *typestr)
 {
@@ -248,7 +255,7 @@ id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr,
                }
        }
-        memcpy(&(*psidid)->sid, sidptr, sizeof(struct cifs_sid));
+        cifs_copy_sid(&(*psidid)->sid, sidptr);
        (*psidid)->time = jiffies - (SID_MAP_RETRY + 1);
        (*psidid)->refcount = 0;
@@ -354,7 +361,7 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid)
         * any fields of the node after a reference is put .
         */
        if (test_bit(SID_ID_MAPPED, &psidid->state)) {
-                memcpy(ssid, &psidid->sid, sizeof(struct cifs_sid));
+                cifs_copy_sid(ssid, &psidid->sid);
                psidid->time = jiffies; /* update ts for accessing */
                goto id_sid_out;
        }
@@ -370,14 +377,14 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid)
                if (IS_ERR(sidkey)) {
                        rc = -EINVAL;
                        cFYI(1, "%s: Can't map and id to a SID", __func__);
+                } else if (sidkey->datalen < sizeof(struct cifs_sid)) {
+                        rc = -EIO;
+                        cFYI(1, "%s: Downcall contained malformed key "
+                                "(datalen=%hu)", __func__, sidkey->datalen);
                } else {
                        lsid = (struct cifs_sid *)sidkey->payload.data;
-                        memcpy(&psidid->sid, lsid,
+                        cifs_copy_sid(&psidid->sid, lsid);
-                                sidkey->datalen < sizeof(struct cifs_sid) ?
+                        cifs_copy_sid(ssid, &psidid->sid);
-                                sidkey->datalen : sizeof(struct cifs_sid));
-                        memcpy(ssid, &psidid->sid,
-                                sidkey->datalen < sizeof(struct cifs_sid) ?
-                                sidkey->datalen : sizeof(struct cifs_sid));
                        set_bit(SID_ID_MAPPED, &psidid->state);
                        key_put(sidkey);
                        kfree(psidid->sidstr);
@@ -396,7 +403,7 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid)
                        return rc;
                }
                if (test_bit(SID_ID_MAPPED, &psidid->state))
-                        memcpy(ssid, &psidid->sid, sizeof(struct cifs_sid));
+                        cifs_copy_sid(ssid, &psidid->sid);
                else
                        rc = -EINVAL;
        }
@@ -675,8 +682,6 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
 static void copy_sec_desc(const struct cifs_ntsd *pntsd,
                                struct cifs_ntsd *pnntsd, __u32 sidsoffset)
 {
-        int i;
        struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
        struct cifs_sid *nowner_sid_ptr, *ngroup_sid_ptr;
@@ -692,26 +697,14 @@ static void copy_sec_desc(const struct cifs_ntsd *pntsd,
        owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
                                le32_to_cpu(pntsd->osidoffset));
        nowner_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset);
+        cifs_copy_sid(nowner_sid_ptr, owner_sid_ptr);
-        nowner_sid_ptr->revision = owner_sid_ptr->revision;
-        nowner_sid_ptr->num_subauth = owner_sid_ptr->num_subauth;
-        for (i = 0; i < 6; i++)
-                nowner_sid_ptr->authority[i] = owner_sid_ptr->authority[i];
-        for (i = 0; i < 5; i++)
-                nowner_sid_ptr->sub_auth[i] = owner_sid_ptr->sub_auth[i];
        /* copy group sid */
        group_sid_ptr = (struct cifs_sid *)((char *)pntsd +
                                le32_to_cpu(pntsd->gsidoffset));
        ngroup_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset +
                                        sizeof(struct cifs_sid));
+        cifs_copy_sid(ngroup_sid_ptr, group_sid_ptr);
-        ngroup_sid_ptr->revision = group_sid_ptr->revision;
-        ngroup_sid_ptr->num_subauth = group_sid_ptr->num_subauth;
-        for (i = 0; i < 6; i++)
-                ngroup_sid_ptr->authority[i] = group_sid_ptr->authority[i];
-        for (i = 0; i < 5; i++)
-                ngroup_sid_ptr->sub_auth[i] = group_sid_ptr->sub_auth[i];
        return;
 }
@@ -1120,8 +1113,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
                                kfree(nowner_sid_ptr);
                                return rc;
                        }
-                        memcpy(owner_sid_ptr, nowner_sid_ptr,
+                        cifs_copy_sid(owner_sid_ptr, nowner_sid_ptr);
-                                        sizeof(struct cifs_sid));
                        kfree(nowner_sid_ptr);
                        *aclflag = CIFS_ACL_OWNER;
                }
@@ -1139,8 +1131,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
                                kfree(ngroup_sid_ptr);
                                return rc;
                        }
-                        memcpy(group_sid_ptr, ngroup_sid_ptr,
+                        cifs_copy_sid(group_sid_ptr, ngroup_sid_ptr);
-                                        sizeof(struct cifs_sid));
                        kfree(ngroup_sid_ptr);
                        *aclflag = CIFS_ACL_GROUP;
                }
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 7c0a81283645..d3671f2acb29 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -398,7 +398,16 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
         * in network traffic in the other paths.
         */
        if (!(oflags & O_CREAT)) {
-                struct dentry *res = cifs_lookup(inode, direntry, 0);
+                struct dentry *res;
+                /*
+                 * Check for hashed negative dentry. We have already revalidated
+                 * the dentry and it is fine. No need to perform another lookup.
+                 */
+                if (!d_unhashed(direntry))
+                        return -ENOENT;
+                res = cifs_lookup(inode, direntry, 0);
                if (IS_ERR(res))
                        return PTR_ERR(res);
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index f5054025f9da..4c6285fff598 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -210,6 +210,8 @@ static int do_video_set_spu_palette(unsigned int fd, unsigned int cmd,
        err  = get_user(palp, &up->palette);
        err |= get_user(length, &up->length);
+        if (err)
+                return -EFAULT;
        up_native = compat_alloc_user_space(sizeof(struct video_spu_palette));
        err  = put_user(compat_ptr(palp), &up_native->palette);
diff --git a/fs/coredump.c b/fs/coredump.c
index fd37facac8dc..ce47379bfa61 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -450,11 +450,12 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
        cp->file = files[1];
-        replace_fd(0, files[0], 0);
+        err = replace_fd(0, files[0], 0);
+        fput(files[0]);
        /* and disallow core files too */
        current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
-        return 0;
+        return err;
 }
 void do_coredump(siginfo_t *siginfo, struct pt_regs *regs)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index da72250ddc1c..cd96649bfe62 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -346,7 +346,7 @@ static inline struct epitem *ep_item_from_epqueue(poll_table *p)
 /* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
 static inline int ep_op_has_event(int op)
 {
-        return op == EPOLL_CTL_ADD || op == EPOLL_CTL_MOD;
+        /* Every op other than EPOLL_CTL_DEL is treated as carrying an event. */
+        return op != EPOLL_CTL_DEL;
 }
 /* Initialize the poll safe wake up structure */
@@ -676,34 +676,6 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
        return 0;
 }
-/*
- * Disables a "struct epitem" in the eventpoll set. Returns -EBUSY if the item
- * had no event flags set, indicating that another thread may be currently
- * handling that item's events (in the case that EPOLLONESHOT was being
- * used). Otherwise a zero result indicates that the item has been disabled
- * from receiving events. A disabled item may be re-enabled via
- * EPOLL_CTL_MOD. Must be called with "mtx" held.
- */
-static int ep_disable(struct eventpoll *ep, struct epitem *epi)
-{
-        int result = 0;
-        unsigned long flags;
-        spin_lock_irqsave(&ep->lock, flags);
-        if (epi->event.events & ~EP_PRIVATE_BITS) {
-                if (ep_is_linked(&epi->rdllink))
-                        list_del_init(&epi->rdllink);
-                /* Ensure ep_poll_callback will not add epi back onto ready
-                   list: */
-                epi->event.events &= EP_PRIVATE_BITS;
-                }
-        else
-                result = -EBUSY;
-        spin_unlock_irqrestore(&ep->lock, flags);
-        return result;
-}
 static void ep_free(struct eventpoll *ep)
 {
        struct rb_node *rbp;
@@ -1048,6 +1020,8 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
        rb_insert_color(&epi->rbn, &ep->rbr);
 }
 #define PATH_ARR_SIZE 5
 /*
 * These are the number paths of length 1 to 5, that we are allowing to emanate
@@ -1813,12 +1787,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
                } else
                        error = -ENOENT;
                break;
-        case EPOLL_CTL_DISABLE:
-                if (epi)
-                        error = ep_disable(ep, epi);
-                else
-                        error = -ENOENT;
-                break;
        }
        mutex_unlock(&ep->mtx);
diff --git a/fs/exec.c b/fs/exec.c
index 8b9011b67041..0039055b1fc6 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1083,7 +1083,8 @@ int flush_old_exec(struct linux_binprm * bprm)
        bprm->mm = NULL;                /* We're using it now */
        set_fs(USER_DS);
-        current->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD);
+        current->flags &=
+                ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD | PF_NOFREEZE);
        flush_thread();
        current->personality &= ~bprm->per_clear;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 6c205d0c565b..fa04d023177e 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -469,7 +469,7 @@ static int parse_options(char *options, struct super_block *sb)
                        uid = make_kuid(current_user_ns(), option);
                        if (!uid_valid(uid)) {
                                ext2_msg(sb, KERN_ERR, "Invalid uid value %d", option);
-                                return -1;
+                                return 0;
                        }
                        sbi->s_resuid = uid;
@@ -480,7 +480,7 @@ static int parse_options(char *options, struct super_block *sb)
                        gid = make_kgid(current_user_ns(), option);
                        if (!gid_valid(gid)) {
                                ext2_msg(sb, KERN_ERR, "Invalid gid value %d", option);
-                                return -1;
+                                return 0;
                        }
                        sbi->s_resgid = gid;
                        break;
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 7320a66e958f..22548f56197b 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -2101,8 +2101,9 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
        end = start + (range->len >> sb->s_blocksize_bits) - 1;
        minlen = range->minlen >> sb->s_blocksize_bits;
-        if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb)) ||
+        if (minlen > EXT3_BLOCKS_PER_GROUP(sb) ||
-            unlikely(start >= max_blks))
+            start >= max_blks ||
+            range->len < sb->s_blocksize)
                return -EINVAL;
        if (end >= max_blks)
                end = max_blks - 1;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 8f4fddac01a6..890b8947c546 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -46,8 +46,7 @@ static struct buffer_head *ext3_append(handle_t *handle,
        *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
-        bh = ext3_bread(handle, inode, *block, 1, err);
+        if ((bh = ext3_dir_bread(handle, inode, *block, 1, err))) {
-        if (bh) {
                inode->i_size += inode->i_sb->s_blocksize;
                EXT3_I(inode)->i_disksize = inode->i_size;
                *err = ext3_journal_get_write_access(handle, bh);
@@ -339,8 +338,10 @@ dx_probe(struct qstr *entry, struct inode *dir,
        u32 hash;
        frame->bh = NULL;
-        if (!(bh = ext3_bread (NULL,dir, 0, 0, err)))
+        if (!(bh = ext3_dir_bread(NULL, dir, 0, 0, err))) {
+                *err = ERR_BAD_DX_DIR;
                goto fail;
+        }
        root = (struct dx_root *) bh->b_data;
        if (root->info.hash_version != DX_HASH_TEA &&
            root->info.hash_version != DX_HASH_HALF_MD4 &&
@@ -436,8 +437,10 @@ dx_probe(struct qstr *entry, struct inode *dir,
                frame->entries = entries;
                frame->at = at;
                if (!indirect--) return frame;
-                if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
+                if (!(bh = ext3_dir_bread(NULL, dir, dx_get_block(at), 0, err))) {
+                        *err = ERR_BAD_DX_DIR;
                        goto fail2;
+                }
                at = entries = ((struct dx_node *) bh->b_data)->entries;
                if (dx_get_limit(entries) != dx_node_limit (dir)) {
                        ext3_warning(dir->i_sb, __func__,
@@ -535,8 +538,8 @@ static int ext3_htree_next_block(struct inode *dir, __u32 hash,
         * block so no check is necessary
         */
        while (num_frames--) {
-                if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at),
+                if (!(bh = ext3_dir_bread(NULL, dir, dx_get_block(p->at),
-                                      0, &err)))
+                                          0, &err)))
                        return err; /* Failure */
                p++;
                brelse (p->bh);
@@ -559,10 +562,11 @@ static int htree_dirblock_to_tree(struct file *dir_file,
 {
        struct buffer_head *bh;
        struct ext3_dir_entry_2 *de, *top;
-        int err, count = 0;
+        int err = 0, count = 0;
        dxtrace(printk("In htree dirblock_to_tree: block %d\n", block));
-        if (!(bh = ext3_bread (NULL, dir, block, 0, &err)))
+        if (!(bh = ext3_dir_bread(NULL, dir, block, 0, &err)))
                return err;
        de = (struct ext3_dir_entry_2 *) bh->b_data;
@@ -976,7 +980,7 @@ static struct buffer_head * ext3_dx_find_entry(struct inode *dir,
                return NULL;
        do {
                block = dx_get_block(frame->at);
-                if (!(bh = ext3_bread (NULL,dir, block, 0, err)))
+                if (!(bh = ext3_dir_bread (NULL, dir, block, 0, err)))
                        goto errout;
                retval = search_dirblock(bh, dir, entry,
@@ -1458,9 +1462,9 @@ static int ext3_add_entry (handle_t *handle, struct dentry *dentry,
        }
        blocks = dir->i_size >> sb->s_blocksize_bits;
        for (block = 0; block < blocks; block++) {
-                bh = ext3_bread(handle, dir, block, 0, &retval);
+                if (!(bh = ext3_dir_bread(handle, dir, block, 0, &retval)))
-                if(!bh)
                        return retval;
                retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
                if (retval != -ENOSPC)
                        return retval;
@@ -1500,7 +1504,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
        entries = frame->entries;
        at = frame->at;
-        if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err)))
+        if (!(bh = ext3_dir_bread(handle, dir, dx_get_block(frame->at), 0, &err)))
                goto cleanup;
        BUFFER_TRACE(bh, "get_write_access");
@@ -1790,8 +1794,7 @@ retry:
        inode->i_op = &ext3_dir_inode_operations;
        inode->i_fop = &ext3_dir_operations;
        inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize;
-        dir_block = ext3_bread (handle, inode, 0, 1, &err);
+        if (!(dir_block = ext3_dir_bread(handle, inode, 0, 1, &err)))
-        if (!dir_block)
                goto out_clear_inode;
        BUFFER_TRACE(dir_block, "get_write_access");
@@ -1859,7 +1862,7 @@ static int empty_dir (struct inode * inode)
        sb = inode->i_sb;
        if (inode->i_size < EXT3_DIR_REC_LEN(1) + EXT3_DIR_REC_LEN(2) ||
-            !(bh = ext3_bread (NULL, inode, 0, 0, &err))) {
+            !(bh = ext3_dir_bread(NULL, inode, 0, 0, &err))) {
                if (err)
                        ext3_error(inode->i_sb, __func__,
                                   "error %d reading directory #%lu offset 0",
@@ -1890,9 +1893,8 @@ static int empty_dir (struct inode * inode)
                        (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
                        err = 0;
                        brelse (bh);
-                        bh = ext3_bread (NULL, inode,
+                        if (!(bh = ext3_dir_bread (NULL, inode,
-                                offset >> EXT3_BLOCK_SIZE_BITS(sb), 0, &err);
+                                offset >> EXT3_BLOCK_SIZE_BITS(sb), 0, &err))) {
-                        if (!bh) {
                                if (err)
                                        ext3_error(sb, __func__,
                                                   "error %d reading directory"
@@ -2388,7 +2390,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
                                goto end_rename;
                }
                retval = -EIO;
-                dir_bh = ext3_bread (handle, old_inode, 0, 0, &retval);
+                dir_bh = ext3_dir_bread(handle, old_inode, 0, 0, &retval);
                if (!dir_bh)
                        goto end_rename;
                if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino)
diff --git a/fs/ext3/namei.h b/fs/ext3/namei.h
index f2ce2b0065c9..46304d8c9f0a 100644
--- a/fs/ext3/namei.h
+++ b/fs/ext3/namei.h
@@ -6,3 +6,22 @@
 */
 extern struct dentry *ext3_get_parent(struct dentry *child);
+/*
+ * Read a directory block through ext3_bread().
+ *
+ * If ext3_bread() returns NULL without setting *err, the block is treated
+ * as a hole in the directory: *err is set to -EIO, the problem is reported
+ * with ext3_error(), and NULL is returned.  In every other case the result
+ * of ext3_bread() (and whatever it stored in *err) is passed through
+ * unchanged.
+ */
+static inline struct buffer_head *ext3_dir_bread(handle_t *handle,
+                                                 struct inode *inode,
+                                                 int block, int create,
+                                                 int *err)
+{
+        struct buffer_head *bh;
+        bh = ext3_bread(handle, inode, block, create, err);
+        if (!bh && !(*err)) {
+                *err = -EIO;
+                ext3_error(inode->i_sb, __func__,
+                           "Directory hole detected on inode %lu\n",
+                           inode->i_ino);
+                return NULL;
+        }
+        return bh;
+}
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 29e79713c7eb..5366393528df 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1001,7 +1001,7 @@ static int parse_options (char *options, struct super_block *sb,
                        uid = make_kuid(current_user_ns(), option);
                        if (!uid_valid(uid)) {
                                ext3_msg(sb, KERN_ERR, "Invalid uid value %d", option);
-                                return -1;
+                                return 0;
                        }
                        sbi->s_resuid = uid;
@@ -1012,7 +1012,7 @@ static int parse_options (char *options, struct super_block *sb,
                        gid = make_kgid(current_user_ns(), option);
                        if (!gid_valid(gid)) {
                                ext3_msg(sb, KERN_ERR, "Invalid gid value %d", option);
-                                return -1;
+                                return 0;
                        }
                        sbi->s_resgid = gid;
                        break;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 1b5089067d01..cf1821784a16 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -174,8 +174,7 @@ void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
                ext4_free_inodes_set(sb, gdp, 0);
                ext4_itable_unused_set(sb, gdp, 0);
                memset(bh->b_data, 0xff, sb->s_blocksize);
-                ext4_block_bitmap_csum_set(sb, block_group, gdp, bh,
+                ext4_block_bitmap_csum_set(sb, block_group, gdp, bh);
-                                           EXT4_BLOCKS_PER_GROUP(sb) / 8);
                return;
        }
        memset(bh->b_data, 0, sb->s_blocksize);
@@ -212,8 +211,7 @@ void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
         */
        ext4_mark_bitmap_end(num_clusters_in_group(sb, block_group),
                             sb->s_blocksize * 8, bh->b_data);
-        ext4_block_bitmap_csum_set(sb, block_group, gdp, bh,
+        ext4_block_bitmap_csum_set(sb, block_group, gdp, bh);
-                                   EXT4_BLOCKS_PER_GROUP(sb) / 8);
        ext4_group_desc_csum_set(sb, block_group, gdp);
 }
@@ -350,7 +348,7 @@ void ext4_validate_block_bitmap(struct super_block *sb,
                return;
        }
        if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group,
-                        desc, bh, EXT4_BLOCKS_PER_GROUP(sb) / 8))) {
+                        desc, bh))) {
                ext4_unlock_group(sb, block_group);
                ext4_error(sb, "bg %u: bad block bitmap checksum", block_group);
                return;
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index 5c2d1813ebe9..3285aa5a706a 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -58,11 +58,12 @@ void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
 int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
                                  struct ext4_group_desc *gdp,
-                                  struct buffer_head *bh, int sz)
+                                  struct buffer_head *bh)
 {
        __u32 hi;
        __u32 provided, calculated;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
+        int sz = EXT4_CLUSTERS_PER_GROUP(sb) / 8;
        if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
                                        EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
@@ -84,8 +85,9 @@ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
 void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
                                struct ext4_group_desc *gdp,
-                                struct buffer_head *bh, int sz)
+                                struct buffer_head *bh)
 {
+        int sz = EXT4_CLUSTERS_PER_GROUP(sb) / 8;
        __u32 csum;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 3ab2539b7b2e..3c20de1d59d0 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1882,10 +1882,10 @@ int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
                                  struct buffer_head *bh, int sz);
 void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
                                struct ext4_group_desc *gdp,
-                                struct buffer_head *bh, int sz);
+                                struct buffer_head *bh);
 int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
                                  struct ext4_group_desc *gdp,
-                                  struct buffer_head *bh, int sz);
+                                  struct buffer_head *bh);
 /* balloc.c */
 extern void ext4_validate_block_bitmap(struct super_block *sb,
@@ -2063,8 +2063,7 @@ extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count);
 extern int ext4_calculate_overhead(struct super_block *sb);
 extern int ext4_superblock_csum_verify(struct super_block *sb,
                                       struct ext4_super_block *es);
-extern void ext4_superblock_csum_set(struct super_block *sb,
+extern void ext4_superblock_csum_set(struct super_block *sb);
-                                     struct ext4_super_block *es);
 extern void *ext4_kvmalloc(size_t size, gfp_t flags);
 extern void *ext4_kvzalloc(size_t size, gfp_t flags);
 extern void ext4_kvfree(void *ptr);
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index bfa65b49d424..b4323ba846b5 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -143,17 +143,13 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line,
        struct buffer_head *bh = EXT4_SB(sb)->s_sbh;
        int err = 0;
+        ext4_superblock_csum_set(sb);
        if (ext4_handle_valid(handle)) {
-                ext4_superblock_csum_set(sb,
-                                (struct ext4_super_block *)bh->b_data);
                err = jbd2_journal_dirty_metadata(handle, bh);
                if (err)
                        ext4_journal_abort_handle(where, line, __func__,
                                                  bh, handle, err);
-        } else {
+        } else
-                ext4_superblock_csum_set(sb,
-                                (struct ext4_super_block *)bh->b_data);
                mark_buffer_dirty(bh);
-        }
        return err;
 }
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 1c94cca35ed1..7011ac967208 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -52,6 +52,9 @@
 #define EXT4_EXT_MARK_UNINIT1   0x2  /* mark first half uninitialized */
 #define EXT4_EXT_MARK_UNINIT2   0x4  /* mark second half uninitialized */
+#define EXT4_EXT_DATA_VALID1    0x8  /* first half contains valid data */
+#define EXT4_EXT_DATA_VALID2    0x10 /* second half contains valid data */
 static __le32 ext4_extent_block_csum(struct inode *inode,
                                     struct ext4_extent_header *eh)
 {
@@ -2914,6 +2917,9 @@ static int ext4_split_extent_at(handle_t *handle,
        unsigned int ee_len, depth;
        int err = 0;
+        BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) ==
+               (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2));
        ext_debug("ext4_split_extents_at: inode %lu, logical"
                "block %llu\n", inode->i_ino, (unsigned long long)split);
@@ -2972,7 +2978,14 @@ static int ext4_split_extent_at(handle_t *handle,
        err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
        if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
-                err = ext4_ext_zeroout(inode, &orig_ex);
+                if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
+                        if (split_flag & EXT4_EXT_DATA_VALID1)
+                                err = ext4_ext_zeroout(inode, ex2);
+                        else
+                                err = ext4_ext_zeroout(inode, ex);
+                } else
+                        err = ext4_ext_zeroout(inode, &orig_ex);
                if (err)
                        goto fix_extent_len;
                /* update the extent length and mark as initialized */
@@ -3025,12 +3038,13 @@ static int ext4_split_extent(handle_t *handle,
        uninitialized = ext4_ext_is_uninitialized(ex);
        if (map->m_lblk + map->m_len < ee_block + ee_len) {
-                split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ?
+                split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT;
-                              EXT4_EXT_MAY_ZEROOUT : 0;
                flags1 = flags | EXT4_GET_BLOCKS_PRE_IO;
                if (uninitialized)
                        split_flag1 |= EXT4_EXT_MARK_UNINIT1 |
                                       EXT4_EXT_MARK_UNINIT2;
+                if (split_flag & EXT4_EXT_DATA_VALID2)
+                        split_flag1 |= EXT4_EXT_DATA_VALID1;
                err = ext4_split_extent_at(handle, inode, path,
                                map->m_lblk + map->m_len, split_flag1, flags1);
                if (err)
@@ -3043,8 +3057,8 @@ static int ext4_split_extent(handle_t *handle,
                return PTR_ERR(path);
        if (map->m_lblk >= ee_block) {
-                split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ?
+                split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT |
-                              EXT4_EXT_MAY_ZEROOUT : 0;
+                                            EXT4_EXT_DATA_VALID2);
                if (uninitialized)
                        split_flag1 |= EXT4_EXT_MARK_UNINIT1;
                if (split_flag & EXT4_EXT_MARK_UNINIT2)
@@ -3323,26 +3337,47 @@ static int ext4_split_unwritten_extents(handle_t *handle,
        split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
        split_flag |= EXT4_EXT_MARK_UNINIT2;
+        if (flags & EXT4_GET_BLOCKS_CONVERT)
+                split_flag |= EXT4_EXT_DATA_VALID2;
        flags |= EXT4_GET_BLOCKS_PRE_IO;
        return ext4_split_extent(handle, inode, path, map, split_flag, flags);
 }
 static int ext4_convert_unwritten_extents_endio(handle_t *handle,
-                                              struct inode *inode,
+                                                struct inode *inode,
-                                              struct ext4_ext_path *path)
+                                                struct ext4_map_blocks *map,
+                                                struct ext4_ext_path *path)
 {
        struct ext4_extent *ex;
+        ext4_lblk_t ee_block;
+        unsigned int ee_len;
        int depth;
        int err = 0;
        depth = ext_depth(inode);
        ex = path[depth].p_ext;
+        ee_block = le32_to_cpu(ex->ee_block);
+        ee_len = ext4_ext_get_actual_len(ex);
        ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical"
                "block %llu, max_blocks %u\n", inode->i_ino,
-                (unsigned long long)le32_to_cpu(ex->ee_block),
+                  (unsigned long long)ee_block, ee_len);
-                ext4_ext_get_actual_len(ex));
+        /* If extent is larger than requested then split is required */
+        if (ee_block != map->m_lblk || ee_len > map->m_len) {
+                err = ext4_split_unwritten_extents(handle, inode, map, path,
+                                                   EXT4_GET_BLOCKS_CONVERT);
+                if (err < 0)
+                        goto out;
+                ext4_ext_drop_refs(path);
+                path = ext4_ext_find_extent(inode, map->m_lblk, path);
+                if (IS_ERR(path)) {
+                        err = PTR_ERR(path);
+                        goto out;
+                }
+                depth = ext_depth(inode);
+                ex = path[depth].p_ext;
+        }
        err = ext4_ext_get_access(handle, inode, path + depth);
        if (err)
@@ -3652,7 +3687,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
        }
        /* IO end_io complete, convert the filled extent to written */
        if ((flags & EXT4_GET_BLOCKS_CONVERT)) {
-                ret = ext4_convert_unwritten_extents_endio(handle, inode,
+                ret = ext4_convert_unwritten_extents_endio(handle, inode, map,
                                                        path);
                if (ret >= 0) {
                        ext4_update_inode_fsync_trans(handle, inode, 1);
@@ -4428,6 +4463,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
         */
        if (len <= EXT_UNINIT_MAX_LEN << blkbits)
                flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
+        /* Prevent a race with pending unwritten I/O by flushing it first */
+        ext4_flush_unwritten_io(inode);
 retry:
        while (ret >= 0 && ret < max_blocks) {
                map.m_lblk = map.m_lblk + ret;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index fa36372f3fdf..3a100e7a62a8 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -725,6 +725,10 @@ repeat_in_this_group:
                                   "inode=%lu", ino + 1);
                        continue;
                }
+                BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
+                err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
+                if (err)
+                        goto fail;
                ext4_lock_group(sb, group);
                ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data);
                ext4_unlock_group(sb, group);
@@ -738,6 +742,11 @@ repeat_in_this_group:
        goto out;
 got:
+        BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
+        err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh);
+        if (err)
+                goto fail;
        /* We may have to initialize the block bitmap if it isn't already */
        if (ext4_has_group_desc_csum(sb) &&
            gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
@@ -762,9 +771,7 @@ got:
                        ext4_free_group_clusters_set(sb, gdp,
                                ext4_free_clusters_after_init(sb, group, gdp));
                        ext4_block_bitmap_csum_set(sb, group, gdp,
-                                                   block_bitmap_bh,
+                                                   block_bitmap_bh);
-                                                   EXT4_BLOCKS_PER_GROUP(sb) /
-                                                   8);
                        ext4_group_desc_csum_set(sb, group, gdp);
                }
                ext4_unlock_group(sb, group);
@@ -773,11 +780,6 @@ got:
                        goto fail;
        }
-        BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
-        err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
-        if (err)
-                goto fail;
        BUFFER_TRACE(group_desc_bh, "get_write_access");
        err = ext4_journal_get_write_access(handle, group_desc_bh);
        if (err)
@@ -825,11 +827,6 @@ got:
        }
        ext4_unlock_group(sb, group);
-        BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
-        err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh);
-        if (err)
-                goto fail;
        BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata");
        err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh);
        if (err)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index f8b27bf80aca..526e55358606 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2805,8 +2805,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
        }
        len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
        ext4_free_group_clusters_set(sb, gdp, len);
-        ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh,
+        ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh);
-                                   EXT4_BLOCKS_PER_GROUP(sb) / 8);
        ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp);
        ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
@@ -4666,8 +4665,7 @@ do_more:
        ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
        ext4_free_group_clusters_set(sb, gdp, ret);
-        ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh,
+        ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh);
-                                   EXT4_BLOCKS_PER_GROUP(sb) / 8);
        ext4_group_desc_csum_set(sb, block_group, gdp);
        ext4_unlock_group(sb, block_group);
        percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
@@ -4811,8 +4809,7 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
        mb_free_blocks(NULL, &e4b, bit, count);
        blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc);
        ext4_free_group_clusters_set(sb, desc, blk_free_count);
-        ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh,
+        ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh);
-                                   EXT4_BLOCKS_PER_GROUP(sb) / 8);
        ext4_group_desc_csum_set(sb, block_group, desc);
        ext4_unlock_group(sb, block_group);
        percpu_counter_add(&sbi->s_freeclusters_counter,
@@ -4993,8 +4990,9 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
        minlen = EXT4_NUM_B2C(EXT4_SB(sb),
                              range->minlen >> sb->s_blocksize_bits);
-        if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)) ||
+        if (minlen > EXT4_CLUSTERS_PER_GROUP(sb) ||
-            unlikely(start >= max_blks))
+            start >= max_blks ||
+            range->len < sb->s_blocksize)
                return -EINVAL;
        if (end >= max_blks)
                end = max_blks - 1;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 7a75e1086961..47bf06a2765d 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1212,8 +1212,7 @@ static int ext4_set_bitmap_checksums(struct super_block *sb,
        bh = ext4_get_bitmap(sb, group_data->block_bitmap);
        if (!bh)
                return -EIO;
-        ext4_block_bitmap_csum_set(sb, group, gdp, bh,
+        ext4_block_bitmap_csum_set(sb, group, gdp, bh);
-                                   EXT4_BLOCKS_PER_GROUP(sb) / 8);
        brelse(bh);
        return 0;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7265a0367476..80928f716850 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -143,9 +143,10 @@ int ext4_superblock_csum_verify(struct super_block *sb,
        return es->s_checksum == ext4_superblock_csum(sb, es);
 }
-void ext4_superblock_csum_set(struct super_block *sb,
+void ext4_superblock_csum_set(struct super_block *sb)
-                              struct ext4_super_block *es)
 {
+        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
        if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
                EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
                return;
@@ -1963,7 +1964,7 @@ static int ext4_fill_flex_info(struct super_block *sb)
                sbi->s_log_groups_per_flex = 0;
                return 1;
        }
-        groups_per_flex = 1 << sbi->s_log_groups_per_flex;
+        groups_per_flex = 1U << sbi->s_log_groups_per_flex;
        err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count);
        if (err)
@@ -4381,7 +4382,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
                cpu_to_le32(percpu_counter_sum_positive(
                                &EXT4_SB(sb)->s_freeinodes_counter));
        BUFFER_TRACE(sbh, "marking dirty");
-        ext4_superblock_csum_set(sb, es);
+        ext4_superblock_csum_set(sb);
        mark_buffer_dirty(sbh);
        if (sync) {
                error = sync_dirty_buffer(sbh);
diff --git a/fs/file.c b/fs/file.c
index d3b5fa80b71b..7cb71b992603 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -685,7 +685,6 @@ void do_close_on_exec(struct files_struct *files)
        struct fdtable *fdt;
        /* exec unshares first */
-        BUG_ON(atomic_read(&files->count) != 1);
        spin_lock(&files->file_lock);
        for (i = 0; ; i++) {
                unsigned long set;
@@ -900,7 +899,7 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags)
                return __close_fd(files, fd);
        if (fd >= rlimit(RLIMIT_NOFILE))
-                return -EMFILE;
+                return -EBADF;
        spin_lock(&files->file_lock);
        err = expand_files(files, fd);
@@ -926,7 +925,7 @@ SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
                return -EINVAL;
        if (newfd >= rlimit(RLIMIT_NOFILE))
-                return -EMFILE;
+                return -EBADF;
        spin_lock(&files->file_lock);
        err = expand_files(files, newfd);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 51ea267d444c..3e3422f7f0a4 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -228,6 +228,8 @@ static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
 static void inode_sync_complete(struct inode *inode)
 {
        inode->i_state &= ~I_SYNC;
+        /* If inode is clean and unused, put it into LRU now... */
+        inode_add_lru(inode);
        /* Waiters must see I_SYNC cleared before being woken up */
        smp_mb();
        wake_up_bit(&inode->i_state, __I_SYNC);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 0def0504afc1..e056b4ce4877 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -516,15 +516,13 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
                struct gfs2_holder i_gh;
                int error;
-                gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
+                error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
-                error = gfs2_glock_nq(&i_gh);
+                                           &i_gh);
-                if (error == 0) {
-                        file_accessed(file);
-                        gfs2_glock_dq(&i_gh);
-                }
-                gfs2_holder_uninit(&i_gh);
                if (error)
                        return error;
+                /* grab lock to update inode */
+                gfs2_glock_dq_uninit(&i_gh);
+                file_accessed(file);
        }
        vma->vm_ops = &gfs2_vm_ops;
@@ -677,10 +675,8 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
        size_t writesize = iov_length(iov, nr_segs);
        struct dentry *dentry = file->f_dentry;
        struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
-        struct gfs2_sbd *sdp;
        int ret;
-        sdp = GFS2_SB(file->f_mapping->host);
        ret = gfs2_rs_alloc(ip);
        if (ret)
                return ret;
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 8ff95a2d54ee..9ceccb1595a3 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -393,12 +393,10 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
        struct gfs2_meta_header *mh;
        struct gfs2_trans *tr;
-        lock_buffer(bd->bd_bh);
-        gfs2_log_lock(sdp);
        tr = current->journal_info;
        tr->tr_touched = 1;
        if (!list_empty(&bd->bd_list))
-                goto out;
+                return;
        set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
        set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
        mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
@@ -414,9 +412,6 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
        sdp->sd_log_num_buf++;
        list_add(&bd->bd_list, &sdp->sd_log_le_buf);
        tr->tr_num_buf_new++;
-out:
-        gfs2_log_unlock(sdp);
-        unlock_buffer(bd->bd_bh);
 }
 static void gfs2_check_magic(struct buffer_head *bh)
@@ -621,7 +616,6 @@ static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
 static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
 {
-        struct gfs2_log_descriptor *ld;
        struct gfs2_meta_header *mh;
        unsigned int offset;
        struct list_head *head = &sdp->sd_log_le_revoke;
@@ -634,7 +628,6 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
        length = gfs2_struct2blk(sdp, sdp->sd_log_num_revoke, sizeof(u64));
        page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE, length, sdp->sd_log_num_revoke);
-        ld = page_address(page);
        offset = sizeof(struct gfs2_log_descriptor);
        list_for_each_entry(bd, head, bd_list) {
@@ -777,12 +770,10 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
        struct address_space *mapping = bd->bd_bh->b_page->mapping;
        struct gfs2_inode *ip = GFS2_I(mapping->host);
-        lock_buffer(bd->bd_bh);
-        gfs2_log_lock(sdp);
        if (tr)
                tr->tr_touched = 1;
        if (!list_empty(&bd->bd_list))
-                goto out;
+                return;
        set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
        set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
        if (gfs2_is_jdata(ip)) {
@@ -793,9 +784,6 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
        } else {
                list_add_tail(&bd->bd_list, &sdp->sd_log_le_ordered);
        }
-out:
-        gfs2_log_unlock(sdp);
-        unlock_buffer(bd->bd_bh);
 }
 /**
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 40c4b0d42fa8..c5af8e18f27a 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -497,8 +497,11 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
        struct gfs2_quota_data **qd;
        int error;
-        if (ip->i_res == NULL)
+        if (ip->i_res == NULL) {
-                gfs2_rs_alloc(ip);
+                error = gfs2_rs_alloc(ip);
+                if (error)
+                        return error;
+        }
        qd = ip->i_res->rs_qa_qd;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 3cc402ce6fea..38fe18f2f055 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -553,7 +553,6 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd)
 */
 int gfs2_rs_alloc(struct gfs2_inode *ip)
 {
-        int error = 0;
        struct gfs2_blkreserv *res;
        if (ip->i_res)
@@ -561,7 +560,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip)
        res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
        if (!res)
-                error = -ENOMEM;
+                return -ENOMEM;
        RB_CLEAR_NODE(&res->rs_node);
@@ -571,7 +570,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip)
        else
                ip->i_res = res;
        up_write(&ip->i_rw_mutex);
-        return error;
+        return 0;
 }
 static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
@@ -1263,7 +1262,9 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
        int ret = 0;
        u64 amt;
        u64 trimmed = 0;
+        u64 start, end, minlen;
        unsigned int x;
+        unsigned bs_shift = sdp->sd_sb.sb_bsize_shift;
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
@@ -1271,19 +1272,25 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
        if (!blk_queue_discard(q))
                return -EOPNOTSUPP;
-        if (argp == NULL) {
+        if (copy_from_user(&r, argp, sizeof(r)))
-                r.start = 0;
-                r.len = ULLONG_MAX;
-                r.minlen = 0;
-        } else if (copy_from_user(&r, argp, sizeof(r)))
                return -EFAULT;
        ret = gfs2_rindex_update(sdp);
        if (ret)
                return ret;
-        rgd = gfs2_blk2rgrpd(sdp, r.start, 0);
+        start = r.start >> bs_shift;
-        rgd_end = gfs2_blk2rgrpd(sdp, r.start + r.len, 0);
+        end = start + (r.len >> bs_shift);
+        minlen = max_t(u64, r.minlen,
+                       q->limits.discard_granularity) >> bs_shift;
+        rgd = gfs2_blk2rgrpd(sdp, start, 0);
+        rgd_end = gfs2_blk2rgrpd(sdp, end - 1, 0);
+        if (end <= start ||
+            minlen > sdp->sd_max_rg_data ||
+            start > rgd_end->rd_data0 + rgd_end->rd_data)
+                return -EINVAL;
        while (1) {
@@ -1295,7 +1302,9 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
                        /* Trim each bitmap in the rgrp */
                        for (x = 0; x < rgd->rd_length; x++) {
                                struct gfs2_bitmap *bi = rgd->rd_bits + x;
-                                ret = gfs2_rgrp_send_discards(sdp, rgd->rd_data0, NULL, bi, r.minlen, &amt);
+                                ret = gfs2_rgrp_send_discards(sdp,
+                                                rgd->rd_data0, NULL, bi, minlen,
+                                                &amt);
                                if (ret) {
                                        gfs2_glock_dq_uninit(&gh);
                                        goto out;
@@ -1324,7 +1333,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
 out:
        r.len = trimmed << 9;
-        if (argp && copy_to_user(argp, &r, sizeof(r)))
+        if (copy_to_user(argp, &r, sizeof(r)))
                return -EFAULT;
        return ret;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index bc737261f234..d6488674d916 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -810,7 +810,8 @@ static void gfs2_dirty_inode(struct inode *inode, int flags)
                        return;
                }
                need_unlock = 1;
-        }
+        } else if (WARN_ON_ONCE(ip->i_gl->gl_state != LM_ST_EXCLUSIVE))
+                return;
        if (current->journal_info == NULL) {
                ret = gfs2_trans_begin(sdp, RES_DINODE, 0);
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index adbd27875ef9..413627072f36 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -155,14 +155,22 @@ void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta)
        struct gfs2_sbd *sdp = gl->gl_sbd;
        struct gfs2_bufdata *bd;
+        lock_buffer(bh);
+        gfs2_log_lock(sdp);
        bd = bh->b_private;
        if (bd)
                gfs2_assert(sdp, bd->bd_gl == gl);
        else {
+                gfs2_log_unlock(sdp);
+                unlock_buffer(bh);
                gfs2_attach_bufdata(gl, bh, meta);
                bd = bh->b_private;
+                lock_buffer(bh);
+                gfs2_log_lock(sdp);
        }
        lops_add(sdp, bd);
+        gfs2_log_unlock(sdp);
+        unlock_buffer(bh);
 }
 void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
diff --git a/fs/inode.c b/fs/inode.c
index b03c71957246..64999f144153 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -408,6 +408,19 @@ static void inode_lru_list_add(struct inode *inode)
        spin_unlock(&inode->i_sb->s_inode_lru_lock);
 }
+/*
+ * Add inode to LRU if needed (inode is unused and clean).
+ *
+ * Needs inode->i_lock held.
+ */
+void inode_add_lru(struct inode *inode)
+{
+        if (!(inode->i_state & (I_DIRTY | I_SYNC | I_FREEING | I_WILL_FREE)) &&
+            !atomic_read(&inode->i_count) && inode->i_sb->s_flags & MS_ACTIVE)
+                inode_lru_list_add(inode);
+}
 static void inode_lru_list_del(struct inode *inode)
 {
        spin_lock(&inode->i_sb->s_inode_lru_lock);
@@ -1390,8 +1403,7 @@ static void iput_final(struct inode *inode)
        if (!drop && (sb->s_flags & MS_ACTIVE)) {
                inode->i_state |= I_REFERENCED;
-                if (!(inode->i_state & (I_DIRTY|I_SYNC)))
+                inode_add_lru(inode);
-                        inode_lru_list_add(inode);
                spin_unlock(&inode->i_lock);
                return;
        }
diff --git a/fs/internal.h b/fs/internal.h
index 916b7cbf3e3e..2f6af7f645eb 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -110,6 +110,7 @@ extern int open_check_o_direct(struct file *f);
 * inode.c
 */
 extern spinlock_t inode_sb_list_lock;
+extern void inode_add_lru(struct inode *inode);
 /*
 * fs-writeback.c
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 78b7f84241d4..7f5120bf0ec2 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1961,7 +1961,9 @@ retry:
                        spin_unlock(&journal->j_list_lock);
                        jbd_unlock_bh_state(bh);
                        spin_unlock(&journal->j_state_lock);
+                        unlock_buffer(bh);
                        log_wait_commit(journal, tid);
+                        lock_buffer(bh);
                        goto retry;
                }
                /*
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 60ef3fb707ff..1506673c087e 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -138,33 +138,39 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
        struct page *pg;
        struct inode *inode = mapping->host;
        struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
+        struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
+        struct jffs2_raw_inode ri;
+        uint32_t alloc_len = 0;
        pgoff_t index = pos >> PAGE_CACHE_SHIFT;
        uint32_t pageofs = index << PAGE_CACHE_SHIFT;
        int ret = 0;
+        jffs2_dbg(1, "%s()\n", __func__);
+        if (pageofs > inode->i_size) {
+                ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
+                                          ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
+                if (ret)
+                        return ret;
+        }
+        mutex_lock(&f->sem);
        pg = grab_cache_page_write_begin(mapping, index, flags);
-        if (!pg)
+        if (!pg) {
+                if (alloc_len)
+                        jffs2_complete_reservation(c);
+                mutex_unlock(&f->sem);
                return -ENOMEM;
+        }
        *pagep = pg;
-        jffs2_dbg(1, "%s()\n", __func__);
+        if (alloc_len) {
-        if (pageofs > inode->i_size) {
                /* Make new hole frag from old EOF to new page */
-                struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
-                struct jffs2_raw_inode ri;
                struct jffs2_full_dnode *fn;
-                uint32_t alloc_len;
                jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n",
                          (unsigned int)inode->i_size, pageofs);
-                ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
-                                          ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
-                if (ret)
-                        goto out_page;
-                mutex_lock(&f->sem);
                memset(&ri, 0, sizeof(ri));
                ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
@@ -191,7 +197,6 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
                if (IS_ERR(fn)) {
                        ret = PTR_ERR(fn);
                        jffs2_complete_reservation(c);
-                        mutex_unlock(&f->sem);
                        goto out_page;
                }
                ret = jffs2_add_full_dnode_to_inode(c, f, fn);
@@ -206,12 +211,10 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
                        jffs2_mark_node_obsolete(c, fn->raw);
                        jffs2_free_full_dnode(fn);
                        jffs2_complete_reservation(c);
-                        mutex_unlock(&f->sem);
                        goto out_page;
                }
                jffs2_complete_reservation(c);
                inode->i_size = pageofs;
-                mutex_unlock(&f->sem);
        }
        /*
@@ -220,18 +223,18 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
         * case of a short-copy.
         */
        if (!PageUptodate(pg)) {
-                mutex_lock(&f->sem);
                ret = jffs2_do_readpage_nolock(inode, pg);
-                mutex_unlock(&f->sem);
                if (ret)
                        goto out_page;
        }
+        mutex_unlock(&f->sem);
        jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags);
        return ret;
 out_page:
        unlock_page(pg);
        page_cache_release(pg);
+        mutex_unlock(&f->sem);
        return ret;
 }
diff --git a/fs/jfs/jfs_discard.c b/fs/jfs/jfs_discard.c
index 9947563e4175..dfcd50304559 100644
--- a/fs/jfs/jfs_discard.c
+++ b/fs/jfs/jfs_discard.c
@@ -83,7 +83,7 @@ int jfs_ioc_trim(struct inode *ip, struct fstrim_range *range)
        struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
        struct super_block *sb = ipbmap->i_sb;
        int agno, agno_end;
-        s64 start, end, minlen;
+        u64 start, end, minlen;
        u64 trimmed = 0;
        /**
@@ -93,15 +93,19 @@ int jfs_ioc_trim(struct inode *ip, struct fstrim_range *range)
         * minlen:      minimum extent length in Bytes
         */
        start = range->start >> sb->s_blocksize_bits;
-        if (start < 0)
-                start = 0;
        end = start + (range->len >> sb->s_blocksize_bits) - 1;
-        if (end >= bmp->db_mapsize)
-                end = bmp->db_mapsize - 1;
        minlen = range->minlen >> sb->s_blocksize_bits;
-        if (minlen <= 0)
+        if (minlen == 0)
                minlen = 1;
+        if (minlen > bmp->db_agsize ||
+            start >= bmp->db_mapsize ||
+            range->len < sb->s_blocksize)
+                return -EINVAL;
+        if (end >= bmp->db_mapsize)
+                end = bmp->db_mapsize - 1;
        /**
         * we trim all ag's within the range
         */
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index d269ada7670e..982d2676e1f8 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -223,7 +223,7 @@ static void encode_nlm_stat(struct xdr_stream *xdr,
 {
        __be32 *p;
-        BUG_ON(be32_to_cpu(stat) > NLM_LCK_DENIED_GRACE_PERIOD);
+        WARN_ON_ONCE(be32_to_cpu(stat) > NLM_LCK_DENIED_GRACE_PERIOD);
        p = xdr_reserve_space(xdr, 4);
        *p = stat;
 }
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index e4fb3ba5a58a..3d7e09bcc0e9 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -85,29 +85,38 @@ static struct rpc_clnt *nsm_create(struct net *net)
        return rpc_create(&args);
 }
+static struct rpc_clnt *nsm_client_set(struct lockd_net *ln,
+                struct rpc_clnt *clnt)
+{
+        spin_lock(&ln->nsm_clnt_lock);
+        if (ln->nsm_users == 0) {
+                if (clnt == NULL)
+                        goto out;
+                ln->nsm_clnt = clnt;
+        }
+        clnt = ln->nsm_clnt;
+        ln->nsm_users++;
+out:
+        spin_unlock(&ln->nsm_clnt_lock);
+        return clnt;
+}
 static struct rpc_clnt *nsm_client_get(struct net *net)
 {
-        static DEFINE_MUTEX(nsm_create_mutex);
+        struct rpc_clnt *clnt, *new;
-        struct rpc_clnt *clnt;
        struct lockd_net *ln = net_generic(net, lockd_net_id);
-        spin_lock(&ln->nsm_clnt_lock);
+        clnt = nsm_client_set(ln, NULL);
-        if (ln->nsm_users) {
+        if (clnt != NULL)
-                ln->nsm_users++;
-                clnt = ln->nsm_clnt;
-                spin_unlock(&ln->nsm_clnt_lock);
                goto out;
-        }
-        spin_unlock(&ln->nsm_clnt_lock);
-        mutex_lock(&nsm_create_mutex);
+        clnt = new = nsm_create(net);
-        clnt = nsm_create(net);
+        if (IS_ERR(clnt))
-        if (!IS_ERR(clnt)) {
+                goto out;
-                ln->nsm_clnt = clnt;
-                smp_wmb();
+        clnt = nsm_client_set(ln, new);
-                ln->nsm_users = 1;
+        if (clnt != new)
-        }
+                rpc_shutdown_client(new);
-        mutex_unlock(&nsm_create_mutex);
 out:
        return clnt;
 }
@@ -115,18 +124,16 @@ out:
 static void nsm_client_put(struct net *net)
 {
        struct lockd_net *ln = net_generic(net, lockd_net_id);
-        struct rpc_clnt *clnt = ln->nsm_clnt;
+        struct rpc_clnt *clnt = NULL;
-        int shutdown = 0;
        spin_lock(&ln->nsm_clnt_lock);
-        if (ln->nsm_users) {
+        ln->nsm_users--;
-                if (--ln->nsm_users)
+        if (ln->nsm_users == 0) {
-                        ln->nsm_clnt = NULL;
+                clnt = ln->nsm_clnt;
-                shutdown = !ln->nsm_users;
+                ln->nsm_clnt = NULL;
        }
        spin_unlock(&ln->nsm_clnt_lock);
+        if (clnt != NULL)
-        if (shutdown)
                rpc_shutdown_client(clnt);
 }
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 3009a365e082..21171f0c6477 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -68,7 +68,8 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
        /* Obtain file pointer. Not used by FREE_ALL call. */
        if (filp != NULL) {
-                if ((error = nlm_lookup_file(rqstp, &file, &lock->fh)) != 0)
+                error = cast_status(nlm_lookup_file(rqstp, &file, &lock->fh));
+                if (error != 0)
                        goto no_locks;
                *filp = file;
diff --git a/fs/namei.c b/fs/namei.c
index d1895f308156..937f9d50c84b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -705,8 +705,8 @@ static inline void put_link(struct nameidata *nd, struct path *link, void *cooki
        path_put(link);
 }
-int sysctl_protected_symlinks __read_mostly = 1;
+int sysctl_protected_symlinks __read_mostly = 0;
-int sysctl_protected_hardlinks __read_mostly = 1;
+int sysctl_protected_hardlinks __read_mostly = 0;
 /**
 * may_follow_link - Check symlink following for unsafe situations
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 9a521fb39869..5088b57b078a 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -241,7 +241,7 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt,
                svc_exit_thread(cb_info->rqst);
                cb_info->rqst = NULL;
                cb_info->task = NULL;
-                return PTR_ERR(cb_info->task);
+                return ret;
        }
        dprintk("nfs_callback_up: service started\n");
        return 0;
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index 31c26c4dcc23..ca4b11ec87a2 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -217,7 +217,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen)
 {
        char buf1[NFS_DNS_HOSTNAME_MAXLEN+1];
        struct nfs_dns_ent key, *item;
-        unsigned long ttl;
+        unsigned int ttl;
        ssize_t len;
        int ret = -EINVAL;
@@ -240,7 +240,8 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen)
        key.namelen = len;
        memset(&key.h, 0, sizeof(key.h));
-        ttl = get_expiry(&buf);
+        if (get_uint(&buf, &ttl) < 0)
+                goto out;
        if (ttl == 0)
                goto out;
        key.h.expiry_time = ttl + seconds_since_boot();
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 5c7325c5c5e6..6fa01aea2488 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -685,7 +685,10 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
        if (ctx->cred != NULL)
                put_rpccred(ctx->cred);
        dput(ctx->dentry);
-        nfs_sb_deactive(sb);
+        if (is_sync)
+                nfs_sb_deactive(sb);
+        else
+                nfs_sb_deactive_async(sb);
        kfree(ctx->mdsthreshold);
        kfree(ctx);
 }
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 59b133c5d652..05521cadac2e 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -351,10 +351,12 @@ extern int __init register_nfs_fs(void);
 extern void __exit unregister_nfs_fs(void);
 extern void nfs_sb_active(struct super_block *sb);
 extern void nfs_sb_deactive(struct super_block *sb);
+extern void nfs_sb_deactive_async(struct super_block *sb);
 /* namespace.c */
+#define NFS_PATH_CANONICAL 1
 extern char *nfs_path(char **p, struct dentry *dentry,
-                      char *buffer, ssize_t buflen);
+                      char *buffer, ssize_t buflen, unsigned flags);
 extern struct vfsmount *nfs_d_automount(struct path *path);
 struct vfsmount *nfs_submount(struct nfs_server *, struct dentry *,
                              struct nfs_fh *, struct nfs_fattr *);
@@ -498,7 +500,7 @@ static inline char *nfs_devname(struct dentry *dentry,
                                char *buffer, ssize_t buflen)
 {
        char *dummy;
-        return nfs_path(&dummy, dentry, buffer, buflen);
+        return nfs_path(&dummy, dentry, buffer, buflen, NFS_PATH_CANONICAL);
 }
 /*
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 8e65c7f1f87c..015f71f8f62c 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -181,7 +181,7 @@ int nfs_mount(struct nfs_mount_request *info)
        else
                msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC_MNT];
-        status = rpc_call_sync(mnt_clnt, &msg, 0);
+        status = rpc_call_sync(mnt_clnt, &msg, RPC_TASK_SOFT|RPC_TASK_TIMEOUT);
        rpc_shutdown_client(mnt_clnt);
        if (status < 0)
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 655925373b91..dd057bc6b65b 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -33,6 +33,7 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ;
 * @dentry - pointer to dentry
 * @buffer - result buffer
 * @buflen - length of buffer
+ * @flags - options (see below)
 *
 * Helper function for constructing the server pathname
 * by arbitrary hashed dentry.
@@ -40,8 +41,14 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ;
 * This is mainly for use in figuring out the path on the
 * server side when automounting on top of an existing partition
 * and in generating /proc/mounts and friends.
+ *
+ * Supported flags:
+ * NFS_PATH_CANONICAL: ensure there is exactly one slash after
+ *                     the original device (export) name
+ *                     (if unset, the original name is returned verbatim)
 */
-char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen)
+char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen,
+               unsigned flags)
 {
        char *end;
        int namelen;
@@ -74,7 +81,7 @@ rename_retry:
                rcu_read_unlock();
                goto rename_retry;
        }
-        if (*end != '/') {
+        if ((flags & NFS_PATH_CANONICAL) && *end != '/') {
                if (--buflen < 0) {
                        spin_unlock(&dentry->d_lock);
                        rcu_read_unlock();
@@ -91,9 +98,11 @@ rename_retry:
                return end;
        }
        namelen = strlen(base);
-        /* Strip off excess slashes in base string */
+        if (flags & NFS_PATH_CANONICAL) {
-        while (namelen > 0 && base[namelen - 1] == '/')
+                /* Strip off excess slashes in base string */
-                namelen--;
+                while (namelen > 0 && base[namelen - 1] == '/')
+                        namelen--;
+        }
        buflen -= namelen;
        if (buflen < 0) {
                spin_unlock(&dentry->d_lock);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 52d847212066..2e45fd9c02a3 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -122,12 +122,21 @@ static void filelayout_reset_read(struct nfs_read_data *data)
        }
 }
+/*
+ * If NFS_LAYOUT_RETURN is set on @lo, consume the flag, clear
+ * NFS_INO_LAYOUTCOMMIT on @inode and return the layout.
+ * Does nothing when the flag is not set.
+ */
+static void filelayout_fenceme(struct inode *inode, struct pnfs_layout_hdr *lo)
+{
+        if (test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) {
+                clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags);
+                pnfs_return_layout(inode);
+        }
+}
 static int filelayout_async_handle_error(struct rpc_task *task,
                                         struct nfs4_state *state,
                                         struct nfs_client *clp,
                                         struct pnfs_layout_segment *lseg)
 {
-        struct inode *inode = lseg->pls_layout->plh_inode;
+        struct pnfs_layout_hdr *lo = lseg->pls_layout;
+        struct inode *inode = lo->plh_inode;
        struct nfs_server *mds_server = NFS_SERVER(inode);
        struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
        struct nfs_client *mds_client = mds_server->nfs_client;
@@ -204,10 +213,8 @@ static int filelayout_async_handle_error(struct rpc_task *task,
                dprintk("%s DS connection error %d\n", __func__,
                        task->tk_status);
                nfs4_mark_deviceid_unavailable(devid);
-                clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags);
+                set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
-                _pnfs_return_layout(inode);
                rpc_wake_up(&tbl->slot_tbl_waitq);
-                nfs4_ds_disconnect(clp);
                /* fall through */
        default:
 reset:
@@ -331,7 +338,9 @@ static void filelayout_read_count_stats(struct rpc_task *task, void *data)
 static void filelayout_read_release(void *data)
 {
        struct nfs_read_data *rdata = data;
+        struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout;
+        filelayout_fenceme(lo->plh_inode, lo);
        nfs_put_client(rdata->ds_clp);
        rdata->header->mds_ops->rpc_release(data);
 }
@@ -429,7 +438,9 @@ static void filelayout_write_count_stats(struct rpc_task *task, void *data)
 static void filelayout_write_release(void *data)
 {
        struct nfs_write_data *wdata = data;
+        struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout;
+        filelayout_fenceme(lo->plh_inode, lo);
        nfs_put_client(wdata->ds_clp);
        wdata->header->mds_ops->rpc_release(data);
 }
@@ -739,7 +750,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
                goto out_err;
        if (fl->num_fh > 0) {
-                fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *),
+                fl->fh_array = kcalloc(fl->num_fh, sizeof(fl->fh_array[0]),
                                       gfp_flags);
                if (!fl->fh_array)
                        goto out_err;
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index dca47d786710..8c07241fe52b 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -149,6 +149,5 @@ extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
 extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
 struct nfs4_file_layout_dsaddr *
 filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags);
-void nfs4_ds_disconnect(struct nfs_client *clp);
 #endif /* FS_NFS_NFS4FILELAYOUT_H */
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index 3336d5eaf879..a8eaa9b7bb0f 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -149,28 +149,6 @@ _data_server_lookup_locked(const struct list_head *dsaddrs)
 }
 /*
- * Lookup DS by nfs_client pointer. Zero data server client pointer
- */
-void nfs4_ds_disconnect(struct nfs_client *clp)
-{
-        struct nfs4_pnfs_ds *ds;
-        struct nfs_client *found = NULL;
-        dprintk("%s clp %p\n", __func__, clp);
-        spin_lock(&nfs4_ds_cache_lock);
-        list_for_each_entry(ds, &nfs4_data_server_cache, ds_node)
-                if (ds->ds_clp && ds->ds_clp == clp) {
-                        found = ds->ds_clp;
-                        ds->ds_clp = NULL;
-                }
-        spin_unlock(&nfs4_ds_cache_lock);
-        if (found) {
-                set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state);
-                nfs_put_client(clp);
-        }
-}
-/*
 * Create an rpc connection to the nfs4_pnfs_ds data server
 * Currently only supports IPv4 and IPv6 addresses
 */
diff --git a/fs/nfs/nfs4getroot.c b/fs/nfs/nfs4getroot.c
index 6a83780e0ce6..549462e5b9b0 100644
--- a/fs/nfs/nfs4getroot.c
+++ b/fs/nfs/nfs4getroot.c
@@ -5,6 +5,7 @@
 #include <linux/nfs_fs.h>
 #include "nfs4_fs.h"
+#include "internal.h"
 #define NFSDBG_FACILITY         NFSDBG_CLIENT
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 79fbb61ce202..1e09eb78543b 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -81,7 +81,8 @@ static char *nfs_path_component(const char *nfspath, const char *end)
 static char *nfs4_path(struct dentry *dentry, char *buffer, ssize_t buflen)
 {
        char *limit;
-        char *path = nfs_path(&limit, dentry, buffer, buflen);
+        char *path = nfs_path(&limit, dentry, buffer, buflen,
+                              NFS_PATH_CANONICAL);
        if (!IS_ERR(path)) {
                char *path_component = nfs_path_component(path, limit);
                if (path_component)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 68b21d81b7ac..5eec4429970c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -339,8 +339,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
                        dprintk("%s ERROR: %d Reset session\n", __func__,
                                errorcode);
                        nfs4_schedule_session_recovery(clp->cl_session, errorcode);
-                        exception->retry = 1;
+                        goto wait_on_recovery;
-                        break;
 #endif /* defined(CONFIG_NFS_V4_1) */
                case -NFS4ERR_FILE_OPEN:
                        if (exception->timeout > HZ) {
@@ -1572,9 +1571,11 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
        data->timestamp = jiffies;
        if (nfs4_setup_sequence(data->o_arg.server,
                                &data->o_arg.seq_args,
-                                &data->o_res.seq_res, task))
+                                &data->o_res.seq_res,
-                return;
+                                task) != 0)
-        rpc_call_start(task);
+                nfs_release_seqid(data->o_arg.seqid);
+        else
+                rpc_call_start(task);
        return;
 unlock_no_action:
        rcu_read_unlock();
@@ -1748,7 +1749,7 @@ static int nfs4_opendata_access(struct rpc_cred *cred,
        /* even though OPEN succeeded, access is denied. Close the file */
        nfs4_close_state(state, fmode);
-        return -NFS4ERR_ACCESS;
+        return -EACCES;
 }
 /*
@@ -2196,7 +2197,7 @@ static void nfs4_free_closedata(void *data)
        nfs4_put_open_state(calldata->state);
        nfs_free_seqid(calldata->arg.seqid);
        nfs4_put_state_owner(sp);
-        nfs_sb_deactive(sb);
+        nfs_sb_deactive_async(sb);
        kfree(calldata);
 }
@@ -2296,9 +2297,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
        if (nfs4_setup_sequence(NFS_SERVER(inode),
                                &calldata->arg.seq_args,
                                &calldata->res.seq_res,
-                                task))
+                                task) != 0)
-                goto out;
+                nfs_release_seqid(calldata->arg.seqid);
-        rpc_call_start(task);
+        else
+                rpc_call_start(task);
 out:
        dprintk("%s: done!\n", __func__);
 }
@@ -4529,6 +4531,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
                        if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN)
                                rpc_restart_call_prepare(task);
        }
+        nfs_release_seqid(calldata->arg.seqid);
 }
 static void nfs4_locku_prepare(struct rpc_task *task, void *data)
@@ -4545,9 +4548,11 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
        calldata->timestamp = jiffies;
        if (nfs4_setup_sequence(calldata->server,
                                &calldata->arg.seq_args,
-                                &calldata->res.seq_res, task))
+                                &calldata->res.seq_res,
-                return;
+                                task) != 0)
-        rpc_call_start(task);
+                nfs_release_seqid(calldata->arg.seqid);
+        else
+                rpc_call_start(task);
 }
 static const struct rpc_call_ops nfs4_locku_ops = {
@@ -4692,7 +4697,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
        /* Do we need to do an open_to_lock_owner? */
        if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) {
                if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0)
-                        return;
+                        goto out_release_lock_seqid;
                data->arg.open_stateid = &state->stateid;
                data->arg.new_lock_owner = 1;
                data->res.open_seqid = data->arg.open_seqid;
@@ -4701,10 +4706,15 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
        data->timestamp = jiffies;
        if (nfs4_setup_sequence(data->server,
                                &data->arg.seq_args,
-                                &data->res.seq_res, task))
+                                &data->res.seq_res,
+                                task) == 0) {
+                rpc_call_start(task);
                return;
-        rpc_call_start(task);
+        }
-        dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status);
+        nfs_release_seqid(data->arg.open_seqid);
+out_release_lock_seqid:
+        nfs_release_seqid(data->arg.lock_seqid);
+        dprintk("%s: done!, ret = %d\n", __func__, task->tk_status);
 }
 static void nfs4_recover_lock_prepare(struct rpc_task *task, void *calldata)
@@ -5667,7 +5677,7 @@ static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl,
                tbl->slots = new;
                tbl->max_slots = max_slots;
        }
-        tbl->highest_used_slotid = -1;  /* no slot is currently used */
+        tbl->highest_used_slotid = NFS4_NO_SLOT;
        for (i = 0; i < tbl->max_slots; i++)
                tbl->slots[i].seq_nr = ivalue;
        spin_unlock(&tbl->slot_tbl_lock);
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index be731e6b7b9c..c6f990656f89 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -369,7 +369,7 @@ void objio_free_result(struct objlayout_io_res *oir)
        kfree(objios);
 }
-enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep)
+static enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep)
 {
        switch (oep) {
        case OSD_ERR_PRI_NO_ERROR:
@@ -574,7 +574,7 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
                        (unsigned long)pgio->pg_layout_private;
 }
-void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
 {
        pnfs_generic_pg_init_read(pgio, req);
        if (unlikely(pgio->pg_lseg == NULL))
@@ -604,7 +604,7 @@ static bool aligned_on_raid_stripe(u64 offset, struct ore_layout *layout,
        return false;
 }
-void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+static void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
 {
        unsigned long stripe_end = 0;
        u64 wb_size;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index fe624c91bd00..2878f97bd78d 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -925,8 +925,8 @@ pnfs_find_alloc_layout(struct inode *ino,
        if (likely(nfsi->layout == NULL)) {     /* Won the race? */
                nfsi->layout = new;
                return new;
-        }
+        } else if (new != NULL)
-        pnfs_free_layout_hdr(new);
+                pnfs_free_layout_hdr(new);
 out_existing:
        pnfs_get_layout_hdr(nfsi->layout);
        return nfsi->layout;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 2d722dba1111..dbf7bba52da0 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -62,6 +62,7 @@ enum {
        NFS_LAYOUT_RW_FAILED,           /* get rw layout failed stop trying */
        NFS_LAYOUT_BULK_RECALL,         /* bulk recall affecting layout */
        NFS_LAYOUT_ROC,                 /* some lseg had roc bit set */
+        NFS_LAYOUT_RETURN,              /* Return this layout ASAP */
 };
 enum layoutdriver_policy_flags {
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index e831bce49766..652d3f7176a9 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -54,6 +54,7 @@
 #include <linux/parser.h>
 #include <linux/nsproxy.h>
 #include <linux/rcupdate.h>
+#include <linux/kthread.h>
 #include <asm/uaccess.h>
@@ -415,6 +416,54 @@ void nfs_sb_deactive(struct super_block *sb)
 }
 EXPORT_SYMBOL_GPL(nfs_sb_deactive);
+/*
+ * Thread function handed to kthread_run(): calls deactivate_super() on the
+ * super_block passed in @ptr, then leaves through module_put_and_exit().
+ */
+static int nfs_deactivate_super_async_work(void *ptr)
+{
+        deactivate_super(ptr);
+        module_put_and_exit(0);
+        /* presumably never reached; keeps the int return type satisfied */
+        return 0;
+}
+/*
+ * Same effect as deactivate_super(), but the final unmount is done in
+ * kthread context.
+ *
+ * When sb->s_active is not 1 it is simply decremented here. When it is 1,
+ * a module reference is taken and a kthread named after the server's peer
+ * address is started to call deactivate_super(); the thread function drops
+ * the module reference on exit. If the kthread cannot be created, the call
+ * is made synchronously and the module reference is dropped here.
+ */
+static void nfs_deactivate_super_async(struct super_block *sb)
+{
+        struct task_struct *task;
+        char buf[INET6_ADDRSTRLEN + 1];
+        struct nfs_server *server = NFS_SB(sb);
+        struct nfs_client *clp = server->nfs_client;
+        if (!atomic_add_unless(&sb->s_active, -1, 1)) {
+                rcu_read_lock();
+                /*
+                 * The peer address is data and must never be used as the
+                 * format string itself: copy it through "%s".
+                 */
+                snprintf(buf, sizeof(buf), "%s",
+                        rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+                rcu_read_unlock();
+                __module_get(THIS_MODULE);
+                task = kthread_run(nfs_deactivate_super_async_work, sb,
+                                "%s-deactivate-super", buf);
+                if (IS_ERR(task)) {
+                        pr_err("%s: kthread_run: %ld\n",
+                                __func__, PTR_ERR(task));
+                        /* make synchronous call and hope for the best */
+                        deactivate_super(sb);
+                        module_put(THIS_MODULE);
+                }
+        }
+}
+/*
+ * Drop one count from the server's 'active' counter; when it reaches zero,
+ * release the superblock through nfs_deactivate_super_async().
+ */
+void nfs_sb_deactive_async(struct super_block *sb)
+{
+        if (!atomic_dec_and_test(&NFS_SB(sb)->active))
+                return;
+        nfs_deactivate_super_async(sb);
+}
+EXPORT_SYMBOL_GPL(nfs_sb_deactive_async);
 /*
 * Deliver file system statistics to userspace
 */
@@ -771,7 +820,7 @@ int nfs_show_devname(struct seq_file *m, struct dentry *root)
        int err = 0;
        if (!page)
                return -ENOMEM;
-        devname = nfs_path(&dummy, root, page, PAGE_SIZE);
+        devname = nfs_path(&dummy, root, page, PAGE_SIZE, 0);
        if (IS_ERR(devname))
                err = PTR_ERR(devname);
        else
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 13cea637eff8..3f79c77153b8 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -95,7 +95,7 @@ static void nfs_async_unlink_release(void *calldata)
        nfs_dec_sillycount(data->dir);
        nfs_free_unlinkdata(data);
-        nfs_sb_deactive(sb);
+        nfs_sb_deactive_async(sb);
 }
 static void nfs_unlink_prepare(struct rpc_task *task, void *calldata)
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index f35794b97e8e..a50636025364 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -21,6 +21,7 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
                        if ((old->path.mnt == new->path.mnt) &&
                            (old->path.dentry == new->path.dentry))
                                return true;
+                        break;
                case (FSNOTIFY_EVENT_NONE):
                        return true;
                default:
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 721d692fa8d4..6fcaeb8c902e 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -258,7 +258,8 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
        if (ret)
                goto out_close_fd;
-        fd_install(fd, f);
+        if (fd != FAN_NOFD)
+                fd_install(fd, f);
        return fanotify_event_metadata.event_len;
 out_close_fd:
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 144a96732dd7..9e28356a959a 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -873,6 +873,113 @@ static const struct file_operations proc_environ_operations = {
        .release        = mem_release,
 };
+/*
+ * Read side of the legacy /proc/<pid>/oom_adj file.
+ *
+ * Reports the task's oom_score_adj scaled back onto the oom_adj range, with
+ * OOM_SCORE_ADJ_MAX mapping to OOM_ADJUST_MAX. If the sighand lock cannot
+ * be taken, OOM_ADJUST_MIN is reported. Returns -ESRCH when the task cannot
+ * be looked up, otherwise the result of simple_read_from_buffer().
+ */
+static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
+                            loff_t *ppos)
+{
+        struct task_struct *task;
+        unsigned long flags;
+        char buffer[PROC_NUMBUF];
+        size_t len;
+        int oom_adj = OOM_ADJUST_MIN;
+        task = get_proc_task(file->f_path.dentry->d_inode);
+        if (task == NULL)
+                return -ESRCH;
+        if (lock_task_sighand(task, &flags)) {
+                int score = task->signal->oom_score_adj;
+                oom_adj = (score == OOM_SCORE_ADJ_MAX) ? OOM_ADJUST_MAX :
+                          (score * -OOM_DISABLE) / OOM_SCORE_ADJ_MAX;
+                unlock_task_sighand(task, &flags);
+        }
+        put_task_struct(task);
+        len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj);
+        return simple_read_from_buffer(buf, count, ppos, buffer, len);
+}
+/*
+ * Write side of the legacy /proc/<pid>/oom_adj file.
+ *
+ * Parses an integer from userspace (at most PROC_NUMBUF - 1 bytes are
+ * consumed), accepts OOM_DISABLE or a value within
+ * [OOM_ADJUST_MIN, OOM_ADJUST_MAX], rescales it to the oom_score_adj range
+ * and stores it in task->signal->oom_score_adj.
+ *
+ * Returns the number of bytes consumed, or a negative errno: -EFAULT when
+ * the user buffer cannot be copied, the kstrtoint() error for unparsable
+ * input, -EINVAL for an out-of-range value or a task without an mm, -ESRCH
+ * when the task or its sighand cannot be obtained, and -EACCES when the
+ * value is lowered without CAP_SYS_RESOURCE.
+ */
+static ssize_t oom_adj_write(struct file *file, const char __user *buf,
+                             size_t count, loff_t *ppos)
+{
+        struct task_struct *task;
+        char buffer[PROC_NUMBUF];
+        unsigned long flags;
+        int oom_adj;
+        int score;
+        int err;
+        memset(buffer, 0, sizeof(buffer));
+        /* always leave room for the terminating NUL */
+        if (count >= sizeof(buffer))
+                count = sizeof(buffer) - 1;
+        if (copy_from_user(buffer, buf, count)) {
+                err = -EFAULT;
+                goto out;
+        }
+        err = kstrtoint(strstrip(buffer), 0, &oom_adj);
+        if (err)
+                goto out;
+        if (oom_adj != OOM_DISABLE &&
+            (oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX)) {
+                err = -EINVAL;
+                goto out;
+        }
+        task = get_proc_task(file->f_path.dentry->d_inode);
+        if (!task) {
+                err = -ESRCH;
+                goto out;
+        }
+        task_lock(task);
+        if (!task->mm) {
+                err = -EINVAL;
+                goto err_task_lock;
+        }
+        if (!lock_task_sighand(task, &flags)) {
+                err = -ESRCH;
+                goto err_task_lock;
+        }
+        /*
+         * Rescale onto the /proc/pid/oom_score_adj range, making sure the
+         * maximum value stays attainable.
+         */
+        if (oom_adj == OOM_ADJUST_MAX)
+                score = OOM_SCORE_ADJ_MAX;
+        else
+                score = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;
+        /* lowering the value requires CAP_SYS_RESOURCE */
+        if (score < task->signal->oom_score_adj &&
+            !capable(CAP_SYS_RESOURCE)) {
+                err = -EACCES;
+                goto err_sighand;
+        }
+        /*
+         * /proc/pid/oom_adj only exists for legacy users; point them at
+         * /proc/pid/oom_score_adj (warned once).
+         */
+        printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
+                  current->comm, task_pid_nr(current), task_pid_nr(task),
+                  task_pid_nr(task));
+        task->signal->oom_score_adj = score;
+        trace_oom_score_adj_update(task);
+err_sighand:
+        unlock_task_sighand(task, &flags);
+err_task_lock:
+        task_unlock(task);
+        put_task_struct(task);
+out:
+        if (err < 0)
+                return err;
+        return count;
+}
+/* File operations for the legacy oom_adj proc entry. */
+static const struct file_operations proc_oom_adj_operations = {
+        .llseek         = generic_file_llseek,
+        .read           = oom_adj_read,
+        .write          = oom_adj_write,
+};
 static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
                                        size_t count, loff_t *ppos)
 {
@@ -1770,8 +1877,9 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
        if (!vma)
                goto out_no_vma;
-        result = proc_map_files_instantiate(dir, dentry, task,
+        if (vma->vm_file)
-                        (void *)(unsigned long)vma->vm_file->f_mode);
+                result = proc_map_files_instantiate(dir, dentry, task,
+                                (void *)(unsigned long)vma->vm_file->f_mode);
 out_no_vma:
        up_read(&mm->mmap_sem);
@@ -2598,6 +2706,7 @@ static const struct pid_entry tgid_base_stuff[] = {
        REG("cgroup",  S_IRUGO, proc_cgroup_operations),
 #endif
        INF("oom_score",  S_IRUGO, proc_oom_score),
+        REG("oom_adj",    S_IRUGO|S_IWUSR, proc_oom_adj_operations),
        REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
 #ifdef CONFIG_AUDITSYSCALL
        REG("loginuid",   S_IWUSR|S_IRUGO, proc_loginuid_operations),
@@ -2964,6 +3073,7 @@ static const struct pid_entry tid_base_stuff[] = {
        REG("cgroup",  S_IRUGO, proc_cgroup_operations),
 #endif
        INF("oom_score", S_IRUGO, proc_oom_score),
+        REG("oom_adj",   S_IRUGO|S_IWUSR, proc_oom_adj_operations),
        REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
 #ifdef CONFIG_AUDITSYSCALL
        REG("loginuid",  S_IWUSR|S_IRUGO, proc_loginuid_operations),
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index cceaab07ad54..43973b084abf 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -12,6 +12,7 @@
 #include <linux/sched.h>
 #include <linux/proc_fs.h>
 struct  ctl_table_header;
+struct  mempolicy;
 extern struct proc_dir_entry proc_root;
 #ifdef CONFIG_PROC_SYSCTL
@@ -74,6 +75,9 @@ struct proc_maps_private {
 #ifdef CONFIG_MMU
        struct vm_area_struct *tail_vma;
 #endif
+#ifdef CONFIG_NUMA
+        struct mempolicy *task_mempolicy;
+#endif
 };
 void proc_init_inodecache(void);
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 64c3b3172367..e296572c73ed 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -45,10 +45,13 @@ static cputime64_t get_iowait_time(int cpu)
 static u64 get_idle_time(int cpu)
 {
-        u64 idle, idle_time = get_cpu_idle_time_us(cpu, NULL);
+        u64 idle, idle_time = -1ULL;
+        if (cpu_online(cpu))
+                idle_time = get_cpu_idle_time_us(cpu, NULL);
        if (idle_time == -1ULL)
-                /* !NO_HZ so we can rely on cpustat.idle */
+                /* !NO_HZ or cpu offline so we can rely on cpustat.idle */
                idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
        else
                idle = usecs_to_cputime64(idle_time);
@@ -58,10 +61,13 @@ static u64 get_idle_time(int cpu)
 static u64 get_iowait_time(int cpu)
 {
-        u64 iowait, iowait_time = get_cpu_iowait_time_us(cpu, NULL);
+        u64 iowait, iowait_time = -1ULL;
+        if (cpu_online(cpu))
+                iowait_time = get_cpu_iowait_time_us(cpu, NULL);
        if (iowait_time == -1ULL)
-                /* !NO_HZ so we can rely on cpustat.iowait */
+                /* !NO_HZ or cpu offline so we can rely on cpustat.iowait */
                iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
        else
                iowait = usecs_to_cputime64(iowait_time);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 79827ce03e3b..90c63f9392a5 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -90,10 +90,55 @@ static void pad_len_spaces(struct seq_file *m, int len)
        seq_printf(m, "%*c", len, ' ');
 }
+#ifdef CONFIG_NUMA
+/*
+ * These functions are for numa_maps but called in generic **maps seq_file
+ * ->start(), ->stop() ops.
+ *
+ * numa_maps scans all vmas under mmap_sem and checks their mempolicy.
+ * Each mempolicy object is controlled by reference counting. The problem here
+ * is how to avoid accessing dead mempolicy object.
+ *
+ * Because we're holding mmap_sem while reading seq_file, it's safe to access
+ * each vma's mempolicy: no vma object will ever drop its ref to a mempolicy.
+ *
+ * A task's mempolicy (task->mempolicy) has different behavior. task->mempolicy
+ * is set and replaced under mmap_sem but unrefed and cleared under task_lock().
+ * So, without task_lock(), we cannot trust get_vma_policy() because we cannot
+ * guarantee the task never exits under us. But taking task_lock() around
+ * get_vma_policy() causes a lock order problem.
+ *
+ * To access task->mempolicy without lock, we hold a reference count of an
+ * object pointed by task->mempolicy and remember it. This will guarantee
+ * that task->mempolicy points to an alive object or NULL in numa_maps accesses.
+ */
+/*
+ * Under task_lock(), take a reference on the task's current mempolicy and
+ * remember it in priv->task_mempolicy.
+ */
+static void hold_task_mempolicy(struct proc_maps_private *priv)
+{
+        struct task_struct *tsk = priv->task;
+        struct mempolicy *pol;
+        task_lock(tsk);
+        pol = tsk->mempolicy;
+        priv->task_mempolicy = pol;
+        mpol_get(pol);
+        task_unlock(tsk);
+}
+/* Drop the mempolicy reference remembered in priv->task_mempolicy. */
+static void release_task_mempolicy(struct proc_maps_private *priv)
+{
+        struct mempolicy *pol = priv->task_mempolicy;
+        mpol_put(pol);
+}
+#else
+/* !CONFIG_NUMA variant: nothing to hold. */
+static void hold_task_mempolicy(struct proc_maps_private *priv)
+{
+        /* intentionally empty */
+}
+/* !CONFIG_NUMA variant: nothing to release. */
+static void release_task_mempolicy(struct proc_maps_private *priv)
+{
+        /* intentionally empty */
+}
+#endif
 static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
 {
        if (vma && vma != priv->tail_vma) {
                struct mm_struct *mm = vma->vm_mm;
+                release_task_mempolicy(priv);
                up_read(&mm->mmap_sem);
                mmput(mm);
        }
@@ -132,7 +177,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
        tail_vma = get_gate_vma(priv->task->mm);
        priv->tail_vma = tail_vma;
+        hold_task_mempolicy(priv);
        /* Start with last addr hint */
        vma = find_vma(mm, last_addr);
        if (last_addr && vma) {
@@ -159,6 +204,7 @@ out:
        if (vma)
                return vma;
+        release_task_mempolicy(priv);
        /* End of vmas has been reached */
        m->version = (tail_vma != NULL)? 0: -1UL;
        up_read(&mm->mmap_sem);
@@ -1158,6 +1204,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
        struct vm_area_struct *vma = v;
        struct numa_maps *md = &numa_priv->md;
        struct file *file = vma->vm_file;
+        struct task_struct *task = proc_priv->task;
        struct mm_struct *mm = vma->vm_mm;
        struct mm_walk walk = {};
        struct mempolicy *pol;
@@ -1177,7 +1224,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
        walk.private = md;
        walk.mm = mm;
-        pol = get_vma_policy(proc_priv->task, vma, vma->vm_start);
+        pol = get_vma_policy(task, vma, vma->vm_start);
        mpol_to_str(buffer, sizeof(buffer), pol, 0);
        mpol_cond_put(pol);
@@ -1189,7 +1236,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
        } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
                seq_printf(m, " heap");
        } else {
-                pid_t tid = vm_is_stack(proc_priv->task, vma, is_pid);
+                pid_t tid = vm_is_stack(task, vma, is_pid);
                if (tid != 0) {
                        /*
                         * Thread stack in /proc/PID/task/TID/maps or
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index a40da07e93d6..947fbe06c3b1 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -161,6 +161,7 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c)
        while (s < e) {
                unsigned long flags;
+                u64 id;
                if (c > psinfo->bufsize)
                        c = psinfo->bufsize;
@@ -172,7 +173,7 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c)
                        spin_lock_irqsave(&psinfo->buf_lock, flags);
                }
                memcpy(psinfo->buf, s, c);
-                psinfo->write(PSTORE_TYPE_CONSOLE, 0, NULL, 0, c, psinfo);
+                psinfo->write(PSTORE_TYPE_CONSOLE, 0, &id, 0, c, psinfo);
                spin_unlock_irqrestore(&psinfo->buf_lock, flags);
                s += c;
                c = e - s;
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 557a9c20a215..05ae3c97f7a5 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1160,6 +1160,8 @@ static int need_print_warning(struct dquot_warn *warn)
                        return uid_eq(current_fsuid(), warn->w_dq_id.uid);
                case GRPQUOTA:
                        return in_group_p(warn->w_dq_id.gid);
+                case PRJQUOTA:  /* Never taken... Just make gcc happy */
+                        return 0;
        }
        return 0;
 }
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index f27f01a98aa2..d83736fbc26c 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1782,8 +1782,9 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
        BUG_ON(!th->t_trans_id);
-        dquot_initialize(inode);
+        reiserfs_write_unlock(inode->i_sb);
        err = dquot_alloc_inode(inode);
+        reiserfs_write_lock(inode->i_sb);
        if (err)
                goto out_end_trans;
        if (!dir->i_nlink) {
@@ -1979,8 +1980,10 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
      out_end_trans:
        journal_end(th, th->t_super, th->t_blocks_allocated);
+        reiserfs_write_unlock(inode->i_sb);
        /* Drop can be outside and it needs more credits so it's better to have it outside */
        dquot_drop(inode);
+        reiserfs_write_lock(inode->i_sb);
        inode->i_flags |= S_NOQUOTA;
        make_bad_inode(inode);
@@ -3103,10 +3106,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
        /* must be turned off for recursive notify_change calls */
        ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
-        depth = reiserfs_write_lock_once(inode->i_sb);
        if (is_quota_modification(inode, attr))
                dquot_initialize(inode);
+        depth = reiserfs_write_lock_once(inode->i_sb);
        if (attr->ia_valid & ATTR_SIZE) {
                /* version 2 items will be caught by the s_maxbytes check
                 ** done for us in vmtruncate
@@ -3170,7 +3172,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
                error = journal_begin(&th, inode->i_sb, jbegin_count);
                if (error)
                        goto out;
+                reiserfs_write_unlock_once(inode->i_sb, depth);
                error = dquot_transfer(inode, attr);
+                depth = reiserfs_write_lock_once(inode->i_sb);
                if (error) {
                        journal_end(&th, inode->i_sb, jbegin_count);
                        goto out;
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index f8afa4b162b8..2f40a4c70a4d 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -1968,7 +1968,9 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree
                       key2type(&(key->on_disk_key)));
 #endif
+        reiserfs_write_unlock(inode->i_sb);
        retval = dquot_alloc_space_nodirty(inode, pasted_size);
+        reiserfs_write_lock(inode->i_sb);
        if (retval) {
                pathrelse(search_path);
                return retval;
@@ -2061,9 +2063,11 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
                               "reiserquota insert_item(): allocating %u id=%u type=%c",
                               quota_bytes, inode->i_uid, head2type(ih));
 #endif
+                reiserfs_write_unlock(inode->i_sb);
                /* We can't dirty inode here. It would be immediately written but
                 * appropriate stat item isn't inserted yet... */
                retval = dquot_alloc_space_nodirty(inode, quota_bytes);
+                reiserfs_write_lock(inode->i_sb);
                if (retval) {
                        pathrelse(path);
                        return retval;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 1078ae179993..418bdc3a57da 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -298,7 +298,9 @@ static int finish_unfinished(struct super_block *s)
                        retval = remove_save_link_only(s, &save_link_key, 0);
                        continue;
                }
+                reiserfs_write_unlock(s);
                dquot_initialize(inode);
+                reiserfs_write_lock(s);
                if (truncate && S_ISDIR(inode->i_mode)) {
                        /* We got a truncate request for a dir which is impossible.
@@ -1335,7 +1337,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
                                kfree(qf_names[i]);
 #endif
                err = -EINVAL;
-                goto out_err;
+                goto out_unlock;
        }
 #ifdef CONFIG_QUOTA
        handle_quota_files(s, qf_names, &qfmt);
@@ -1379,7 +1381,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
        if (blocks) {
                err = reiserfs_resize(s, blocks);
                if (err != 0)
-                        goto out_err;
+                        goto out_unlock;
        }
        if (*mount_flags & MS_RDONLY) {
@@ -1389,9 +1391,15 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
                        /* it is read-only already */
                        goto out_ok;
+                /*
+                 * Drop write lock. Quota will retake it when needed and lock
+                 * ordering requires calling dquot_suspend() without it.
+                 */
+                reiserfs_write_unlock(s);
                err = dquot_suspend(s, -1);
                if (err < 0)
                        goto out_err;
+                reiserfs_write_lock(s);
                /* try to remount file system with read-only permissions */
                if (sb_umount_state(rs) == REISERFS_VALID_FS
@@ -1401,7 +1409,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
                err = journal_begin(&th, s, 10);
                if (err)
-                        goto out_err;
+                        goto out_unlock;
                /* Mounting a rw partition read-only. */
                reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
@@ -1416,7 +1424,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
                if (reiserfs_is_journal_aborted(journal)) {
                        err = journal->j_errno;
-                        goto out_err;
+                        goto out_unlock;
                }
                handle_data_mode(s, mount_options);
@@ -1425,7 +1433,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
                s->s_flags &= ~MS_RDONLY;       /* now it is safe to call journal_begin */
                err = journal_begin(&th, s, 10);
                if (err)
-                        goto out_err;
+                        goto out_unlock;
                /* Mount a partition which is read-only, read-write */
                reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
@@ -1442,10 +1450,16 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
        SB_JOURNAL(s)->j_must_wait = 1;
        err = journal_end(&th, s, 10);
        if (err)
-                goto out_err;
+                goto out_unlock;
        if (!(*mount_flags & MS_RDONLY)) {
+                /*
+                 * Drop write lock. Quota will retake it when needed and lock
+                 * ordering requires calling dquot_resume() without it.
+                 */
+                reiserfs_write_unlock(s);
                dquot_resume(s, -1);
+                reiserfs_write_lock(s);
                finish_unfinished(s);
                reiserfs_xattr_init(s, *mount_flags);
        }
@@ -1455,9 +1469,10 @@ out_ok:
        reiserfs_write_unlock(s);
        return 0;
+out_unlock:
+        reiserfs_write_unlock(s);
 out_err:
        kfree(new_opts);
-        reiserfs_write_unlock(s);
        return err;
 }
@@ -2095,13 +2110,15 @@ static int reiserfs_write_dquot(struct dquot *dquot)
                          REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
        if (ret)
                goto out;
+        reiserfs_write_unlock(dquot->dq_sb);
        ret = dquot_commit(dquot);
+        reiserfs_write_lock(dquot->dq_sb);
        err =
            journal_end(&th, dquot->dq_sb,
                        REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
        if (!ret && err)
                ret = err;
-      out:
+out:
        reiserfs_write_unlock(dquot->dq_sb);
        return ret;
 }
@@ -2117,13 +2134,15 @@ static int reiserfs_acquire_dquot(struct dquot *dquot)
                          REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
        if (ret)
                goto out;
+        reiserfs_write_unlock(dquot->dq_sb);
        ret = dquot_acquire(dquot);
+        reiserfs_write_lock(dquot->dq_sb);
        err =
            journal_end(&th, dquot->dq_sb,
                        REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
        if (!ret && err)
                ret = err;
-      out:
+out:
        reiserfs_write_unlock(dquot->dq_sb);
        return ret;
 }
@@ -2137,19 +2156,21 @@ static int reiserfs_release_dquot(struct dquot *dquot)
        ret =
            journal_begin(&th, dquot->dq_sb,
                          REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
+        reiserfs_write_unlock(dquot->dq_sb);
        if (ret) {
                /* Release dquot anyway to avoid endless cycle in dqput() */
                dquot_release(dquot);
                goto out;
        }
        ret = dquot_release(dquot);
+        reiserfs_write_lock(dquot->dq_sb);
        err =
            journal_end(&th, dquot->dq_sb,
                        REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
        if (!ret && err)
                ret = err;
-      out:
        reiserfs_write_unlock(dquot->dq_sb);
+out:
        return ret;
 }
@@ -2174,11 +2195,13 @@ static int reiserfs_write_info(struct super_block *sb, int type)
        ret = journal_begin(&th, sb, 2);
        if (ret)
                goto out;
+        reiserfs_write_unlock(sb);
        ret = dquot_commit_info(sb, type);
+        reiserfs_write_lock(sb);
        err = journal_end(&th, sb, 2);
        if (!ret && err)
                ret = err;
-      out:
+out:
        reiserfs_write_unlock(sb);
        return ret;
 }
@@ -2203,8 +2226,11 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
        struct reiserfs_transaction_handle th;
        int opt = type == USRQUOTA ? REISERFS_USRQUOTA : REISERFS_GRPQUOTA;
-        if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt)))
+        reiserfs_write_lock(sb);
-                return -EINVAL;
+        if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt))) {
+                err = -EINVAL;
+                goto out;
+        }
        /* Quotafile not on the same filesystem? */
        if (path->dentry->d_sb != sb) {
@@ -2246,8 +2272,10 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
                if (err)
                        goto out;
        }
-        err = dquot_quota_on(sb, type, format_id, path);
+        reiserfs_write_unlock(sb);
+        return dquot_quota_on(sb, type, format_id, path);
 out:
+        reiserfs_write_unlock(sb);
        return err;
 }
@@ -2320,7 +2348,9 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
                tocopy = sb->s_blocksize - offset < towrite ?
                    sb->s_blocksize - offset : towrite;
                tmp_bh.b_state = 0;
+                reiserfs_write_lock(sb);
                err = reiserfs_get_block(inode, blk, &tmp_bh, GET_BLOCK_CREATE);
+                reiserfs_write_unlock(sb);
                if (err)
                        goto out;
                if (offset || tocopy != sb->s_blocksize)
@@ -2336,10 +2366,12 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
                flush_dcache_page(bh->b_page);
                set_buffer_uptodate(bh);
                unlock_buffer(bh);
+                reiserfs_write_lock(sb);
                reiserfs_prepare_for_journal(sb, bh, 1);
                journal_mark_dirty(current->journal_info, sb, bh);
                if (!journal_quota)
                        reiserfs_add_ordered_list(inode, bh);
+                reiserfs_write_unlock(sb);
                brelse(bh);
                offset = 0;
                towrite -= tocopy;
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 6b0bb00d4d2b..2fbdff6be25c 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -485,20 +485,18 @@ int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
 /**
 *      sysfs_pathname - return full path to sysfs dirent
 *      @sd: sysfs_dirent whose path we want
- *      @path: caller allocated buffer
+ *      @path: caller allocated buffer of size PATH_MAX
 *
 *      Gives the name "/" to the sysfs_root entry; any path returned
 *      is relative to wherever sysfs is mounted.
- *
- *      XXX: does no error checking on @path size
 */
 static char *sysfs_pathname(struct sysfs_dirent *sd, char *path)
 {
        if (sd->s_parent) {
                sysfs_pathname(sd->s_parent, path);
-                strcat(path, "/");
+                strlcat(path, "/", PATH_MAX);
        }
-        strcat(path, sd->s_name);
+        strlcat(path, sd->s_name, PATH_MAX);
        return path;
 }
@@ -531,9 +529,11 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
                char *path = kzalloc(PATH_MAX, GFP_KERNEL);
                WARN(1, KERN_WARNING
                     "sysfs: cannot create duplicate filename '%s'\n",
-                     (path == NULL) ? sd->s_name :
+                     (path == NULL) ? sd->s_name
-                     strcat(strcat(sysfs_pathname(acxt->parent_sd, path), "/"),
+                                    : (sysfs_pathname(acxt->parent_sd, path),
-                            sd->s_name));
+                                       strlcat(path, "/", PATH_MAX),
+                                       strlcat(path, sd->s_name, PATH_MAX),
+                                       path));
                kfree(path);
        }
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index 28ec13af28d9..2dcf3d473fec 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -681,8 +681,16 @@ int ubifs_find_free_leb_for_idx(struct ubifs_info *c)
        if (!lprops) {
                lprops = ubifs_fast_find_freeable(c);
                if (!lprops) {
-                        ubifs_assert(c->freeable_cnt == 0);
+                        /*
-                        if (c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) {
+                         * The first condition means the following: go scan the
+                         * LPT if there are uncategorized lprops, which means
+                         * there may be freeable LEBs there (UBIFS does not
+                         * store the information about freeable LEBs in the
+                         * master node).
+                         */
+                        if (c->in_a_category_cnt != c->main_lebs ||
+                            c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) {
+                                ubifs_assert(c->freeable_cnt == 0);
                                lprops = scan_for_leb_for_idx(c);
                                if (IS_ERR(lprops)) {
                                        err = PTR_ERR(lprops);
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index e5a2a35a46dc..46190a7c42a6 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -300,8 +300,11 @@ void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops,
        default:
                ubifs_assert(0);
        }
        lprops->flags &= ~LPROPS_CAT_MASK;
        lprops->flags |= cat;
+        c->in_a_category_cnt += 1;
+        ubifs_assert(c->in_a_category_cnt <= c->main_lebs);
 }
 /**
@@ -334,6 +337,9 @@ static void ubifs_remove_from_cat(struct ubifs_info *c,
        default:
                ubifs_assert(0);
        }
+        c->in_a_category_cnt -= 1;
+        ubifs_assert(c->in_a_category_cnt >= 0);
 }
 /**
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 5486346d0a3f..d133c276fe05 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1183,6 +1183,8 @@ struct ubifs_debug_info;
 * @freeable_list: list of freeable non-index LEBs (free + dirty == @leb_size)
 * @frdi_idx_list: list of freeable index LEBs (free + dirty == @leb_size)
 * @freeable_cnt: number of freeable LEBs in @freeable_list
+ * @in_a_category_cnt: count of lprops which are in a certain category, which
+ *                     basically means that they were loaded from the flash
 *
 * @ltab_lnum: LEB number of LPT's own lprops table
 * @ltab_offs: offset of LPT's own lprops table
@@ -1412,6 +1414,7 @@ struct ubifs_info {
        struct list_head freeable_list;
        struct list_head frdi_idx_list;
        int freeable_cnt;
+        int in_a_category_cnt;
        int ltab_lnum;
        int ltab_offs;
diff --git a/fs/xattr.c b/fs/xattr.c
index e164dddb8e96..e21c119f4f99 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -846,7 +846,7 @@ static int __simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
                              const void *value, size_t size, int flags)
 {
        struct simple_xattr *xattr;
-        struct simple_xattr *uninitialized_var(new_xattr);
+        struct simple_xattr *new_xattr = NULL;
        int err = 0;
        /* value == NULL means remove */
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 4f33c32affe3..335206a9c698 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1866,6 +1866,7 @@ xfs_alloc_fix_freelist(
        /*
         * Initialize the args structure.
         */
+        memset(&targs, 0, sizeof(targs));
        targs.tp = tp;
        targs.mp = mp;
        targs.agbp = agbp;
@@ -2207,7 +2208,7 @@ xfs_alloc_read_agf(
 * group or loop over the allocation groups to find the result.
 */
 int                             /* error */
-__xfs_alloc_vextent(
+xfs_alloc_vextent(
        xfs_alloc_arg_t *args)  /* allocation argument structure */
 {
        xfs_agblock_t   agsize; /* allocation group size */
@@ -2417,46 +2418,6 @@ error0:
        return error;
 }
-static void
-xfs_alloc_vextent_worker(
-        struct work_struct      *work)
-{
-        struct xfs_alloc_arg    *args = container_of(work,
-                                                struct xfs_alloc_arg, work);
-        unsigned long           pflags;
-        /* we are in a transaction context here */
-        current_set_flags_nested(&pflags, PF_FSTRANS);
-        args->result = __xfs_alloc_vextent(args);
-        complete(args->done);
-        current_restore_flags_nested(&pflags, PF_FSTRANS);
-}
-/*
- * Data allocation requests often come in with little stack to work on. Push
- * them off to a worker thread so there is lots of stack to use. Metadata
- * requests, OTOH, are generally from low stack usage paths, so avoid the
- * context switch overhead here.
- */
-int
-xfs_alloc_vextent(
-        struct xfs_alloc_arg    *args)
-{
-        DECLARE_COMPLETION_ONSTACK(done);
-        if (!args->userdata)
-                return __xfs_alloc_vextent(args);
-        args->done = &done;
-        INIT_WORK_ONSTACK(&args->work, xfs_alloc_vextent_worker);
-        queue_work(xfs_alloc_wq, &args->work);
-        wait_for_completion(&done);
-        return args->result;
-}
 /*
 * Free an extent.
 * Just break up the extent address and hand off to xfs_free_ag_extent
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 93be4a667ca1..feacb061bab7 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -120,9 +120,6 @@ typedef struct xfs_alloc_arg {
        char            isfl;           /* set if is freelist blocks - !acctg */
        char            userdata;       /* set if this is user data */
        xfs_fsblock_t   firstblock;     /* io first block allocated */
-        struct completion *done;
-        struct work_struct work;
-        int             result;
 } xfs_alloc_arg_t;
 /*
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index f1647caace8f..f7876c6d6165 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -121,6 +121,8 @@ xfs_allocbt_free_block(
        xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
                              XFS_EXTENT_BUSY_SKIP_DISCARD);
        xfs_trans_agbtree_delta(cur->bc_tp, -1);
+        xfs_trans_binval(cur->bc_tp, bp);
        return 0;
 }
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index e562dd43f41f..e57e2daa357c 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -481,11 +481,17 @@ static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
 *
 * The fix is two passes across the ioend list - one to start writeback on the
 * buffer_heads, and then submit them for I/O on the second pass.
+ *
+ * If @fail is non-zero, it means that we have a situation where some part of
+ * the submission process has failed after we have marked pages for writeback
+ * and unlocked them. In this situation, we need to fail the ioend chain rather
+ * than submit it to IO. This typically only happens on a filesystem shutdown.
 */
 STATIC void
 xfs_submit_ioend(
        struct writeback_control *wbc,
-        xfs_ioend_t             *ioend)
+        xfs_ioend_t             *ioend,
+        int                     fail)
 {
        xfs_ioend_t             *head = ioend;
        xfs_ioend_t             *next;
@@ -506,6 +512,18 @@ xfs_submit_ioend(
                next = ioend->io_list;
                bio = NULL;
+                /*
+                 * If we are failing the IO now, just mark the ioend with an
+                 * error and finish it. This will run IO completion immediately
+                 * as there is only one reference to the ioend at this point in
+                 * time.
+                 */
+                if (fail) {
+                        ioend->io_error = -fail;
+                        xfs_finish_ioend(ioend);
+                        continue;
+                }
                for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
                        if (!bio) {
@@ -1060,7 +1078,18 @@ xfs_vm_writepage(
        xfs_start_page_writeback(page, 1, count);
-        if (ioend && imap_valid) {
+        /* if there is no IO to be submitted for this page, we are done */
+        if (!ioend)
+                return 0;
+        ASSERT(iohead);
+        /*
+         * Any errors from this point onwards need to be reported through the IO
+         * completion path as we have marked the initial page as under writeback
+         * and unlocked it.
+         */
+        if (imap_valid) {
                xfs_off_t               end_index;
                end_index = imap.br_startoff + imap.br_blockcount;
@@ -1079,20 +1108,15 @@ xfs_vm_writepage(
                                  wbc, end_index);
        }
-        if (iohead) {
-                /*
-                 * Reserve log space if we might write beyond the on-disk
-                 * inode size.
-                 */
-                if (ioend->io_type != XFS_IO_UNWRITTEN &&
-                    xfs_ioend_is_append(ioend)) {
-                        err = xfs_setfilesize_trans_alloc(ioend);
-                        if (err)
-                                goto error;
-                }
-                xfs_submit_ioend(wbc, iohead);
+        /*
-        }
+         * Reserve log space if we might write beyond the on-disk inode size.
+         */
+        err = 0;
+        if (ioend->io_type != XFS_IO_UNWRITTEN && xfs_ioend_is_append(ioend))
+                err = xfs_setfilesize_trans_alloc(ioend);
+        xfs_submit_ioend(wbc, iohead, err);
        return 0;
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index d330111ca738..70eec1829776 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -1291,6 +1291,7 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
        leaf2 = blk2->bp->b_addr;
        ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
        ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+        ASSERT(leaf2->hdr.count == 0);
        args = state->args;
        trace_xfs_attr_leaf_rebalance(args);
@@ -1361,6 +1362,7 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
                 * I assert that since all callers pass in an empty
                 * second buffer, this code should never execute.
                 */
+                ASSERT(0);
                /*
                 * Figure the total bytes to be added to the destination leaf.
@@ -1422,10 +1424,24 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
                        args->index2 = 0;
                        args->blkno2 = blk2->blkno;
                } else {
+                        /*
+                         * On a double leaf split, the original attr location
+                         * is already stored in blkno2/index2, so don't
+                         * overwrite it, otherwise we corrupt the tree.
+                         */
                        blk2->index = blk1->index
                                    - be16_to_cpu(leaf1->hdr.count);
-                        args->index = args->index2 = blk2->index;
+                        args->index = blk2->index;
-                        args->blkno = args->blkno2 = blk2->blkno;
+                        args->blkno = blk2->blkno;
+                        if (!state->extravalid) {
+                                /*
+                                 * set the new attr location to match the old
+                                 * one and let the higher level split code
+                                 * decide where in the leaf to place it.
+                                 */
+                                args->index2 = blk2->index;
+                                args->blkno2 = blk2->blkno;
+                        }
                }
        } else {
                ASSERT(state->inleaf == 1);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 848ffa77707b..83d0cf3df930 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2437,6 +2437,7 @@ xfs_bmap_btalloc(
         * Normal allocation, done through xfs_alloc_vextent.
         */
        tryagain = isaligned = 0;
+        memset(&args, 0, sizeof(args));
        args.tp = ap->tp;
        args.mp = mp;
        args.fsbno = ap->blkno;
@@ -3082,6 +3083,7 @@ xfs_bmap_extents_to_btree(
         * Convert to a btree with two levels, one record in root.
         */
        XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
+        memset(&args, 0, sizeof(args));
        args.tp = tp;
        args.mp = mp;
        args.firstblock = *firstblock;
@@ -3237,6 +3239,7 @@ xfs_bmap_local_to_extents(
                xfs_buf_t       *bp;    /* buffer for extent block */
                xfs_bmbt_rec_host_t *ep;/* extent record pointer */
+                memset(&args, 0, sizeof(args));
                args.tp = tp;
                args.mp = ip->i_mount;
                args.firstblock = *firstblock;
@@ -4616,12 +4619,11 @@ xfs_bmapi_delay(
 STATIC int
-xfs_bmapi_allocate(
+__xfs_bmapi_allocate(
-        struct xfs_bmalloca     *bma,
+        struct xfs_bmalloca     *bma)
-        int                     flags)
 {
        struct xfs_mount        *mp = bma->ip->i_mount;
-        int                     whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
+        int                     whichfork = (bma->flags & XFS_BMAPI_ATTRFORK) ?
                                                XFS_ATTR_FORK : XFS_DATA_FORK;
        struct xfs_ifork        *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
        int                     tmp_logflags = 0;
@@ -4654,24 +4656,27 @@ xfs_bmapi_allocate(
         * Indicate if this is the first user data in the file, or just any
         * user data.
         */
-        if (!(flags & XFS_BMAPI_METADATA)) {
+        if (!(bma->flags & XFS_BMAPI_METADATA)) {
                bma->userdata = (bma->offset == 0) ?
                        XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA;
        }
-        bma->minlen = (flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
+        bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
        /*
         * Only want to do the alignment at the eof if it is userdata and
         * allocation length is larger than a stripe unit.
         */
        if (mp->m_dalign && bma->length >= mp->m_dalign &&
-            !(flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
+            !(bma->flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
                error = xfs_bmap_isaeof(bma, whichfork);
                if (error)
                        return error;
        }
+        if (bma->flags & XFS_BMAPI_STACK_SWITCH)
+                bma->stack_switch = 1;
        error = xfs_bmap_alloc(bma);
        if (error)
                return error;
@@ -4706,7 +4711,7 @@ xfs_bmapi_allocate(
         * A wasdelay extent has been initialized, so shouldn't be flagged
         * as unwritten.
         */
-        if (!bma->wasdel && (flags & XFS_BMAPI_PREALLOC) &&
+        if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) &&
            xfs_sb_version_hasextflgbit(&mp->m_sb))
                bma->got.br_state = XFS_EXT_UNWRITTEN;
@@ -4734,6 +4739,45 @@ xfs_bmapi_allocate(
        return 0;
 }
+static void
+xfs_bmapi_allocate_worker(
+        struct work_struct      *work)
+{
+        struct xfs_bmalloca     *args = container_of(work,
+                                                struct xfs_bmalloca, work);
+        unsigned long           pflags;
+        /* we are in a transaction context here */
+        current_set_flags_nested(&pflags, PF_FSTRANS);
+        args->result = __xfs_bmapi_allocate(args);
+        complete(args->done);
+        current_restore_flags_nested(&pflags, PF_FSTRANS);
+}
+/*
+ * Some allocation requests often come in with little stack to work on. Push
+ * them off to a worker thread so there is lots of stack to use. Otherwise just
+ * call directly to avoid the context switch overhead here.
+ */
+int
+xfs_bmapi_allocate(
+        struct xfs_bmalloca     *args)
+{
+        DECLARE_COMPLETION_ONSTACK(done);
+        if (!args->stack_switch)
+                return __xfs_bmapi_allocate(args);
+        args->done = &done;
+        INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker);
+        queue_work(xfs_alloc_wq, &args->work);
+        wait_for_completion(&done);
+        return args->result;
+}
 STATIC int
 xfs_bmapi_convert_unwritten(
        struct xfs_bmalloca     *bma,
@@ -4919,6 +4963,7 @@ xfs_bmapi_write(
                        bma.conv = !!(flags & XFS_BMAPI_CONVERT);
                        bma.wasdel = wasdelay;
                        bma.offset = bno;
+                        bma.flags = flags;
                        /*
                         * There's a 32/64 bit type mismatch between the
@@ -4934,7 +4979,7 @@ xfs_bmapi_write(
                        ASSERT(len > 0);
                        ASSERT(bma.length > 0);
-                        error = xfs_bmapi_allocate(&bma, flags);
+                        error = xfs_bmapi_allocate(&bma);
                        if (error)
                                goto error0;
                        if (bma.blkno == NULLFSBLOCK)
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 803b56d7ce16..5f469c3516eb 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -77,6 +77,7 @@ typedef	struct xfs_bmap_free
 * from written to unwritten, otherwise convert from unwritten to written.
 */
 #define XFS_BMAPI_CONVERT       0x040
+#define XFS_BMAPI_STACK_SWITCH  0x080
 #define XFS_BMAPI_FLAGS \
        { XFS_BMAPI_ENTIRE,     "ENTIRE" }, \
@@ -85,7 +86,8 @@ typedef	struct xfs_bmap_free
        { XFS_BMAPI_PREALLOC,   "PREALLOC" }, \
        { XFS_BMAPI_IGSTATE,    "IGSTATE" }, \
        { XFS_BMAPI_CONTIG,     "CONTIG" }, \
-        { XFS_BMAPI_CONVERT,    "CONVERT" }
+        { XFS_BMAPI_CONVERT,    "CONVERT" }, \
+        { XFS_BMAPI_STACK_SWITCH, "STACK_SWITCH" }
 static inline int xfs_bmapi_aflag(int w)
@@ -133,6 +135,11 @@ typedef struct xfs_bmalloca {
        char                    userdata;/* set if is user data */
        char                    aeof;   /* allocated space at eof */
        char                    conv;   /* overwriting unwritten extents */
+        char                    stack_switch;
+        int                     flags;
+        struct completion       *done;
+        struct work_struct      work;
+        int                     result;
 } xfs_bmalloca_t;
 /*
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 933b7930b863..4b0b8dd1b7b0 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1197,9 +1197,14 @@ xfs_buf_bio_end_io(
 {
        xfs_buf_t               *bp = (xfs_buf_t *)bio->bi_private;
-        xfs_buf_ioerror(bp, -error);
+        /*
+         * don't overwrite existing errors - otherwise we can lose errors on
+         * buffers that require multiple bios to complete.
+         */
+        if (!bp->b_error)
+                xfs_buf_ioerror(bp, -error);
-        if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
+        if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
                invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
        _xfs_buf_ioend(bp, 1);
@@ -1279,6 +1284,11 @@ next_chunk:
                if (size)
                        goto next_chunk;
        } else {
+                /*
+                 * This is guaranteed not to be the last io reference count
+                 * because the caller (xfs_buf_iorequest) holds a count itself.
+                 */
+                atomic_dec(&bp->b_io_remaining);
                xfs_buf_ioerror(bp, EIO);
                bio_put(bio);
        }
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index a8d0ed911196..becf4a97efc6 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -526,7 +526,25 @@ xfs_buf_item_unpin(
                }
                xfs_buf_relse(bp);
        } else if (freed && remove) {
+                /*
+                 * There are currently two references to the buffer - the active
+                 * LRU reference and the buf log item. What we are about to do
+                 * here - simulate a failed IO completion - requires 3
+                 * references.
+                 *
+                 * The LRU reference is removed by the xfs_buf_stale() call. The
+                 * buf item reference is removed by the xfs_buf_iodone()
+                 * callback that is run by xfs_buf_do_callbacks() during ioend
+                 * processing (via the bp->b_iodone callback), and then finally
+                 * the ioend processing will drop the IO reference if the buffer
+                 * is marked XBF_ASYNC.
+                 *
+                 * Hence we need to take an additional reference here so that IO
+                 * completion processing doesn't free the buffer prematurely.
+                 */
                xfs_buf_lock(bp);
+                xfs_buf_hold(bp);
+                bp->b_flags |= XBF_ASYNC;
                xfs_buf_ioerror(bp, EIO);
                XFS_BUF_UNDONE(bp);
                xfs_buf_stale(bp);
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index c25b094efbf7..4beaede43277 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -399,9 +399,26 @@ xfs_growfs_data_private(
        /* update secondary superblocks. */
        for (agno = 1; agno < nagcount; agno++) {
-                error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
+                error = 0;
+                /*
+                 * new secondary superblocks need to be zeroed, not read from
+                 * disk as the contents of the new area we are growing into are
+                 * completely unknown.
+                 */
+                if (agno < oagcount) {
+                        error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
                                  XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
                                  XFS_FSS_TO_BB(mp, 1), 0, &bp);
+                } else {
+                        bp = xfs_trans_get_buf(NULL, mp->m_ddev_targp,
+                                  XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
+                                  XFS_FSS_TO_BB(mp, 1), 0);
+                        if (bp)
+                                xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
+                        else
+                                error = ENOMEM;
+                }
                if (error) {
                        xfs_warn(mp,
                "error %d reading secondary superblock for ag %d",
@@ -423,7 +440,7 @@ xfs_growfs_data_private(
                        break; /* no point in continuing */
                }
        }
-        return 0;
+        return error;
 error0:
        xfs_trans_cancel(tp, XFS_TRANS_ABORT);
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 445bf1aef31c..c5c4ef4f2bdb 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -250,6 +250,7 @@ xfs_ialloc_ag_alloc(
                                        /* boundary */
        struct xfs_perag *pag;
+        memset(&args, 0, sizeof(args));
        args.tp = tp;
        args.mp = tp->t_mountp;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 2778258fcfa2..1938b41ee9f5 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1509,7 +1509,8 @@ xfs_ifree_cluster(
                 * to mark all the active inodes on the buffer stale.
                 */
                bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
-                                        mp->m_bsize * blks_per_cluster, 0);
+                                        mp->m_bsize * blks_per_cluster,
+                                        XBF_UNMAPPED);
                if (!bp)
                        return ENOMEM;
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 8305f2ac6773..c1df3c623de2 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -70,7 +70,7 @@ xfs_find_handle(
        int                     hsize;
        xfs_handle_t            handle;
        struct inode            *inode;
-        struct fd               f;
+        struct fd               f = {0};
        struct path             path;
        int                     error;
        struct xfs_inode        *ip;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 973dff6ad935..7f537663365b 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -584,7 +584,9 @@ xfs_iomap_write_allocate(
                         * pointer that the caller gave to us.
                         */
                        error = xfs_bmapi_write(tp, ip, map_start_fsb,
-                                                count_fsb, 0, &first_block, 1,
+                                                count_fsb,
+                                                XFS_BMAPI_STACK_SWITCH,
+                                                &first_block, 1,
                                                imap, &nimaps, &free_list);
                        if (error)
                                goto trans_cancel;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 7f4f9370d0e7..4dad756962d0 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -2387,14 +2387,27 @@ xlog_state_do_callback(
                                /*
-                                 * update the last_sync_lsn before we drop the
+                                 * Completion of an iclog IO does not imply that
+                                 * a transaction has completed, as transactions
+                                 * can be large enough to span many iclogs. We
+                                 * cannot change the tail of the log half way
+                                 * through a transaction as this may be the only
+                                 * transaction in the log and moving the tail to
+                                 * point to the middle of it will prevent
+                                 * recovery from finding the start of the
+                                 * transaction. Hence we should only update the
+                                 * last_sync_lsn if this iclog contains
+                                 * transaction completion callbacks on it.
+                                 *
+                                 * We have to do this before we drop the
                                 * icloglock to ensure we are the only one that
                                 * can update it.
                                 */
                                ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
                                        be64_to_cpu(iclog->ic_header.h_lsn)) <= 0);
-                                atomic64_set(&log->l_last_sync_lsn,
+                                if (iclog->ic_callback)
-                                        be64_to_cpu(iclog->ic_header.h_lsn));
+                                        atomic64_set(&log->l_last_sync_lsn,
+                                                be64_to_cpu(iclog->ic_header.h_lsn));
                        } else
                                ioerrors++;
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 5da3ace352bf..d308749fabf1 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3541,7 +3541,7 @@ xlog_do_recovery_pass(
                                 *   - order is important.
                                 */
                                error = xlog_bread_offset(log, 0,
-                                                bblks - split_bblks, hbp,
+                                                bblks - split_bblks, dbp,
                                                offset + BBTOB(split_bblks));
                                if (error)
                                        goto bread_err2;