29 files changed, 618 insertions, 341 deletions
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 0ee594569dcc..85b67ffa2a43 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -286,11 +286,9 @@ static struct p9_fid *v9fs_fid_clone_with_uid(struct dentry *dentry, uid_t uid)
 struct p9_fid *v9fs_writeback_fid(struct dentry *dentry)
 {
-        int err, flags;
+        int err;
        struct p9_fid *fid;
-        struct v9fs_session_info *v9ses;
-        v9ses = v9fs_dentry2v9ses(dentry);
        fid = v9fs_fid_clone_with_uid(dentry, 0);
        if (IS_ERR(fid))
                goto error_out;
@@ -299,17 +297,8 @@ struct p9_fid *v9fs_writeback_fid(struct dentry *dentry)
         * dirty pages. We always request for the open fid in read-write
         * mode so that a partial page write which result in page
         * read can work.
-         *
-         * we don't have a tsyncfs operation for older version
-         * of protocol. So make sure the write back fid is
-         * opened in O_SYNC mode.
         */
-        if (!v9fs_proto_dotl(v9ses))
+        err = p9_client_open(fid, O_RDWR);
-                flags = O_RDWR | O_SYNC;
-        else
-                flags = O_RDWR;
-        err = p9_client_open(fid, flags);
        if (err < 0) {
                p9_client_clunk(fid);
                fid = ERR_PTR(err);
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 9665c2b840e6..e5ebedfc5ed8 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -116,7 +116,6 @@ struct v9fs_session_info {
        struct list_head slist; /* list of sessions registered with v9fs */
        struct backing_dev_info bdi;
        struct rw_semaphore rename_sem;
-        struct p9_fid *root_fid; /* Used for file system sync */
 };
 /* cache_validity flags */
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index b6a3b9f7fe4d..e022890c6f40 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -126,7 +126,9 @@ static int v9fs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
                        retval = v9fs_refresh_inode_dotl(fid, inode);
                else
                        retval = v9fs_refresh_inode(fid, inode);
-                if (retval <= 0)
+                if (retval == -ENOENT)
+                        return 0;
+                if (retval < 0)
                        return retval;
        }
 out_valid:
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index ffbb113d5f33..82a7c38ddad0 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -811,7 +811,7 @@ v9fs_vfs_follow_link_dotl(struct dentry *dentry, struct nameidata *nd)
        fid = v9fs_fid_lookup(dentry);
        if (IS_ERR(fid)) {
                __putname(link);
-                link = ERR_PTR(PTR_ERR(fid));
+                link = ERR_CAST(fid);
                goto ndset;
        }
        retval = p9_client_readlink(fid, &target);
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index f3eed3383e4f..feef6cdc1fd2 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -154,6 +154,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
                retval = PTR_ERR(inode);
                goto release_sb;
        }
        root = d_alloc_root(inode);
        if (!root) {
                iput(inode);
@@ -185,21 +186,10 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
                p9stat_free(st);
                kfree(st);
        }
-        v9fs_fid_add(root, fid);
        retval = v9fs_get_acl(inode, fid);
        if (retval)
                goto release_sb;
-        /*
+        v9fs_fid_add(root, fid);
-         * Add the root fid to session info. This is used
-         * for file system sync. We want a cloned fid here
-         * so that we can do a sync_filesystem after a
-         * shrink_dcache_for_umount
-         */
-        v9ses->root_fid = v9fs_fid_clone(root);
-        if (IS_ERR(v9ses->root_fid)) {
-                retval = PTR_ERR(v9ses->root_fid);
-                goto release_sb;
-        }
        P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
        return dget(sb->s_root);
@@ -210,11 +200,15 @@ close_session:
        v9fs_session_close(v9ses);
        kfree(v9ses);
        return ERR_PTR(retval);
 release_sb:
        /*
-         * we will do the session_close and root dentry
+         * we will do the session_close and root dentry release
-         * release in the below call.
+         * in the below call. But we need to clunk fid, because we haven't
+         * attached the fid to dentry so it won't get clunked
+         * automatically.
         */
+        p9_client_clunk(fid);
        deactivate_locked_super(sb);
        return ERR_PTR(retval);
 }
@@ -232,7 +226,7 @@ static void v9fs_kill_super(struct super_block *s)
        P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s);
        kill_anon_super(s);
-        p9_client_clunk(v9ses->root_fid);
        v9fs_session_cancel(v9ses);
        v9fs_session_close(v9ses);
        kfree(v9ses);
@@ -285,14 +279,6 @@ done:
        return res;
 }
-static int v9fs_sync_fs(struct super_block *sb, int wait)
-{
-        struct v9fs_session_info *v9ses = sb->s_fs_info;
-        P9_DPRINTK(P9_DEBUG_VFS, "v9fs_sync_fs: super_block %p\n", sb);
-        return p9_client_sync_fs(v9ses->root_fid);
-}
 static int v9fs_drop_inode(struct inode *inode)
 {
        struct v9fs_session_info *v9ses;
@@ -307,6 +293,51 @@ static int v9fs_drop_inode(struct inode *inode)
        return 1;
 }
+static int v9fs_write_inode(struct inode *inode,
+                            struct writeback_control *wbc)
+{
+        int ret;
+        struct p9_wstat wstat;
+        struct v9fs_inode *v9inode;
+        /*
+         * send a blank wstat, which a 9P server may treat as a request
+         * to sync the file, irrespective of wbc->sync_mode.
+         */
+        P9_DPRINTK(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode);
+        v9inode = V9FS_I(inode);
+        if (!v9inode->writeback_fid)
+                return 0;
+        v9fs_blank_wstat(&wstat);
+        ret = p9_client_wstat(v9inode->writeback_fid, &wstat);
+        if (ret < 0) {
+                __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+                return ret;
+        }
+        return 0;
+}
+static int v9fs_write_inode_dotl(struct inode *inode,
+                                 struct writeback_control *wbc)
+{
+        int ret;
+        struct v9fs_inode *v9inode;
+        /*
+         * send an fsync request to server irrespective of
+         * wbc->sync_mode.
+         */
+        P9_DPRINTK(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode);
+        v9inode = V9FS_I(inode);
+        if (!v9inode->writeback_fid)
+                return 0;
+        ret = p9_client_fsync(v9inode->writeback_fid, 0);
+        if (ret < 0) {
+                __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+                return ret;
+        }
+        return 0;
+}
 static const struct super_operations v9fs_super_ops = {
        .alloc_inode = v9fs_alloc_inode,
        .destroy_inode = v9fs_destroy_inode,
@@ -314,17 +345,18 @@ static const struct super_operations v9fs_super_ops = {
        .evict_inode = v9fs_evict_inode,
        .show_options = generic_show_options,
        .umount_begin = v9fs_umount_begin,
+        .write_inode = v9fs_write_inode,
 };
 static const struct super_operations v9fs_super_ops_dotl = {
        .alloc_inode = v9fs_alloc_inode,
        .destroy_inode = v9fs_destroy_inode,
-        .sync_fs = v9fs_sync_fs,
        .statfs = v9fs_statfs,
        .drop_inode = v9fs_drop_inode,
        .evict_inode = v9fs_evict_inode,
        .show_options = generic_show_options,
        .umount_begin = v9fs_umount_begin,
+        .write_inode = v9fs_write_inode_dotl,
 };
 struct file_system_type v9fs_fs_type = {
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index f34078d702d3..303983fabfd6 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -941,9 +941,13 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
        current->mm->start_stack = bprm->p;
 #ifdef arch_randomize_brk
-        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
+        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
                current->mm->brk = current->mm->start_brk =
                        arch_randomize_brk(current->mm);
+#ifdef CONFIG_COMPAT_BRK
+                current->brk_randomized = 1;
+#endif
+        }
 #endif
        if (current->personality & MMAP_PAGE_ZERO) {
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index de34bfad9ec3..5d505aaa72fb 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -178,16 +178,17 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
        if (value) {
                acl = posix_acl_from_xattr(value, size);
-                if (acl == NULL) {
+                if (acl) {
-                        value = NULL;
+                        ret = posix_acl_valid(acl);
-                        size = 0;
+                        if (ret)
+                                goto out;
                } else if (IS_ERR(acl)) {
                        return PTR_ERR(acl);
                }
        }
        ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type);
+out:
        posix_acl_release(acl);
        return ret;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 3458b5725540..2e61fe1b6b8c 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -740,8 +740,10 @@ struct btrfs_space_info {
         */
        unsigned long reservation_progress;
-        int full;               /* indicates that we cannot allocate any more
+        int full:1;             /* indicates that we cannot allocate any more
                                   chunks for this space */
+        int chunk_alloc:1;      /* set if we are allocating a chunk */
        int force_alloc;        /* set if we need to force a chunk alloc for
                                   this space */
@@ -2576,6 +2578,11 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
 int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
                              struct inode *inode, u64 start, u64 end);
 int btrfs_release_file(struct inode *inode, struct file *file);
+void btrfs_drop_pages(struct page **pages, size_t num_pages);
+int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
+                      struct page **pages, size_t num_pages,
+                      loff_t pos, size_t write_bytes,
+                      struct extent_state **cached);
 /* tree-defrag.c */
 int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8f1d44ba332f..68c84c8c24bd 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3057,7 +3057,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
                btrfs_destroy_pinned_extent(root,
                                            root->fs_info->pinned_extents);
-                t->use_count = 0;
+                atomic_set(&t->use_count, 0);
                list_del_init(&t->list);
                memset(t, 0, sizeof(*t));
                kmem_cache_free(btrfs_transaction_cachep, t);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f619c3cb13b7..31f33ba56fe8 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -33,6 +33,25 @@
 #include "locking.h"
 #include "free-space-cache.h"
+/* control flags for do_chunk_alloc's force field
+ * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
+ * if we really need one.
+ *
+ * CHUNK_ALLOC_FORCE means it must try to allocate one
+ *
+ * CHUNK_ALLOC_LIMITED means to only try and allocate one
+ * if we have very few chunks already allocated.  This is
+ * used as part of the clustering code to help make sure
+ * we have a good pool of storage to cluster in, without
+ * filling the FS with empty chunks
+ *
+ */
+enum {
+        CHUNK_ALLOC_NO_FORCE = 0,
+        CHUNK_ALLOC_FORCE = 1,
+        CHUNK_ALLOC_LIMITED = 2,
+};
 static int update_block_group(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              u64 bytenr, u64 num_bytes, int alloc);
@@ -3019,7 +3038,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
        found->bytes_readonly = 0;
        found->bytes_may_use = 0;
        found->full = 0;
-        found->force_alloc = 0;
+        found->force_alloc = CHUNK_ALLOC_NO_FORCE;
+        found->chunk_alloc = 0;
        *space_info = found;
        list_add_rcu(&found->list, &info->space_info);
        atomic_set(&found->caching_threads, 0);
@@ -3150,7 +3170,7 @@ again:
                if (!data_sinfo->full && alloc_chunk) {
                        u64 alloc_target;
-                        data_sinfo->force_alloc = 1;
+                        data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
                        spin_unlock(&data_sinfo->lock);
 alloc:
                        alloc_target = btrfs_get_alloc_profile(root, 1);
@@ -3160,7 +3180,8 @@ alloc:
                        ret = do_chunk_alloc(trans, root->fs_info->extent_root,
                                             bytes + 2 * 1024 * 1024,
-                                             alloc_target, 0);
+                                             alloc_target,
+                                             CHUNK_ALLOC_NO_FORCE);
                        btrfs_end_transaction(trans, root);
                        if (ret < 0) {
                                if (ret != -ENOSPC)
@@ -3239,31 +3260,56 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
        rcu_read_lock();
        list_for_each_entry_rcu(found, head, list) {
                if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
-                        found->force_alloc = 1;
+                        found->force_alloc = CHUNK_ALLOC_FORCE;
        }
        rcu_read_unlock();
 }
 static int should_alloc_chunk(struct btrfs_root *root,
-                              struct btrfs_space_info *sinfo, u64 alloc_bytes)
+                              struct btrfs_space_info *sinfo, u64 alloc_bytes,
+                              int force)
 {
        u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
+        u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
        u64 thresh;
-        if (sinfo->bytes_used + sinfo->bytes_reserved +
+        if (force == CHUNK_ALLOC_FORCE)
-            alloc_bytes + 256 * 1024 * 1024 < num_bytes)
+                return 1;
+        /*
+         * in limited mode, we want to have some free space up to
+         * about 1% of the FS size.
+         */
+        if (force == CHUNK_ALLOC_LIMITED) {
+                thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+                thresh = max_t(u64, 64 * 1024 * 1024,
+                               div_factor_fine(thresh, 1));
+                if (num_bytes - num_allocated < thresh)
+                        return 1;
+        }
+        /*
+         * we have two similar checks here, one based on percentage
+         * and one based on a hard number of 256MB.  The idea
+         * is that if we have a good amount of free
+         * room, don't allocate a chunk.  A good amount is
+         * less than 80% utilized of the chunks we have allocated,
+         * or more than 256MB free
+         */
+        if (num_allocated + alloc_bytes + 256 * 1024 * 1024 < num_bytes)
                return 0;
-        if (sinfo->bytes_used + sinfo->bytes_reserved +
+        if (num_allocated + alloc_bytes < div_factor(num_bytes, 8))
-            alloc_bytes < div_factor(num_bytes, 8))
                return 0;
        thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+        /* 256MB or 5% of the FS */
        thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
        if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
                return 0;
        return 1;
 }
@@ -3273,10 +3319,9 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 {
        struct btrfs_space_info *space_info;
        struct btrfs_fs_info *fs_info = extent_root->fs_info;
+        int wait_for_alloc = 0;
        int ret = 0;
-        mutex_lock(&fs_info->chunk_mutex);
        flags = btrfs_reduce_alloc_profile(extent_root, flags);
        space_info = __find_space_info(extent_root->fs_info, flags);
@@ -3287,21 +3332,40 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
        }
        BUG_ON(!space_info);
+again:
        spin_lock(&space_info->lock);
        if (space_info->force_alloc)
-                force = 1;
+                force = space_info->force_alloc;
        if (space_info->full) {
                spin_unlock(&space_info->lock);
-                goto out;
+                return 0;
        }
-        if (!force && !should_alloc_chunk(extent_root, space_info,
+        if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) {
-                                          alloc_bytes)) {
                spin_unlock(&space_info->lock);
-                goto out;
+                return 0;
+        } else if (space_info->chunk_alloc) {
+                wait_for_alloc = 1;
+        } else {
+                space_info->chunk_alloc = 1;
        }
        spin_unlock(&space_info->lock);
+        mutex_lock(&fs_info->chunk_mutex);
+        /*
+         * The chunk_mutex is held throughout the entirety of a chunk
+         * allocation, so once we've acquired the chunk_mutex we know that the
+         * other guy is done and we need to recheck and see if we should
+         * allocate.
+         */
+        if (wait_for_alloc) {
+                mutex_unlock(&fs_info->chunk_mutex);
+                wait_for_alloc = 0;
+                goto again;
+        }
        /*
         * If we have mixed data/metadata chunks we want to make sure we keep
         * allocating mixed chunks instead of individual chunks.
@@ -3327,9 +3391,10 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
                space_info->full = 1;
        else
                ret = 1;
-        space_info->force_alloc = 0;
+        space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
+        space_info->chunk_alloc = 0;
        spin_unlock(&space_info->lock);
-out:
        mutex_unlock(&extent_root->fs_info->chunk_mutex);
        return ret;
 }
@@ -5303,11 +5368,13 @@ loop:
                if (allowed_chunk_alloc) {
                        ret = do_chunk_alloc(trans, root, num_bytes +
-                                             2 * 1024 * 1024, data, 1);
+                                             2 * 1024 * 1024, data,
+                                             CHUNK_ALLOC_LIMITED);
                        allowed_chunk_alloc = 0;
                        done_chunk_alloc = 1;
-                } else if (!done_chunk_alloc) {
+                } else if (!done_chunk_alloc &&
-                        space_info->force_alloc = 1;
+                           space_info->force_alloc == CHUNK_ALLOC_NO_FORCE) {
+                        space_info->force_alloc = CHUNK_ALLOC_LIMITED;
                }
                if (loop < LOOP_NO_EMPTY_SIZE) {
@@ -5393,7 +5460,8 @@ again:
         */
        if (empty_size || root->ref_cows)
                ret = do_chunk_alloc(trans, root->fs_info->extent_root,
-                                     num_bytes + 2 * 1024 * 1024, data, 0);
+                                     num_bytes + 2 * 1024 * 1024, data,
+                                     CHUNK_ALLOC_NO_FORCE);
        WARN_ON(num_bytes < root->sectorsize);
        ret = find_free_extent(trans, root, num_bytes, empty_size,
@@ -5405,7 +5473,7 @@ again:
                num_bytes = num_bytes & ~(root->sectorsize - 1);
                num_bytes = max(num_bytes, min_alloc_size);
                do_chunk_alloc(trans, root->fs_info->extent_root,
-                               num_bytes, data, 1);
+                               num_bytes, data, CHUNK_ALLOC_FORCE);
                goto again;
        }
        if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
@@ -8109,13 +8177,15 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
        alloc_flags = update_block_group_flags(root, cache->flags);
        if (alloc_flags != cache->flags)
-                do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+                do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
+                               CHUNK_ALLOC_FORCE);
        ret = set_block_group_ro(cache);
        if (!ret)
                goto out;
        alloc_flags = get_alloc_profile(root, cache->space_info->flags);
-        ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+        ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
+                             CHUNK_ALLOC_FORCE);
        if (ret < 0)
                goto out;
        ret = set_block_group_ro(cache);
@@ -8128,7 +8198,8 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
                            struct btrfs_root *root, u64 type)
 {
        u64 alloc_flags = get_alloc_profile(root, type);
-        return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+        return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
+                              CHUNK_ALLOC_FORCE);
 }
 /*
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 20ddb28602a8..315138605088 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -690,6 +690,15 @@ static void cache_state(struct extent_state *state,
        }
 }
+static void uncache_state(struct extent_state **cached_ptr)
+{
+        if (cached_ptr && (*cached_ptr)) {
+                struct extent_state *state = *cached_ptr;
+                *cached_ptr = NULL;
+                free_extent_state(state);
+        }
+}
 /*
 * set some bits on a range in the tree.  This may require allocations or
 * sleeping, so the gfp mask is used to indicate what is allowed.
@@ -940,10 +949,10 @@ static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
 }
 int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
-                        gfp_t mask)
+                        struct extent_state **cached_state, gfp_t mask)
 {
-        return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
+        return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0,
-                              NULL, mask);
+                              NULL, cached_state, mask);
 }
 static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
@@ -1012,8 +1021,7 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
                                mask);
 }
-int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
+int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
-                  gfp_t mask)
 {
        return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
                                mask);
@@ -1735,6 +1743,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
        do {
                struct page *page = bvec->bv_page;
+                struct extent_state *cached = NULL;
+                struct extent_state *state;
                tree = &BTRFS_I(page->mapping->host)->io_tree;
                start = ((u64)page->index << PAGE_CACHE_SHIFT) +
@@ -1749,9 +1760,20 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
                if (++bvec <= bvec_end)
                        prefetchw(&bvec->bv_page->flags);
+                spin_lock(&tree->lock);
+                state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED);
+                if (state && state->start == start) {
+                        /*
+                         * take a reference on the state, unlock will drop
+                         * the ref
+                         */
+                        cache_state(state, &cached);
+                }
+                spin_unlock(&tree->lock);
                if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
                        ret = tree->ops->readpage_end_io_hook(page, start, end,
-                                                              NULL);
+                                                              state);
                        if (ret)
                                uptodate = 0;
                }
@@ -1764,15 +1786,16 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
                                        test_bit(BIO_UPTODATE, &bio->bi_flags);
                                if (err)
                                        uptodate = 0;
+                                uncache_state(&cached);
                                continue;
                        }
                }
                if (uptodate) {
-                        set_extent_uptodate(tree, start, end,
+                        set_extent_uptodate(tree, start, end, &cached,
                                            GFP_ATOMIC);
                }
-                unlock_extent(tree, start, end, GFP_ATOMIC);
+                unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
                if (whole_page) {
                        if (uptodate) {
@@ -1811,6 +1834,7 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err)
        do {
                struct page *page = bvec->bv_page;
+                struct extent_state *cached = NULL;
                tree = &BTRFS_I(page->mapping->host)->io_tree;
                start = ((u64)page->index << PAGE_CACHE_SHIFT) +
@@ -1821,13 +1845,14 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err)
                        prefetchw(&bvec->bv_page->flags);
                if (uptodate) {
-                        set_extent_uptodate(tree, start, end, GFP_ATOMIC);
+                        set_extent_uptodate(tree, start, end, &cached,
+                                            GFP_ATOMIC);
                } else {
                        ClearPageUptodate(page);
                        SetPageError(page);
                }
-                unlock_extent(tree, start, end, GFP_ATOMIC);
+                unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
        } while (bvec >= bio->bi_io_vec);
@@ -2016,14 +2041,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
        while (cur <= end) {
                if (cur >= last_byte) {
                        char *userpage;
+                        struct extent_state *cached = NULL;
                        iosize = PAGE_CACHE_SIZE - page_offset;
                        userpage = kmap_atomic(page, KM_USER0);
                        memset(userpage + page_offset, 0, iosize);
                        flush_dcache_page(page);
                        kunmap_atomic(userpage, KM_USER0);
                        set_extent_uptodate(tree, cur, cur + iosize - 1,
-                                            GFP_NOFS);
+                                            &cached, GFP_NOFS);
-                        unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
+                        unlock_extent_cached(tree, cur, cur + iosize - 1,
+                                             &cached, GFP_NOFS);
                        break;
                }
                em = get_extent(inode, page, page_offset, cur,
@@ -2063,14 +2091,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
                /* we've found a hole, just zero and go on */
                if (block_start == EXTENT_MAP_HOLE) {
                        char *userpage;
+                        struct extent_state *cached = NULL;
                        userpage = kmap_atomic(page, KM_USER0);
                        memset(userpage + page_offset, 0, iosize);
                        flush_dcache_page(page);
                        kunmap_atomic(userpage, KM_USER0);
                        set_extent_uptodate(tree, cur, cur + iosize - 1,
-                                            GFP_NOFS);
+                                            &cached, GFP_NOFS);
-                        unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
+                        unlock_extent_cached(tree, cur, cur + iosize - 1,
+                                             &cached, GFP_NOFS);
                        cur = cur + iosize;
                        page_offset += iosize;
                        continue;
@@ -2789,9 +2820,12 @@ int extent_prepare_write(struct extent_io_tree *tree,
                        iocount++;
                        block_start = block_start + iosize;
                } else {
-                        set_extent_uptodate(tree, block_start, cur_end,
+                        struct extent_state *cached = NULL;
+                        set_extent_uptodate(tree, block_start, cur_end, &cached,
                                            GFP_NOFS);
-                        unlock_extent(tree, block_start, cur_end, GFP_NOFS);
+                        unlock_extent_cached(tree, block_start, cur_end,
+                                             &cached, GFP_NOFS);
                        block_start = cur_end + 1;
                }
                page_offset = block_start & (PAGE_CACHE_SIZE - 1);
@@ -3457,7 +3491,7 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree,
        num_pages = num_extent_pages(eb->start, eb->len);
        set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
-                            GFP_NOFS);
+                            NULL, GFP_NOFS);
        for (i = 0; i < num_pages; i++) {
                page = extent_buffer_page(eb, i);
                if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
@@ -3885,6 +3919,12 @@ static void move_pages(struct page *dst_page, struct page *src_page,
        kunmap_atomic(dst_kaddr, KM_USER0);
 }
+static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
+{
+        unsigned long distance = (src > dst) ? src - dst : dst - src;
+        return distance < len;
+}
 static void copy_pages(struct page *dst_page, struct page *src_page,
                       unsigned long dst_off, unsigned long src_off,
                       unsigned long len)
@@ -3892,10 +3932,12 @@ static void copy_pages(struct page *dst_page, struct page *src_page,
        char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
        char *src_kaddr;
-        if (dst_page != src_page)
+        if (dst_page != src_page) {
                src_kaddr = kmap_atomic(src_page, KM_USER1);
-        else
+        } else {
                src_kaddr = dst_kaddr;
+                BUG_ON(areas_overlap(src_off, dst_off, len));
+        }
        memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
        kunmap_atomic(dst_kaddr, KM_USER0);
@@ -3970,7 +4012,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
                       "len %lu len %lu\n", dst_offset, len, dst->len);
                BUG_ON(1);
        }
-        if (dst_offset < src_offset) {
+        if (!areas_overlap(src_offset, dst_offset, len)) {
                memcpy_extent_buffer(dst, dst_offset, src_offset, len);
                return;
        }
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index f62c5442835d..af2d7179c372 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -208,7 +208,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
                   int bits, int exclusive_bits, u64 *failed_start,
                   struct extent_state **cached_state, gfp_t mask);
 int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
-                        gfp_t mask);
+                        struct extent_state **cached_state, gfp_t mask);
 int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
                   gfp_t mask);
 int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e621ea54a3fd..75899a01dded 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -104,7 +104,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
 /*
 * unlocks pages after btrfs_file_write is done with them
 */
-static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages)
+void btrfs_drop_pages(struct page **pages, size_t num_pages)
 {
        size_t i;
        for (i = 0; i < num_pages; i++) {
@@ -127,16 +127,13 @@ static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages)
 * this also makes the decision about creating an inline extent vs
 * doing real data extents, marking pages dirty and delalloc as required.
 */
-static noinline int dirty_and_release_pages(struct btrfs_root *root,
+int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
-                                            struct file *file,
+                      struct page **pages, size_t num_pages,
-                                            struct page **pages,
+                      loff_t pos, size_t write_bytes,
-                                            size_t num_pages,
+                      struct extent_state **cached)
-                                            loff_t pos,
-                                            size_t write_bytes)
 {
        int err = 0;
        int i;
-        struct inode *inode = fdentry(file)->d_inode;
        u64 num_bytes;
        u64 start_pos;
        u64 end_of_last_block;
@@ -149,7 +146,7 @@ static noinline int dirty_and_release_pages(struct btrfs_root *root,
        end_of_last_block = start_pos + num_bytes - 1;
        err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
-                                        NULL);
+                                        cached);
        if (err)
                return err;
@@ -992,9 +989,9 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
                }
                if (copied > 0) {
-                        ret = dirty_and_release_pages(root, file, pages,
+                        ret = btrfs_dirty_pages(root, inode, pages,
-                                                      dirty_pages, pos,
+                                                dirty_pages, pos, copied,
-                                                      copied);
+                                                NULL);
                        if (ret) {
                                btrfs_delalloc_release_space(inode,
                                        dirty_pages << PAGE_CACHE_SHIFT);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index f561c953205b..11d2e9cea09e 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -508,6 +508,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
        struct inode *inode;
        struct rb_node *node;
        struct list_head *pos, *n;
+        struct page **pages;
        struct page *page;
        struct extent_state *cached_state = NULL;
        struct btrfs_free_cluster *cluster = NULL;
@@ -517,13 +518,13 @@ int btrfs_write_out_cache(struct btrfs_root *root,
        u64 start, end, len;
        u64 bytes = 0;
        u32 *crc, *checksums;
-        pgoff_t index = 0, last_index = 0;
        unsigned long first_page_offset;
-        int num_checksums;
+        int index = 0, num_pages = 0;
        int entries = 0;
        int bitmaps = 0;
        int ret = 0;
        bool next_page = false;
+        bool out_of_space = false;
        root = root->fs_info->tree_root;
@@ -551,24 +552,31 @@ int btrfs_write_out_cache(struct btrfs_root *root,
                return 0;
        }
-        last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+        num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
+                PAGE_CACHE_SHIFT;
        filemap_write_and_wait(inode->i_mapping);
        btrfs_wait_ordered_range(inode, inode->i_size &
                                 ~(root->sectorsize - 1), (u64)-1);
        /* We need a checksum per page. */
-        num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
+        crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS);
-        crc = checksums  = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
        if (!crc) {
                iput(inode);
                return 0;
        }
+        pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
+        if (!pages) {
+                kfree(crc);
+                iput(inode);
+                return 0;
+        }
        /* Since the first page has all of our checksums and our generation we
         * need to calculate the offset into the page that we can start writing
         * our entries.
         */
-        first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
+        first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
        /* Get the cluster for this block_group if it exists */
        if (!list_empty(&block_group->cluster_list))
@@ -590,20 +598,18 @@ int btrfs_write_out_cache(struct btrfs_root *root,
         * after find_get_page at this point.  Just putting this here so people
         * know and don't freak out.
         */
-        while (index <= last_index) {
+        while (index < num_pages) {
                page = grab_cache_page(inode->i_mapping, index);
                if (!page) {
-                        pgoff_t i = 0;
+                        int i;
-                        while (i < index) {
+                        for (i = 0; i < num_pages; i++) {
-                                page = find_get_page(inode->i_mapping, i);
+                                unlock_page(pages[i]);
-                                unlock_page(page);
+                                page_cache_release(pages[i]);
-                                page_cache_release(page);
-                                page_cache_release(page);
-                                i++;
                        }
                        goto out_free;
                }
+                pages[index] = page;
                index++;
        }
@@ -631,7 +637,12 @@ int btrfs_write_out_cache(struct btrfs_root *root,
                        offset = start_offset;
                }
-                page = find_get_page(inode->i_mapping, index);
+                if (index >= num_pages) {
+                        out_of_space = true;
+                        break;
+                }
+                page = pages[index];
                addr = kmap(page);
                entry = addr + start_offset;
@@ -708,23 +719,6 @@ int btrfs_write_out_cache(struct btrfs_root *root,
                bytes += PAGE_CACHE_SIZE;
-                ClearPageChecked(page);
-                set_page_extent_mapped(page);
-                SetPageUptodate(page);
-                set_page_dirty(page);
-                /*
-                 * We need to release our reference we got for grab_cache_page,
-                 * except for the first page which will hold our checksums, we
-                 * do that below.
-                 */
-                if (index != 0) {
-                        unlock_page(page);
-                        page_cache_release(page);
-                }
-                page_cache_release(page);
                index++;
        } while (node || next_page);
@@ -734,7 +728,11 @@ int btrfs_write_out_cache(struct btrfs_root *root,
                struct btrfs_free_space *entry =
                        list_entry(pos, struct btrfs_free_space, list);
-                page = find_get_page(inode->i_mapping, index);
+                if (index >= num_pages) {
+                        out_of_space = true;
+                        break;
+                }
+                page = pages[index];
                addr = kmap(page);
                memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
@@ -745,64 +743,58 @@ int btrfs_write_out_cache(struct btrfs_root *root,
                crc++;
                bytes += PAGE_CACHE_SIZE;
-                ClearPageChecked(page);
-                set_page_extent_mapped(page);
-                SetPageUptodate(page);
-                set_page_dirty(page);
-                unlock_page(page);
-                page_cache_release(page);
-                page_cache_release(page);
                list_del_init(&entry->list);
                index++;
        }
+        if (out_of_space) {
+                btrfs_drop_pages(pages, num_pages);
+                unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
+                                     i_size_read(inode) - 1, &cached_state,
+                                     GFP_NOFS);
+                ret = 0;
+                goto out_free;
+        }
        /* Zero out the rest of the pages just to make sure */
-        while (index <= last_index) {
+        while (index < num_pages) {
                void *addr;
-                page = find_get_page(inode->i_mapping, index);
+                page = pages[index];
                addr = kmap(page);
                memset(addr, 0, PAGE_CACHE_SIZE);
                kunmap(page);
-                ClearPageChecked(page);
-                set_page_extent_mapped(page);
-                SetPageUptodate(page);
-                set_page_dirty(page);
-                unlock_page(page);
-                page_cache_release(page);
-                page_cache_release(page);
                bytes += PAGE_CACHE_SIZE;
                index++;
        }
-        btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state);
        /* Write the checksums and trans id to the first page */
        {
                void *addr;
                u64 *gen;
-                page = find_get_page(inode->i_mapping, 0);
+                page = pages[0];
                addr = kmap(page);
-                memcpy(addr, checksums, sizeof(u32) * num_checksums);
+                memcpy(addr, checksums, sizeof(u32) * num_pages);
-                gen = addr + (sizeof(u32) * num_checksums);
+                gen = addr + (sizeof(u32) * num_pages);
                *gen = trans->transid;
                kunmap(page);
-                ClearPageChecked(page);
-                set_page_extent_mapped(page);
-                SetPageUptodate(page);
-                set_page_dirty(page);
-                unlock_page(page);
-                page_cache_release(page);
-                page_cache_release(page);
        }
-        BTRFS_I(inode)->generation = trans->transid;
+        ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0,
+                                            bytes, &cached_state);
+        btrfs_drop_pages(pages, num_pages);
        unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
                             i_size_read(inode) - 1, &cached_state, GFP_NOFS);
+        if (ret) {
+                ret = 0;
+                goto out_free;
+        }
+        BTRFS_I(inode)->generation = trans->transid;
        filemap_write_and_wait(inode->i_mapping);
        key.objectid = BTRFS_FREE_SPACE_OBJECTID;
@@ -853,6 +845,7 @@ out_free:
                BTRFS_I(inode)->generation = 0;
        }
        kfree(checksums);
+        kfree(pages);
        btrfs_update_inode(trans, root, inode);
        iput(inode);
        return ret;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5cc64ab9c485..fcd66b6a8086 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1770,9 +1770,12 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
        add_pending_csums(trans, inode, ordered_extent->file_offset,
                          &ordered_extent->list);
-        btrfs_ordered_update_i_size(inode, 0, ordered_extent);
+        ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
-        ret = btrfs_update_inode(trans, root, inode);
+        if (!ret) {
-        BUG_ON(ret);
+                ret = btrfs_update_inode(trans, root, inode);
+                BUG_ON(ret);
+        }
+        ret = 0;
 out:
        if (nolock) {
                if (trans)
@@ -2590,6 +2593,13 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
                            struct btrfs_inode_item *item,
                            struct inode *inode)
 {
+        if (!leaf->map_token)
+                map_private_extent_buffer(leaf, (unsigned long)item,
+                                          sizeof(struct btrfs_inode_item),
+                                          &leaf->map_token, &leaf->kaddr,
+                                          &leaf->map_start, &leaf->map_len,
+                                          KM_USER1);
        btrfs_set_inode_uid(leaf, item, inode->i_uid);
        btrfs_set_inode_gid(leaf, item, inode->i_gid);
        btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
@@ -2618,6 +2628,11 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
        btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
        btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
        btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group);
+        if (leaf->map_token) {
+                unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
+                leaf->map_token = NULL;
+        }
 }
 /*
@@ -4207,10 +4222,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
        struct btrfs_key found_key;
        struct btrfs_path *path;
        int ret;
-        u32 nritems;
        struct extent_buffer *leaf;
        int slot;
-        int advance;
        unsigned char d_type;
        int over = 0;
        u32 di_cur;
@@ -4253,27 +4266,19 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                goto err;
-        advance = 0;
        while (1) {
                leaf = path->nodes[0];
-                nritems = btrfs_header_nritems(leaf);
                slot = path->slots[0];
-                if (advance || slot >= nritems) {
+                if (slot >= btrfs_header_nritems(leaf)) {
-                        if (slot >= nritems - 1) {
+                        ret = btrfs_next_leaf(root, path);
-                                ret = btrfs_next_leaf(root, path);
+                        if (ret < 0)
-                                if (ret)
+                                goto err;
-                                        break;
+                        else if (ret > 0)
-                                leaf = path->nodes[0];
+                                break;
-                                nritems = btrfs_header_nritems(leaf);
+                        continue;
-                                slot = path->slots[0];
-                        } else {
-                                slot++;
-                                path->slots[0]++;
-                        }
                }
-                advance = 1;
                item = btrfs_item_nr(leaf, slot);
                btrfs_item_key_to_cpu(leaf, &found_key, slot);
@@ -4282,7 +4287,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
                if (btrfs_key_type(&found_key) != key_type)
                        break;
                if (found_key.offset < filp->f_pos)
-                        continue;
+                        goto next;
                filp->f_pos = found_key.offset;
@@ -4335,6 +4340,8 @@ skip:
                        di_cur += di_len;
                        di = (struct btrfs_dir_item *)((char *)di + di_len);
                }
+next:
+                path->slots[0]++;
        }
        /* Reached end of directory/root. Bump pos past the last item. */
@@ -4527,14 +4534,17 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
        BUG_ON(!path);
        inode = new_inode(root->fs_info->sb);
-        if (!inode)
+        if (!inode) {
+                btrfs_free_path(path);
                return ERR_PTR(-ENOMEM);
+        }
        if (dir) {
                trace_btrfs_inode_request(dir);
                ret = btrfs_set_inode_index(dir, index);
                if (ret) {
+                        btrfs_free_path(path);
                        iput(inode);
                        return ERR_PTR(ret);
                }
@@ -4834,9 +4844,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
        if (inode->i_nlink == ~0U)
                return -EMLINK;
-        btrfs_inc_nlink(inode);
-        inode->i_ctime = CURRENT_TIME;
        err = btrfs_set_inode_index(dir, &index);
        if (err)
                goto fail;
@@ -4852,6 +4859,9 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
                goto fail;
        }
+        btrfs_inc_nlink(inode);
+        inode->i_ctime = CURRENT_TIME;
        btrfs_set_trans_block_group(trans, dir);
        ihold(inode);
@@ -5221,7 +5231,7 @@ again:
                        btrfs_mark_buffer_dirty(leaf);
                }
                set_extent_uptodate(io_tree, em->start,
-                                    extent_map_end(em) - 1, GFP_NOFS);
+                                    extent_map_end(em) - 1, NULL, GFP_NOFS);
                goto insert;
        } else {
                printk(KERN_ERR "btrfs unknown found_type %d\n", found_type);
@@ -5428,17 +5438,30 @@ out:
 }
 static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
+                                                  struct extent_map *em,
                                                  u64 start, u64 len)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_trans_handle *trans;
-        struct extent_map *em;
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
        struct btrfs_key ins;
        u64 alloc_hint;
        int ret;
+        bool insert = false;
-        btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
+        /*
+         * If the extent map we looked up is a hole and covers exactly the
+         * range we want, there is no reason to allocate a new one.  If it
+         * does not match, we need to free this one and drop the cache for
+         * our range.
+         */
+        if (em->block_start != EXTENT_MAP_HOLE || em->start != start ||
+            em->len != len) {
+                free_extent_map(em);
+                em = NULL;
+                insert = true;
+                btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
+        }
        trans = btrfs_join_transaction(root, 0);
        if (IS_ERR(trans))
@@ -5454,10 +5477,12 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
                goto out;
        }
-        em = alloc_extent_map(GFP_NOFS);
        if (!em) {
-                em = ERR_PTR(-ENOMEM);
+                em = alloc_extent_map(GFP_NOFS);
-                goto out;
+                if (!em) {
+                        em = ERR_PTR(-ENOMEM);
+                        goto out;
+                }
        }
        em->start = start;
@@ -5467,9 +5492,15 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
        em->block_start = ins.objectid;
        em->block_len = ins.offset;
        em->bdev = root->fs_info->fs_devices->latest_bdev;
+        /*
+         * We need to do this because if we're using the original em we searched
+         * for, we could have EXTENT_FLAG_VACANCY set, and we don't want that.
+         */
+        em->flags = 0;
        set_bit(EXTENT_FLAG_PINNED, &em->flags);
-        while (1) {
+        while (insert) {
                write_lock(&em_tree->lock);
                ret = add_extent_mapping(em_tree, em);
                write_unlock(&em_tree->lock);
@@ -5687,8 +5718,7 @@ must_cow:
         * it above
         */
        len = bh_result->b_size;
-        free_extent_map(em);
+        em = btrfs_new_extent_direct(inode, em, start, len);
-        em = btrfs_new_extent_direct(inode, start, len);
        if (IS_ERR(em))
                return PTR_ERR(em);
        len = min(len, em->len - (start - em->start));
@@ -5851,8 +5881,10 @@ again:
        }
        add_pending_csums(trans, inode, ordered->file_offset, &ordered->list);
-        btrfs_ordered_update_i_size(inode, 0, ordered);
+        ret = btrfs_ordered_update_i_size(inode, 0, ordered);
-        btrfs_update_inode(trans, root, inode);
+        if (!ret)
+                btrfs_update_inode(trans, root, inode);
+        ret = 0;
 out_unlock:
        unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset,
                             ordered->file_offset + ordered->len - 1,
@@ -5938,7 +5970,7 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
 static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
                                         int rw, u64 file_offset, int skip_sum,
-                                         u32 *csums)
+                                         u32 *csums, int async_submit)
 {
        int write = rw & REQ_WRITE;
        struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -5949,13 +5981,24 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
        if (ret)
                goto err;
-        if (write && !skip_sum) {
+        if (skip_sum)
+                goto map;
+        if (write && async_submit) {
                ret = btrfs_wq_submit_bio(root->fs_info,
                                   inode, rw, bio, 0, 0,
                                   file_offset,
                                   __btrfs_submit_bio_start_direct_io,
                                   __btrfs_submit_bio_done);
                goto err;
+        } else if (write) {
+                /*
+                 * If we aren't doing async submit, calculate the csum of the
+                 * bio now.
+                 */
+                ret = btrfs_csum_one_bio(root, inode, bio, file_offset, 1);
+                if (ret)
+                        goto err;
        } else if (!skip_sum) {
                ret = btrfs_lookup_bio_sums_dio(root, inode, bio,
                                          file_offset, csums);
@@ -5963,7 +6006,8 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
                        goto err;
        }
-        ret = btrfs_map_bio(root, rw, bio, 0, 1);
+map:
+        ret = btrfs_map_bio(root, rw, bio, 0, async_submit);
 err:
        bio_put(bio);
        return ret;
@@ -5985,15 +6029,9 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
        int nr_pages = 0;
        u32 *csums = dip->csums;
        int ret = 0;
+        int async_submit = 0;
        int write = rw & REQ_WRITE;
-        bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
-        if (!bio)
-                return -ENOMEM;
-        bio->bi_private = dip;
-        bio->bi_end_io = btrfs_end_dio_bio;
-        atomic_inc(&dip->pending_bios);
        map_length = orig_bio->bi_size;
        ret = btrfs_map_block(map_tree, READ, start_sector << 9,
                              &map_length, NULL, 0);
@@ -6002,6 +6040,19 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
                return -EIO;
        }
+        if (map_length >= orig_bio->bi_size) {
+                bio = orig_bio;
+                goto submit;
+        }
+        async_submit = 1;
+        bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
+        if (!bio)
+                return -ENOMEM;
+        bio->bi_private = dip;
+        bio->bi_end_io = btrfs_end_dio_bio;
+        atomic_inc(&dip->pending_bios);
        while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
                if (unlikely(map_length < submit_len + bvec->bv_len ||
                    bio_add_page(bio, bvec->bv_page, bvec->bv_len,
@@ -6015,7 +6066,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
                        atomic_inc(&dip->pending_bios);
                        ret = __btrfs_submit_dio_bio(bio, inode, rw,
                                                     file_offset, skip_sum,
-                                                     csums);
+                                                     csums, async_submit);
                        if (ret) {
                                bio_put(bio);
                                atomic_dec(&dip->pending_bios);
@@ -6052,8 +6103,9 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
                }
        }
+submit:
        ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
-                                     csums);
+                                     csums, async_submit);
        if (!ret)
                return 0;
@@ -6148,6 +6200,7 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
                        unsigned long nr_segs)
 {
        int seg;
+        int i;
        size_t size;
        unsigned long addr;
        unsigned blocksize_mask = root->sectorsize - 1;
@@ -6162,8 +6215,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
                addr = (unsigned long)iov[seg].iov_base;
                size = iov[seg].iov_len;
                end += size;
-                if ((addr & blocksize_mask) || (size & blocksize_mask)) 
+                if ((addr & blocksize_mask) || (size & blocksize_mask))
                        goto out;
+                /* If this is a write we don't need to check anymore */
+                if (rw & WRITE)
+                        continue;
+                /*
+                 * Check to make sure we don't have duplicate iov_base's in this
+                 * iovec.  If we do, return EINVAL; otherwise we'll get csum
+                 * errors when reading back.
+                 */
+                for (i = seg + 1; i < nr_segs; i++) {
+                        if (iov[seg].iov_base == iov[i].iov_base)
+                                goto out;
+                }
        }
        retval = 0;
 out:
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index cfc264fefdb0..ffb48d6c5433 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2287,7 +2287,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
        struct btrfs_ioctl_space_info space;
        struct btrfs_ioctl_space_info *dest;
        struct btrfs_ioctl_space_info *dest_orig;
-        struct btrfs_ioctl_space_info *user_dest;
+        struct btrfs_ioctl_space_info __user *user_dest;
        struct btrfs_space_info *info;
        u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
                       BTRFS_BLOCK_GROUP_SYSTEM,
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 58e7de9cc90c..0ac712efcdf2 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -159,7 +159,7 @@ enum {
        Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
        Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
        Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
-        Opt_enospc_debug, Opt_err,
+        Opt_enospc_debug, Opt_subvolrootid, Opt_err,
 };
 static match_table_t tokens = {
@@ -189,6 +189,7 @@ static match_table_t tokens = {
        {Opt_clear_cache, "clear_cache"},
        {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
        {Opt_enospc_debug, "enospc_debug"},
+        {Opt_subvolrootid, "subvolrootid=%d"},
        {Opt_err, NULL},
 };
@@ -232,6 +233,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                        break;
                case Opt_subvol:
                case Opt_subvolid:
+                case Opt_subvolrootid:
                case Opt_device:
                        /*
                         * These are parsed by btrfs_parse_early_options
@@ -388,7 +390,7 @@ out:
 */
 static int btrfs_parse_early_options(const char *options, fmode_t flags,
                void *holder, char **subvol_name, u64 *subvol_objectid,
-                struct btrfs_fs_devices **fs_devices)
+                u64 *subvol_rootid, struct btrfs_fs_devices **fs_devices)
 {
        substring_t args[MAX_OPT_ARGS];
        char *opts, *orig, *p;
@@ -429,6 +431,18 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
                                        *subvol_objectid = intarg;
                        }
                        break;
+                case Opt_subvolrootid:
+                        intarg = 0;
+                        error = match_int(&args[0], &intarg);
+                        if (!error) {
+                                /* we want the original fs_tree */
+                                if (!intarg)
+                                        *subvol_rootid =
+                                                BTRFS_FS_TREE_OBJECTID;
+                                else
+                                        *subvol_rootid = intarg;
+                        }
+                        break;
                case Opt_device:
                        error = btrfs_scan_one_device(match_strdup(&args[0]),
                                        flags, holder, fs_devices);
@@ -736,6 +750,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
        fmode_t mode = FMODE_READ;
        char *subvol_name = NULL;
        u64 subvol_objectid = 0;
+        u64 subvol_rootid = 0;
        int error = 0;
        if (!(flags & MS_RDONLY))
@@ -743,7 +758,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
        error = btrfs_parse_early_options(data, mode, fs_type,
                                          &subvol_name, &subvol_objectid,
-                                          &fs_devices);
+                                          &subvol_rootid, &fs_devices);
        if (error)
                return ERR_PTR(error);
@@ -807,15 +822,17 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
                s->s_flags |= MS_ACTIVE;
        }
-        root = get_default_root(s, subvol_objectid);
-        if (IS_ERR(root)) {
-                error = PTR_ERR(root);
-                deactivate_locked_super(s);
-                goto error_free_subvol_name;
-        }
        /* if they gave us a subvolume name bind mount into that */
        if (strcmp(subvol_name, ".")) {
                struct dentry *new_root;
+                root = get_default_root(s, subvol_rootid);
+                if (IS_ERR(root)) {
+                        error = PTR_ERR(root);
+                        deactivate_locked_super(s);
+                        goto error_free_subvol_name;
+                }
                mutex_lock(&root->d_inode->i_mutex);
                new_root = lookup_one_len(subvol_name, root,
                                      strlen(subvol_name));
@@ -836,6 +853,13 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
                }
                dput(root);
                root = new_root;
+        } else {
+                root = get_default_root(s, subvol_objectid);
+                if (IS_ERR(root)) {
+                        error = PTR_ERR(root);
+                        deactivate_locked_super(s);
+                        goto error_free_subvol_name;
+                }
        }
        kfree(subvol_name);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 5b158da7e0bb..c571734d5e5a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -32,10 +32,8 @@
 static noinline void put_transaction(struct btrfs_transaction *transaction)
 {
-        WARN_ON(transaction->use_count == 0);
+        WARN_ON(atomic_read(&transaction->use_count) == 0);
-        transaction->use_count--;
+        if (atomic_dec_and_test(&transaction->use_count)) {
-        if (transaction->use_count == 0) {
-                list_del_init(&transaction->list);
                memset(transaction, 0, sizeof(*transaction));
                kmem_cache_free(btrfs_transaction_cachep, transaction);
        }
@@ -60,14 +58,14 @@ static noinline int join_transaction(struct btrfs_root *root)
                if (!cur_trans)
                        return -ENOMEM;
                root->fs_info->generation++;
-                cur_trans->num_writers = 1;
+                atomic_set(&cur_trans->num_writers, 1);
                cur_trans->num_joined = 0;
                cur_trans->transid = root->fs_info->generation;
                init_waitqueue_head(&cur_trans->writer_wait);
                init_waitqueue_head(&cur_trans->commit_wait);
                cur_trans->in_commit = 0;
                cur_trans->blocked = 0;
-                cur_trans->use_count = 1;
+                atomic_set(&cur_trans->use_count, 1);
                cur_trans->commit_done = 0;
                cur_trans->start_time = get_seconds();
@@ -88,7 +86,7 @@ static noinline int join_transaction(struct btrfs_root *root)
                root->fs_info->running_transaction = cur_trans;
                spin_unlock(&root->fs_info->new_trans_lock);
        } else {
-                cur_trans->num_writers++;
+                atomic_inc(&cur_trans->num_writers);
                cur_trans->num_joined++;
        }
@@ -145,7 +143,7 @@ static void wait_current_trans(struct btrfs_root *root)
        cur_trans = root->fs_info->running_transaction;
        if (cur_trans && cur_trans->blocked) {
                DEFINE_WAIT(wait);
-                cur_trans->use_count++;
+                atomic_inc(&cur_trans->use_count);
                while (1) {
                        prepare_to_wait(&root->fs_info->transaction_wait, &wait,
                                        TASK_UNINTERRUPTIBLE);
@@ -181,6 +179,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
 {
        struct btrfs_trans_handle *h;
        struct btrfs_transaction *cur_trans;
+        int retries = 0;
        int ret;
        if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
@@ -204,7 +203,7 @@ again:
        }
        cur_trans = root->fs_info->running_transaction;
-        cur_trans->use_count++;
+        atomic_inc(&cur_trans->use_count);
        if (type != TRANS_JOIN_NOLOCK)
                mutex_unlock(&root->fs_info->trans_mutex);
@@ -224,10 +223,18 @@ again:
        if (num_items > 0) {
                ret = btrfs_trans_reserve_metadata(h, root, num_items);
-                if (ret == -EAGAIN) {
+                if (ret == -EAGAIN && !retries) {
+                        retries++;
                        btrfs_commit_transaction(h, root);
                        goto again;
+                } else if (ret == -EAGAIN) {
+                        /*
+                         * We already committed and retried once; a second
+                         * -EAGAIN means there is really no space: set -ENOSPC.
+                         */
+                        ret = -ENOSPC;
                }
                if (ret < 0) {
                        btrfs_end_transaction(h, root);
                        return ERR_PTR(ret);
@@ -327,7 +334,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
                        goto out_unlock;  /* nothing committing|committed */
        }
-        cur_trans->use_count++;
+        atomic_inc(&cur_trans->use_count);
        mutex_unlock(&root->fs_info->trans_mutex);
        wait_for_commit(root, cur_trans);
@@ -457,18 +464,14 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
                        wake_up_process(info->transaction_kthread);
        }
-        if (lock)
-                mutex_lock(&info->trans_mutex);
        WARN_ON(cur_trans != info->running_transaction);
-        WARN_ON(cur_trans->num_writers < 1);
+        WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
-        cur_trans->num_writers--;
+        atomic_dec(&cur_trans->num_writers);
        smp_mb();
        if (waitqueue_active(&cur_trans->writer_wait))
                wake_up(&cur_trans->writer_wait);
        put_transaction(cur_trans);
-        if (lock)
-                mutex_unlock(&info->trans_mutex);
        if (current->journal_info == trans)
                current->journal_info = NULL;
@@ -1178,7 +1181,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
        /* take transaction reference */
        mutex_lock(&root->fs_info->trans_mutex);
        cur_trans = trans->transaction;
-        cur_trans->use_count++;
+        atomic_inc(&cur_trans->use_count);
        mutex_unlock(&root->fs_info->trans_mutex);
        btrfs_end_transaction(trans, root);
@@ -1237,7 +1240,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        mutex_lock(&root->fs_info->trans_mutex);
        if (cur_trans->in_commit) {
-                cur_trans->use_count++;
+                atomic_inc(&cur_trans->use_count);
                mutex_unlock(&root->fs_info->trans_mutex);
                btrfs_end_transaction(trans, root);
@@ -1259,7 +1262,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                prev_trans = list_entry(cur_trans->list.prev,
                                        struct btrfs_transaction, list);
                if (!prev_trans->commit_done) {
-                        prev_trans->use_count++;
+                        atomic_inc(&prev_trans->use_count);
                        mutex_unlock(&root->fs_info->trans_mutex);
                        wait_for_commit(root, prev_trans);
@@ -1300,14 +1303,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                                TASK_UNINTERRUPTIBLE);
                smp_mb();
-                if (cur_trans->num_writers > 1)
+                if (atomic_read(&cur_trans->num_writers) > 1)
                        schedule_timeout(MAX_SCHEDULE_TIMEOUT);
                else if (should_grow)
                        schedule_timeout(1);
                mutex_lock(&root->fs_info->trans_mutex);
                finish_wait(&cur_trans->writer_wait, &wait);
-        } while (cur_trans->num_writers > 1 ||
+        } while (atomic_read(&cur_trans->num_writers) > 1 ||
                 (should_grow && cur_trans->num_joined != joined));
        ret = create_pending_snapshots(trans, root->fs_info);
@@ -1394,6 +1397,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        wake_up(&cur_trans->commit_wait);
+        list_del_init(&cur_trans->list);
        put_transaction(cur_trans);
        put_transaction(cur_trans);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 229a594cacd5..e441acc6c584 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -27,11 +27,11 @@ struct btrfs_transaction {
         * total writers in this transaction, it must be zero before the
         * transaction can end
         */
-        unsigned long num_writers;
+        atomic_t num_writers;
        unsigned long num_joined;
        int in_commit;
-        int use_count;
+        atomic_t use_count;
        int commit_done;
        int blocked;
        struct list_head list;
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index a5303b871b13..cfd660550ded 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -180,11 +180,10 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
        struct btrfs_path *path;
        struct extent_buffer *leaf;
        struct btrfs_dir_item *di;
-        int ret = 0, slot, advance;
+        int ret = 0, slot;
        size_t total_size = 0, size_left = size;
        unsigned long name_ptr;
        size_t name_len;
-        u32 nritems;
        /*
         * ok we want all objects associated with this id.
@@ -204,34 +203,24 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                goto err;
-        advance = 0;
        while (1) {
                leaf = path->nodes[0];
-                nritems = btrfs_header_nritems(leaf);
                slot = path->slots[0];
                /* this is where we start walking through the path */
-                if (advance || slot >= nritems) {
+                if (slot >= btrfs_header_nritems(leaf)) {
                        /*
                         * if we've reached the last slot in this leaf we need
                         * to go to the next leaf and reset everything
                         */
-                        if (slot >= nritems-1) {
+                        ret = btrfs_next_leaf(root, path);
-                                ret = btrfs_next_leaf(root, path);
+                        if (ret < 0)
-                                if (ret)
+                                goto err;
-                                        break;
+                        else if (ret > 0)
-                                leaf = path->nodes[0];
+                                break;
-                                nritems = btrfs_header_nritems(leaf);
+                        continue;
-                                slot = path->slots[0];
-                        } else {
-                                /*
-                                 * just walking through the slots on this leaf
-                                 */
-                                slot++;
-                                path->slots[0]++;
-                        }
                }
-                advance = 1;
                btrfs_item_key_to_cpu(leaf, &found_key, slot);
@@ -250,7 +239,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
                /* we are just looking for how big our buffer needs to be */
                if (!size)
-                        continue;
+                        goto next;
                if (!buffer || (name_len + 1) > size_left) {
                        ret = -ERANGE;
@@ -263,6 +252,8 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
                size_left -= name_len + 1;
                buffer += name_len + 1;
+next:
+                path->slots[0]++;
        }
        ret = total_size;
diff --git a/fs/dcache.c b/fs/dcache.c
index ad25c4cec7d5..129a35730994 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2131,7 +2131,7 @@ EXPORT_SYMBOL(d_rehash);
 */
 void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
 {
-        BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
+        BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex));
        BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
        spin_lock(&dentry->d_lock);
diff --git a/fs/fhandle.c b/fs/fhandle.c
index bf93ad2bee07..6b088641f5bf 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -7,6 +7,7 @@
 #include <linux/exportfs.h>
 #include <linux/fs_struct.h>
 #include <linux/fsnotify.h>
+#include <linux/personality.h>
 #include <asm/uaccess.h>
 #include "internal.h"
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 751d6b255a12..0845f84f2a5f 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -110,14 +110,13 @@ int unregister_filesystem(struct file_system_type * fs)
                        *tmp = fs->next;
                        fs->next = NULL;
                        write_unlock(&file_systems_lock);
+                        synchronize_rcu();
                        return 0;
                }
                tmp = &(*tmp)->next;
        }
        write_unlock(&file_systems_lock);
-        synchronize_rcu();
        return -EINVAL;
 }
diff --git a/fs/namei.c b/fs/namei.c
index e6cd6113872c..54fc993e3027 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -697,6 +697,7 @@ static __always_inline void set_root_rcu(struct nameidata *nd)
                do {
                        seq = read_seqcount_begin(&fs->seq);
                        nd->root = fs->root;
+                        nd->seq = __read_seqcount_begin(&nd->root.dentry->d_seq);
                } while (read_seqcount_retry(&fs->seq, seq));
        }
 }
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index b10e3540d5b7..ce4f62440425 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -1299,6 +1299,11 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags)
        BUG_ON (!data || !frags);
+        if (size < 2 * VBLK_SIZE_HEAD) {
+                ldm_error("Value of size is to small.");
+                return false;
+        }
        group = get_unaligned_be32(data + 0x08);
        rec   = get_unaligned_be16(data + 0x0C);
        num   = get_unaligned_be16(data + 0x0E);
@@ -1306,6 +1311,10 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags)
                ldm_error ("A VBLK claims to have %d parts.", num);
                return false;
        }
+        if (rec >= num) {
+                ldm_error("REC value (%d) exceeds NUM value (%d)", rec, num);
+                return false;
+        }
        list_for_each (item, frags) {
                f = list_entry (item, struct frag, list);
@@ -1334,10 +1343,9 @@ found:
        f->map |= (1 << rec);
-        if (num > 0) {
+        data += VBLK_SIZE_HEAD;
-                data += VBLK_SIZE_HEAD;
+        size -= VBLK_SIZE_HEAD;
-                size -= VBLK_SIZE_HEAD;
-        }
        memcpy (f->data+rec*(size-VBLK_SIZE_HEAD)+VBLK_SIZE_HEAD, data, size);
        return true;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index dd6628d3ba42..dfa532730e55 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3124,11 +3124,16 @@ static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldi
 /* for the /proc/ directory itself, after non-process stuff has been done */
 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
-        unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
+        unsigned int nr;
-        struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode);
+        struct task_struct *reaper;
        struct tgid_iter iter;
        struct pid_namespace *ns;
+        if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
+                goto out_no_task;
+        nr = filp->f_pos - FIRST_PROCESS_ENTRY;
+        reaper = get_proc_task(filp->f_path.dentry->d_inode);
        if (!reaper)
                goto out_no_task;
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 9eead2c796b7..fbb0b478a346 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -112,6 +112,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
                SetPageDirty(page);
                unlock_page(page);
+                put_page(page);
        }
        return 0;
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 919f0de29d8f..e6493cac193d 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -23,6 +23,12 @@
 #ifndef __UBIFS_DEBUG_H__
 #define __UBIFS_DEBUG_H__
+/* Checking helper functions */
+typedef int (*dbg_leaf_callback)(struct ubifs_info *c,
+                                 struct ubifs_zbranch *zbr, void *priv);
+typedef int (*dbg_znode_callback)(struct ubifs_info *c,
+                                  struct ubifs_znode *znode, void *priv);
 #ifdef CONFIG_UBIFS_FS_DEBUG
 /**
@@ -270,11 +276,6 @@ void dbg_dump_tnc(struct ubifs_info *c);
 void dbg_dump_index(struct ubifs_info *c);
 void dbg_dump_lpt_lebs(const struct ubifs_info *c);
-/* Checking helper functions */
-typedef int (*dbg_leaf_callback)(struct ubifs_info *c,
-                                 struct ubifs_zbranch *zbr, void *priv);
-typedef int (*dbg_znode_callback)(struct ubifs_info *c,
-                                  struct ubifs_znode *znode, void *priv);
 int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
                   dbg_znode_callback znode_cb, void *priv);
@@ -295,7 +296,6 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size);
 int dbg_check_filesystem(struct ubifs_info *c);
 void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
                    int add_pos);
-int dbg_check_lprops(struct ubifs_info *c);
 int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
                        int row, int col);
 int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
@@ -401,58 +401,94 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c);
 #define DBGKEY(key)  ((char *)(key))
 #define DBGKEY1(key) ((char *)(key))
-#define ubifs_debugging_init(c)                0
+static inline int ubifs_debugging_init(struct ubifs_info *c)      { return 0; }
-#define ubifs_debugging_exit(c)                ({})
+static inline void ubifs_debugging_exit(struct ubifs_info *c)     { return; }
+static inline const char *dbg_ntype(int type)                     { return ""; }
-#define dbg_ntype(type)                        ""
+static inline const char *dbg_cstate(int cmt_state)               { return ""; }
-#define dbg_cstate(cmt_state)                  ""
+static inline const char *dbg_jhead(int jhead)                    { return ""; }
-#define dbg_jhead(jhead)                       ""
+static inline const char *
-#define dbg_get_key_dump(c, key)               ({})
+dbg_get_key_dump(const struct ubifs_info *c,
-#define dbg_dump_inode(c, inode)               ({})
+                 const union ubifs_key *key)                      { return ""; }
-#define dbg_dump_node(c, node)                 ({})
+static inline void dbg_dump_inode(const struct ubifs_info *c,
-#define dbg_dump_lpt_node(c, node, lnum, offs) ({})
+                                  const struct inode *inode)      { return; }
-#define dbg_dump_budget_req(req)               ({})
+static inline void dbg_dump_node(const struct ubifs_info *c,
-#define dbg_dump_lstats(lst)                   ({})
+                                 const void *node)                { return; }
-#define dbg_dump_budg(c)                       ({})
+static inline void dbg_dump_lpt_node(const struct ubifs_info *c,
-#define dbg_dump_lprop(c, lp)                  ({})
+                                     void *node, int lnum,
-#define dbg_dump_lprops(c)                     ({})
+                                     int offs)                    { return; }
-#define dbg_dump_lpt_info(c)                   ({})
+static inline void
-#define dbg_dump_leb(c, lnum)                  ({})
+dbg_dump_budget_req(const struct ubifs_budget_req *req)           { return; }
-#define dbg_dump_znode(c, znode)               ({})
+static inline void
-#define dbg_dump_heap(c, heap, cat)            ({})
+dbg_dump_lstats(const struct ubifs_lp_stats *lst)                 { return; }
-#define dbg_dump_pnode(c, pnode, parent, iip)  ({})
+static inline void dbg_dump_budg(struct ubifs_info *c)            { return; }
-#define dbg_dump_tnc(c)                        ({})
+static inline void dbg_dump_lprop(const struct ubifs_info *c,
-#define dbg_dump_index(c)                      ({})
+                                  const struct ubifs_lprops *lp)  { return; }
-#define dbg_dump_lpt_lebs(c)                   ({})
+static inline void dbg_dump_lprops(struct ubifs_info *c)          { return; }
+static inline void dbg_dump_lpt_info(struct ubifs_info *c)        { return; }
-#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0
+static inline void dbg_dump_leb(const struct ubifs_info *c,
-#define dbg_old_index_check_init(c, zroot)         0
+                                int lnum)                         { return; }
-#define dbg_save_space_info(c)                     ({})
+static inline void
-#define dbg_check_space_info(c)                    0
+dbg_dump_znode(const struct ubifs_info *c,
-#define dbg_check_old_index(c, zroot)              0
+               const struct ubifs_znode *znode)                   { return; }
-#define dbg_check_cats(c)                          0
+static inline void dbg_dump_heap(struct ubifs_info *c,
-#define dbg_check_ltab(c)                          0
+                                 struct ubifs_lpt_heap *heap,
-#define dbg_chk_lpt_free_spc(c)                    0
+                                 int cat)                         { return; }
-#define dbg_chk_lpt_sz(c, action, len)             0
+static inline void dbg_dump_pnode(struct ubifs_info *c,
-#define dbg_check_synced_i_size(inode)             0
+                                  struct ubifs_pnode *pnode,
-#define dbg_check_dir_size(c, dir)                 0
+                                  struct ubifs_nnode *parent,
-#define dbg_check_tnc(c, x)                        0
+                                  int iip)                        { return; }
-#define dbg_check_idx_size(c, idx_size)            0
+static inline void dbg_dump_tnc(struct ubifs_info *c)             { return; }
-#define dbg_check_filesystem(c)                    0
+static inline void dbg_dump_index(struct ubifs_info *c)           { return; }
-#define dbg_check_heap(c, heap, cat, add_pos)      ({})
+static inline void dbg_dump_lpt_lebs(const struct ubifs_info *c)  { return; }
-#define dbg_check_lprops(c)                        0
-#define dbg_check_lpt_nodes(c, cnode, row, col)    0
+static inline int dbg_walk_index(struct ubifs_info *c,
-#define dbg_check_inode_size(c, inode, size)       0
+                                 dbg_leaf_callback leaf_cb,
-#define dbg_check_data_nodes_order(c, head)        0
+                                 dbg_znode_callback znode_cb,
-#define dbg_check_nondata_nodes_order(c, head)     0
+                                 void *priv)                      { return 0; }
-#define dbg_force_in_the_gaps_enabled              0
+static inline void dbg_save_space_info(struct ubifs_info *c)      { return; }
-#define dbg_force_in_the_gaps()                    0
+static inline int dbg_check_space_info(struct ubifs_info *c)      { return 0; }
-#define dbg_failure_mode                           0
+static inline int dbg_check_lprops(struct ubifs_info *c)          { return 0; }
+static inline int
-#define dbg_debugfs_init()                         0
+dbg_old_index_check_init(struct ubifs_info *c,
-#define dbg_debugfs_exit()
+                         struct ubifs_zbranch *zroot)             { return 0; }
-#define dbg_debugfs_init_fs(c)                     0
+static inline int
-#define dbg_debugfs_exit_fs(c)                     0
+dbg_check_old_index(struct ubifs_info *c,
+                    struct ubifs_zbranch *zroot)                  { return 0; }
+static inline int dbg_check_cats(struct ubifs_info *c)            { return 0; }
+static inline int dbg_check_ltab(struct ubifs_info *c)            { return 0; }
+static inline int dbg_chk_lpt_free_spc(struct ubifs_info *c)      { return 0; }
+static inline int dbg_chk_lpt_sz(struct ubifs_info *c,
+                                 int action, int len)             { return 0; }
+static inline int dbg_check_synced_i_size(struct inode *inode)    { return 0; }
+static inline int dbg_check_dir_size(struct ubifs_info *c,
+                                     const struct inode *dir)     { return 0; }
+static inline int dbg_check_tnc(struct ubifs_info *c, int extra)  { return 0; }
+static inline int dbg_check_idx_size(struct ubifs_info *c,
+                                     long long idx_size)          { return 0; }
+static inline int dbg_check_filesystem(struct ubifs_info *c)      { return 0; }
+static inline void dbg_check_heap(struct ubifs_info *c,
+                                  struct ubifs_lpt_heap *heap,
+                                  int cat, int add_pos)           { return; }
+static inline int dbg_check_lpt_nodes(struct ubifs_info *c,
+        struct ubifs_cnode *cnode, int row, int col)              { return 0; }
+static inline int dbg_check_inode_size(struct ubifs_info *c,
+                                       const struct inode *inode,
+                                       loff_t size)               { return 0; }
+static inline int
+dbg_check_data_nodes_order(struct ubifs_info *c,
+                           struct list_head *head)                { return 0; }
+static inline int
+dbg_check_nondata_nodes_order(struct ubifs_info *c,
+                              struct list_head *head)             { return 0; }
+static inline int dbg_force_in_the_gaps(void)                     { return 0; }
+#define dbg_force_in_the_gaps_enabled 0
+#define dbg_failure_mode              0
+static inline int dbg_debugfs_init(void)                          { return 0; }
+static inline void dbg_debugfs_exit(void)                         { return; }
+static inline int dbg_debugfs_init_fs(struct ubifs_info *c)       { return 0; }
+static inline int dbg_debugfs_exit_fs(struct ubifs_info *c)       { return 0; }
 #endif /* !CONFIG_UBIFS_FS_DEBUG */
 #endif /* !__UBIFS_DEBUG_H__ */
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 28be1e6a65e8..b286db79c686 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1312,6 +1312,9 @@ int ubifs_fsync(struct file *file, int datasync)
        dbg_gen("syncing inode %lu", inode->i_ino);
+        if (inode->i_sb->s_flags & MS_RDONLY)
+                return 0;
        /*
         * VFS has already synchronized dirty pages for this inode. Synchronize
         * the inode unless this is a 'datasync()' call.