168 files changed, 4247 insertions, 1876 deletions
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 48d4215c60a8..c55c614500ad 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -68,7 +68,7 @@ static int v9fs_set_super(struct super_block *s, void *data)
 * v9fs_fill_super - populate superblock with info
 * @sb: superblock
 * @v9ses: session information
- * @flags: flags propagated from v9fs_get_sb()
+ * @flags: flags propagated from v9fs_mount()
 *
 */
@@ -99,18 +99,16 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
 }
 /**
- * v9fs_get_sb - mount a superblock
+ * v9fs_mount - mount a superblock
 * @fs_type: file system type
 * @flags: mount flags
 * @dev_name: device name that was mounted
 * @data: mount options
- * @mnt: mountpoint record to be instantiated
 *
 */
-static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
+static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
-                       const char *dev_name, void *data,
+                       const char *dev_name, void *data)
-                       struct vfsmount *mnt)
 {
        struct super_block *sb = NULL;
        struct inode *inode = NULL;
@@ -124,7 +122,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
        v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL);
        if (!v9ses)
-                return -ENOMEM;
+                return ERR_PTR(-ENOMEM);
        fid = v9fs_session_init(v9ses, dev_name, data);
        if (IS_ERR(fid)) {
@@ -186,15 +184,15 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
        v9fs_fid_add(root, fid);
        P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
-        simple_set_mnt(mnt, sb);
+        return dget(sb->s_root);
-        return 0;
 clunk_fid:
        p9_client_clunk(fid);
 close_session:
        v9fs_session_close(v9ses);
        kfree(v9ses);
-        return retval;
+        return ERR_PTR(retval);
 release_sb:
        /*
         * we will do the session_close and root dentry release
@@ -204,7 +202,7 @@ release_sb:
         */
        p9_client_clunk(fid);
        deactivate_locked_super(sb);
-        return retval;
+        return ERR_PTR(retval);
 }
 /**
@@ -300,7 +298,7 @@ static const struct super_operations v9fs_super_ops_dotl = {
 struct file_system_type v9fs_fs_type = {
        .name = "9p",
-        .get_sb = v9fs_get_sb,
+        .mount = v9fs_mount,
        .kill_sb = v9fs_kill_super,
        .owner = THIS_MODULE,
        .fs_flags = FS_RENAME_DOES_D_MOVE,
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index d9803f73236f..959dbff2d42d 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -490,17 +490,16 @@ error:
        return -EINVAL;
 }
-static int adfs_get_sb(struct file_system_type *fs_type,
+static struct dentry *adfs_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+        int flags, const char *dev_name, void *data)
 {
-        return get_sb_bdev(fs_type, flags, dev_name, data, adfs_fill_super,
+        return mount_bdev(fs_type, flags, dev_name, data, adfs_fill_super);
-                           mnt);
 }
 static struct file_system_type adfs_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "adfs",
-        .get_sb         = adfs_get_sb,
+        .mount          = adfs_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
diff --git a/fs/affs/super.c b/fs/affs/super.c
index fa4fbe1e238a..0cf7f4384cbd 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -573,17 +573,16 @@ affs_statfs(struct dentry *dentry, struct kstatfs *buf)
        return 0;
 }
-static int affs_get_sb(struct file_system_type *fs_type,
+static struct dentry *affs_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+        int flags, const char *dev_name, void *data)
 {
-        return get_sb_bdev(fs_type, flags, dev_name, data, affs_fill_super,
+        return mount_bdev(fs_type, flags, dev_name, data, affs_fill_super);
-                           mnt);
 }
 static struct file_system_type affs_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "affs",
-        .get_sb         = affs_get_sb,
+        .mount          = affs_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
diff --git a/fs/afs/super.c b/fs/afs/super.c
index eacf76d98ae0..27201cffece4 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -29,9 +29,8 @@
 #define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */
 static void afs_i_init_once(void *foo);
-static int afs_get_sb(struct file_system_type *fs_type,
+static struct dentry *afs_mount(struct file_system_type *fs_type,
-                      int flags, const char *dev_name,
+                      int flags, const char *dev_name, void *data);
-                      void *data, struct vfsmount *mnt);
 static struct inode *afs_alloc_inode(struct super_block *sb);
 static void afs_put_super(struct super_block *sb);
 static void afs_destroy_inode(struct inode *inode);
@@ -40,7 +39,7 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf);
 struct file_system_type afs_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "afs",
-        .get_sb         = afs_get_sb,
+        .mount          = afs_mount,
        .kill_sb        = kill_anon_super,
        .fs_flags       = 0,
 };
@@ -359,11 +358,8 @@ error:
 /*
 * get an AFS superblock
 */
-static int afs_get_sb(struct file_system_type *fs_type,
+static struct dentry *afs_mount(struct file_system_type *fs_type,
-                      int flags,
+                      int flags, const char *dev_name, void *options)
-                      const char *dev_name,
-                      void *options,
-                      struct vfsmount *mnt)
 {
        struct afs_mount_params params;
        struct super_block *sb;
@@ -427,12 +423,11 @@ static int afs_get_sb(struct file_system_type *fs_type,
                ASSERTCMP(sb->s_flags, &, MS_ACTIVE);
        }
-        simple_set_mnt(mnt, sb);
        afs_put_volume(params.volume);
        afs_put_cell(params.cell);
        kfree(new_opts);
        _leave(" = 0 [%p]", sb);
-        return 0;
+        return dget(sb->s_root);
 error:
        afs_put_volume(params.volume);
@@ -440,7 +435,7 @@ error:
        key_put(params.key);
        kfree(new_opts);
        _leave(" = %d", ret);
-        return ret;
+        return ERR_PTR(ret);
 }
 /*
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 5365527ca43f..57ce55b2564c 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -26,12 +26,10 @@ static struct vfsmount *anon_inode_mnt __read_mostly;
 static struct inode *anon_inode_inode;
 static const struct file_operations anon_inode_fops;
-static int anon_inodefs_get_sb(struct file_system_type *fs_type, int flags,
+static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
-                               const char *dev_name, void *data,
+                                int flags, const char *dev_name, void *data)
-                               struct vfsmount *mnt)
 {
-        return get_sb_pseudo(fs_type, "anon_inode:", NULL, ANON_INODE_FS_MAGIC,
+        return mount_pseudo(fs_type, "anon_inode:", NULL, ANON_INODE_FS_MAGIC);
-                             mnt);
 }
 /*
@@ -45,7 +43,7 @@ static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen)
 static struct file_system_type anon_inode_fs_type = {
        .name           = "anon_inodefs",
-        .get_sb         = anon_inodefs_get_sb,
+        .mount          = anon_inodefs_mount,
        .kill_sb        = kill_anon_super,
 };
 static const struct dentry_operations anon_inodefs_dentry_operations = {
diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c
index 9722e4bd8957..c038727b4050 100644
--- a/fs/autofs4/init.c
+++ b/fs/autofs4/init.c
@@ -14,16 +14,16 @@
 #include <linux/init.h>
 #include "autofs_i.h"
-static int autofs_get_sb(struct file_system_type *fs_type,
+static struct dentry *autofs_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+        int flags, const char *dev_name, void *data)
 {
-        return get_sb_nodev(fs_type, flags, data, autofs4_fill_super, mnt);
+        return mount_nodev(fs_type, flags, data, autofs4_fill_super);
 }
 static struct file_system_type autofs_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "autofs",
-        .get_sb         = autofs_get_sb,
+        .mount          = autofs_mount,
        .kill_sb        = autofs4_kill_sb,
 };
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index dc39d2824885..aa4e7c7ae3c6 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -913,18 +913,17 @@ befs_statfs(struct dentry *dentry, struct kstatfs *buf)
        return 0;
 }
-static int
+static struct dentry *
-befs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name,
+befs_mount(struct file_system_type *fs_type, int flags, const char *dev_name,
-            void *data, struct vfsmount *mnt)
+            void *data)
 {
-        return get_sb_bdev(fs_type, flags, dev_name, data, befs_fill_super,
+        return mount_bdev(fs_type, flags, dev_name, data, befs_fill_super);
-                           mnt);
 }
 static struct file_system_type befs_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "befs",
-        .get_sb         = befs_get_sb,
+        .mount          = befs_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,      
 };
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 883e77acd5a8..76db6d7d49bb 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -450,16 +450,16 @@ out:
        return ret;
 }
-static int bfs_get_sb(struct file_system_type *fs_type,
+static struct dentry *bfs_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+        int flags, const char *dev_name, void *data)
 {
-        return get_sb_bdev(fs_type, flags, dev_name, data, bfs_fill_super, mnt);
+        return mount_bdev(fs_type, flags, dev_name, data, bfs_fill_super);
 }
 static struct file_system_type bfs_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "bfs",
-        .get_sb         = bfs_get_sb,
+        .mount          = bfs_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 29990f0eee0c..1befe2ec8186 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -706,10 +706,10 @@ static int bm_fill_super(struct super_block * sb, void * data, int silent)
        return err;
 }
-static int bm_get_sb(struct file_system_type *fs_type,
+static struct dentry *bm_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+        int flags, const char *dev_name, void *data)
 {
-        return get_sb_single(fs_type, flags, data, bm_fill_super, mnt);
+        return mount_single(fs_type, flags, data, bm_fill_super);
 }
 static struct linux_binfmt misc_format = {
@@ -720,7 +720,7 @@ static struct linux_binfmt misc_format = {
 static struct file_system_type bm_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "binfmt_misc",
-        .get_sb         = bm_get_sb,
+        .mount          = bm_mount,
        .kill_sb        = kill_litter_super,
 };
diff --git a/fs/bio.c b/fs/bio.c
index 8abb2dfb2e7c..4bd454fa844e 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -370,6 +370,9 @@ struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs)
 {
        struct bio *bio;
+        if (nr_iovecs > UIO_MAXIOV)
+                return NULL;
        bio = kmalloc(sizeof(struct bio) + nr_iovecs * sizeof(struct bio_vec),
                      gfp_mask);
        if (unlikely(!bio))
@@ -697,8 +700,12 @@ static void bio_free_map_data(struct bio_map_data *bmd)
 static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count,
                                               gfp_t gfp_mask)
 {
-        struct bio_map_data *bmd = kmalloc(sizeof(*bmd), gfp_mask);
+        struct bio_map_data *bmd;
+        if (iov_count > UIO_MAXIOV)
+                return NULL;
+        bmd = kmalloc(sizeof(*bmd), gfp_mask);
        if (!bmd)
                return NULL;
@@ -827,6 +834,12 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
                end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
                start = uaddr >> PAGE_SHIFT;
+                /*
+                 * Overflow, abort
+                 */
+                if (end < start)
+                        return ERR_PTR(-EINVAL);
                nr_pages += end - start;
                len += iov[i].iov_len;
        }
@@ -955,6 +968,12 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
                unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
                unsigned long start = uaddr >> PAGE_SHIFT;
+                /*
+                 * Overflow, abort
+                 */
+                if (end < start)
+                        return ERR_PTR(-EINVAL);
                nr_pages += end - start;
                /*
                 * buffer must be aligned to at least hardsector size for now
@@ -982,7 +1001,7 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
                unsigned long start = uaddr >> PAGE_SHIFT;
                const int local_nr_pages = end - start;
                const int page_limit = cur_page + local_nr_pages;
-                
                ret = get_user_pages_fast(uaddr, local_nr_pages,
                                write_to_vm, &pages[cur_page]);
                if (ret < local_nr_pages) {
diff --git a/fs/block_dev.c b/fs/block_dev.c
index dea3b628a6ce..06e8ff12b97c 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -464,15 +464,15 @@ static const struct super_operations bdev_sops = {
        .evict_inode = bdev_evict_inode,
 };
-static int bd_get_sb(struct file_system_type *fs_type,
+static struct dentry *bd_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+        int flags, const char *dev_name, void *data)
 {
-        return get_sb_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576, mnt);
+        return mount_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576);
 }
 static struct file_system_type bd_type = {
        .name           = "bdev",
-        .get_sb         = bd_get_sb,
+        .mount          = bd_mount,
        .kill_sb        = kill_anon_super,
 };
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 396039b3a8a2..7845d1f7d1d9 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -163,7 +163,6 @@ fail:
 */
 static void end_compressed_bio_read(struct bio *bio, int err)
 {
-        struct extent_io_tree *tree;
        struct compressed_bio *cb = bio->bi_private;
        struct inode *inode;
        struct page *page;
@@ -187,7 +186,6 @@ static void end_compressed_bio_read(struct bio *bio, int err)
        /* ok, we're the last bio for this extent, lets start
         * the decompression.
         */
-        tree = &BTRFS_I(inode)->io_tree;
        ret = btrfs_zlib_decompress_biovec(cb->compressed_pages,
                                        cb->start,
                                        cb->orig_bio->bi_io_vec,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index c3df14ce2cc2..9ac171599258 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -200,7 +200,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
                      struct extent_buffer **cow_ret, u64 new_root_objectid)
 {
        struct extent_buffer *cow;
-        u32 nritems;
        int ret = 0;
        int level;
        struct btrfs_disk_key disk_key;
@@ -210,7 +209,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
        WARN_ON(root->ref_cows && trans->transid != root->last_trans);
        level = btrfs_header_level(buf);
-        nritems = btrfs_header_nritems(buf);
        if (level == 0)
                btrfs_item_key(buf, &disk_key, 0);
        else
@@ -1008,7 +1006,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
        int wret;
        int pslot;
        int orig_slot = path->slots[level];
-        int err_on_enospc = 0;
        u64 orig_ptr;
        if (level == 0)
@@ -1071,8 +1068,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
            BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
                return 0;
-        if (btrfs_header_nritems(mid) < 2)
+        btrfs_header_nritems(mid);
-                err_on_enospc = 1;
        left = read_node_slot(root, parent, pslot - 1);
        if (left) {
@@ -1103,8 +1099,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                wret = push_node_left(trans, root, left, mid, 1);
                if (wret < 0)
                        ret = wret;
-                if (btrfs_header_nritems(mid) < 2)
+                btrfs_header_nritems(mid);
-                        err_on_enospc = 1;
        }
        /*
@@ -1224,14 +1219,12 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
        int wret;
        int pslot;
        int orig_slot = path->slots[level];
-        u64 orig_ptr;
        if (level == 0)
                return 1;
        mid = path->nodes[level];
        WARN_ON(btrfs_header_generation(mid) != trans->transid);
-        orig_ptr = btrfs_node_blockptr(mid, orig_slot);
        if (level < BTRFS_MAX_LEVEL - 1)
                parent = path->nodes[level + 1];
@@ -1577,13 +1570,33 @@ read_block_for_search(struct btrfs_trans_handle *trans,
        blocksize = btrfs_level_size(root, level - 1);
        tmp = btrfs_find_tree_block(root, blocknr, blocksize);
-        if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
+        if (tmp) {
-                /*
+                if (btrfs_buffer_uptodate(tmp, 0)) {
-                 * we found an up to date block without sleeping, return
+                        if (btrfs_buffer_uptodate(tmp, gen)) {
-                 * right away
+                                /*
-                 */
+                                 * we found an up to date block without
-                *eb_ret = tmp;
+                                 * sleeping, return
-                return 0;
+                                 * right away
+                                 */
+                                *eb_ret = tmp;
+                                return 0;
+                        }
+                        /* the pages were up to date, but we failed
+                         * the generation number check.  Do a full
+                         * read for the generation number that is correct.
+                         * We must do this without dropping locks so
+                         * we can trust our generation number
+                         */
+                        free_extent_buffer(tmp);
+                        tmp = read_tree_block(root, blocknr, blocksize, gen);
+                        if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
+                                *eb_ret = tmp;
+                                return 0;
+                        }
+                        free_extent_buffer(tmp);
+                        btrfs_release_path(NULL, p);
+                        return -EIO;
+                }
        }
        /*
@@ -1596,8 +1609,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
        btrfs_unlock_up_safe(p, level + 1);
        btrfs_set_path_blocking(p);
-        if (tmp)
+        free_extent_buffer(tmp);
-                free_extent_buffer(tmp);
        if (p->reada)
                reada_for_search(root, p, level, slot, key->objectid);
@@ -2548,7 +2560,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 {
        struct btrfs_disk_key disk_key;
        struct extent_buffer *right = path->nodes[0];
-        int slot;
        int i;
        int push_space = 0;
        int push_items = 0;
@@ -2560,8 +2571,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
        u32 this_item_size;
        u32 old_left_item_size;
-        slot = path->slots[1];
        if (empty)
                nr = min(right_nritems, max_slot);
        else
@@ -3330,7 +3339,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
 {
        int ret = 0;
        int slot;
-        int slot_orig;
        struct extent_buffer *leaf;
        struct btrfs_item *item;
        u32 nritems;
@@ -3340,7 +3348,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
        unsigned int size_diff;
        int i;
-        slot_orig = path->slots[0];
        leaf = path->nodes[0];
        slot = path->slots[0];
@@ -3445,7 +3452,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
 {
        int ret = 0;
        int slot;
-        int slot_orig;
        struct extent_buffer *leaf;
        struct btrfs_item *item;
        u32 nritems;
@@ -3454,7 +3460,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
        unsigned int old_size;
        int i;
-        slot_orig = path->slots[0];
        leaf = path->nodes[0];
        nritems = btrfs_header_nritems(leaf);
@@ -3787,7 +3792,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
                            struct btrfs_key *cpu_key, u32 *data_size,
                            int nr)
 {
-        struct extent_buffer *leaf;
        int ret = 0;
        int slot;
        int i;
@@ -3804,7 +3808,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
        if (ret < 0)
                goto out;
-        leaf = path->nodes[0];
        slot = path->slots[0];
        BUG_ON(slot < 0);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index eaf286abad17..8db9234f6b41 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -99,6 +99,9 @@ struct btrfs_ordered_sum;
 */
 #define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
+/* For storing free space cache */
+#define BTRFS_FREE_SPACE_OBJECTID -11ULL
 /* dummy objectid represents multiple objectids */
 #define BTRFS_MULTIPLE_OBJECTIDS -255ULL
@@ -265,6 +268,22 @@ struct btrfs_chunk {
        /* additional stripes go here */
 } __attribute__ ((__packed__));
+#define BTRFS_FREE_SPACE_EXTENT 1 /* NOTE(review): presumably a ->type value (extent entry) -- confirm */
+#define BTRFS_FREE_SPACE_BITMAP 2 /* NOTE(review): presumably a ->type value (bitmap entry) -- confirm */
+struct btrfs_free_space_entry { /* packed record: two __le64 fields followed by a one-byte type */
+        __le64 offset; /* NOTE(review): presumably the start of the free range -- confirm */
+        __le64 bytes; /* NOTE(review): presumably the length of the range in bytes -- confirm */
+        u8 type; /* NOTE(review): presumably BTRFS_FREE_SPACE_EXTENT or _BITMAP -- confirm */
+} __attribute__ ((__packed__)); /* packed, so no padding is added around the u8 */
+struct btrfs_free_space_header { /* packed header: a disk key followed by three __le64 fields */
+        struct btrfs_disk_key location; /* accessed via btrfs_free_space_key()/btrfs_set_free_space_key() */
+        __le64 generation; /* NOTE(review): what this generation is compared against is not shown here */
+        __le64 num_entries; /* NOTE(review): presumably the number of btrfs_free_space_entry records -- confirm */
+        __le64 num_bitmaps; /* NOTE(review): presumably the number of bitmap entries -- confirm */
+} __attribute__ ((__packed__));
 static inline unsigned long btrfs_chunk_item_size(int num_stripes)
 {
        BUG_ON(num_stripes == 0);
@@ -365,8 +384,10 @@ struct btrfs_super_block {
        char label[BTRFS_LABEL_SIZE];
+        __le64 cache_generation;
        /* future expansion */
-        __le64 reserved[32];
+        __le64 reserved[31];
        u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
 } __attribute__ ((__packed__));
@@ -375,13 +396,15 @@ struct btrfs_super_block {
 * ones specified below then we will fail to mount
 */
 #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF    (1ULL << 0)
-#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL   (2ULL << 0)
+#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL   (1ULL << 1)
+#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS     (1ULL << 2)
 #define BTRFS_FEATURE_COMPAT_SUPP               0ULL
 #define BTRFS_FEATURE_COMPAT_RO_SUPP            0ULL
-#define BTRFS_FEATURE_INCOMPAT_SUPP             \
+#define BTRFS_FEATURE_INCOMPAT_SUPP                     \
-        (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
+        (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |         \
-         BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)
+         BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL |        \
+         BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
 /*
 * A leaf is full of items. offset and size tell us where to find
@@ -675,7 +698,8 @@ struct btrfs_block_group_item {
 struct btrfs_space_info {
        u64 flags;
-        u64 total_bytes;        /* total bytes in the space */
+        u64 total_bytes;        /* total bytes in the space,
+                                   this doesn't take mirrors into account */
        u64 bytes_used;         /* total bytes used,
                                   this does't take mirrors into account */
        u64 bytes_pinned;       /* total bytes pinned, will be freed when the
@@ -687,6 +711,8 @@ struct btrfs_space_info {
        u64 bytes_may_use;      /* number of bytes that may be used for
                                   delalloc/allocations */
        u64 disk_used;          /* total bytes used on disk */
+        u64 disk_total;         /* total bytes on disk, takes mirrors into
+                                   account */
        int full;               /* indicates that we cannot allocate any more
                                   chunks for this space */
@@ -750,6 +776,14 @@ enum btrfs_caching_type {
        BTRFS_CACHE_FINISHED    = 2,
 };
+enum btrfs_disk_cache_state { /* NOTE(review): presumably the values of btrfs_block_group_cache.disk_cache_state -- confirm */
+        BTRFS_DC_WRITTEN        = 0, /* value 0, so a zero-initialised state reads as WRITTEN */
+        BTRFS_DC_ERROR          = 1,
+        BTRFS_DC_CLEAR          = 2,
+        BTRFS_DC_SETUP          = 3,
+        BTRFS_DC_NEED_WRITE     = 4, /* values are explicit and contiguous, 0 through 4 */
+};
 struct btrfs_caching_control {
        struct list_head list;
        struct mutex mutex;
@@ -763,6 +797,7 @@ struct btrfs_block_group_cache {
        struct btrfs_key key;
        struct btrfs_block_group_item item;
        struct btrfs_fs_info *fs_info;
+        struct inode *inode;
        spinlock_t lock;
        u64 pinned;
        u64 reserved;
@@ -773,8 +808,11 @@ struct btrfs_block_group_cache {
        int extents_thresh;
        int free_extents;
        int total_bitmaps;
-        int ro;
+        int ro:1;
-        int dirty;
+        int dirty:1;
+        int iref:1;
+        int disk_cache_state;
        /* cache tracking stuff */
        int cached;
@@ -863,6 +901,7 @@ struct btrfs_fs_info {
        struct btrfs_transaction *running_transaction;
        wait_queue_head_t transaction_throttle;
        wait_queue_head_t transaction_wait;
+        wait_queue_head_t transaction_blocked_wait;
        wait_queue_head_t async_submit_wait;
        struct btrfs_super_block super_copy;
@@ -949,6 +988,7 @@ struct btrfs_fs_info {
        struct btrfs_workers endio_meta_workers;
        struct btrfs_workers endio_meta_write_workers;
        struct btrfs_workers endio_write_workers;
+        struct btrfs_workers endio_freespace_worker;
        struct btrfs_workers submit_workers;
        /*
         * fixup workers take dirty pages that didn't properly go through
@@ -1192,6 +1232,9 @@ struct btrfs_root {
 #define BTRFS_MOUNT_NOSSD               (1 << 9)
 #define BTRFS_MOUNT_DISCARD             (1 << 10)
 #define BTRFS_MOUNT_FORCE_COMPRESS      (1 << 11)
+#define BTRFS_MOUNT_SPACE_CACHE         (1 << 12)
+#define BTRFS_MOUNT_CLEAR_CACHE         (1 << 13)
+#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
 #define btrfs_clear_opt(o, opt)         ((o) &= ~BTRFS_MOUNT_##opt)
 #define btrfs_set_opt(o, opt)           ((o) |= BTRFS_MOUNT_##opt)
@@ -1665,6 +1708,27 @@ static inline void btrfs_set_dir_item_key(struct extent_buffer *eb,
        write_eb_member(eb, item, struct btrfs_dir_item, location, key);
 }
+BTRFS_SETGET_FUNCS(free_space_entries, struct btrfs_free_space_header,
+                   num_entries, 64); /* NOTE(review): presumably generates get/set helpers for ->num_entries; macro body not shown */
+BTRFS_SETGET_FUNCS(free_space_bitmaps, struct btrfs_free_space_header,
+                   num_bitmaps, 64); /* same pattern for ->num_bitmaps */
+BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header,
+                   generation, 64); /* same pattern for ->generation */
+static inline void btrfs_free_space_key(struct extent_buffer *eb,
+                                        struct btrfs_free_space_header *h,
+                                        struct btrfs_disk_key *key)
+{ /* hands the header's 'location' member to read_eb_member(); presumably copied into *key -- confirm */
+        read_eb_member(eb, h, struct btrfs_free_space_header, location, key);
+}
+static inline void btrfs_set_free_space_key(struct extent_buffer *eb,
+                                            struct btrfs_free_space_header *h,
+                                            struct btrfs_disk_key *key)
+{ /* hands the header's 'location' member to write_eb_member(); presumably stored from *key -- confirm */
+        write_eb_member(eb, h, struct btrfs_free_space_header, location, key);
+}
 /* struct btrfs_disk_key */
 BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key,
                         objectid, 64);
@@ -1876,6 +1940,8 @@ BTRFS_SETGET_STACK_FUNCS(super_incompat_flags, struct btrfs_super_block,
                         incompat_flags, 64);
 BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block,
                         csum_type, 16);
+BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
+                         cache_generation, 64); /* NOTE(review): presumably get/set helpers for ->cache_generation; macro body not shown */
 static inline int btrfs_super_csum_size(struct btrfs_super_block *s)
 {
@@ -1988,6 +2054,12 @@ static inline struct dentry *fdentry(struct file *file)
        return file->f_path.dentry;
 }
+static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
+{ /* true only when space_info->flags has both a METADATA bit and a DATA bit set */
+        return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) &&
+                (space_info->flags & BTRFS_BLOCK_GROUP_DATA)); /* each mask is tested on its own, then ANDed */
+}
 /* extent-tree.c */
 void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
@@ -2079,7 +2151,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
 void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
 int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
-                                int num_items, int *retries);
+                                int num_items);
 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root);
 int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
@@ -2100,7 +2172,7 @@ void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
 int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
                        struct btrfs_root *root,
                        struct btrfs_block_rsv *block_rsv,
-                        u64 num_bytes, int *retries);
+                        u64 num_bytes);
 int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root,
                          struct btrfs_block_rsv *block_rsv,
@@ -2115,6 +2187,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
                             struct btrfs_block_group_cache *cache);
 int btrfs_set_block_group_rw(struct btrfs_root *root,
                             struct btrfs_block_group_cache *cache);
+void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
 /* ctree.c */
 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
                     int level, int *slot);
@@ -2373,7 +2446,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
                               u32 min_type);
 int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
-int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput);
+int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
+                                   int sync);
 int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
                              struct extent_state **cached_state);
 int btrfs_writepages(struct address_space *mapping,
@@ -2426,6 +2500,10 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root);
 int btrfs_prealloc_file_range(struct inode *inode, int mode,
                              u64 start, u64 num_bytes, u64 min_size,
                              loff_t actual_len, u64 *alloc_hint);
+int btrfs_prealloc_file_range_trans(struct inode *inode,
+                                    struct btrfs_trans_handle *trans, int mode,
+                                    u64 start, u64 num_bytes, u64 min_size,
+                                    loff_t actual_len, u64 *alloc_hint);
 extern const struct dentry_operations btrfs_dentry_operations;
 /* ioctl.c */
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index e9103b3baa49..f0cad5ae5be7 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -427,5 +427,5 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
                ret = btrfs_truncate_item(trans, root, path,
                                          item_len - sub_item_len, 1);
        }
-        return 0;
+        return ret;
 }
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5e789f4a3ed0..fb827d0d7181 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -338,7 +338,6 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
        struct extent_io_tree *tree;
        u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
        u64 found_start;
-        int found_level;
        unsigned long len;
        struct extent_buffer *eb;
        int ret;
@@ -369,8 +368,6 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
                WARN_ON(1);
                goto err;
        }
-        found_level = btrfs_header_level(eb);
        csum_tree_block(root, eb, 0);
 err:
        free_extent_buffer(eb);
@@ -481,9 +478,12 @@ static void end_workqueue_bio(struct bio *bio, int err)
        end_io_wq->work.flags = 0;
        if (bio->bi_rw & REQ_WRITE) {
-                if (end_io_wq->metadata)
+                if (end_io_wq->metadata == 1)
                        btrfs_queue_worker(&fs_info->endio_meta_write_workers,
                                           &end_io_wq->work);
+                else if (end_io_wq->metadata == 2)
+                        btrfs_queue_worker(&fs_info->endio_freespace_worker,
+                                           &end_io_wq->work);
                else
                        btrfs_queue_worker(&fs_info->endio_write_workers,
                                           &end_io_wq->work);
@@ -497,6 +497,13 @@ static void end_workqueue_bio(struct bio *bio, int err)
        }
 }
+/*
+ * For the metadata arg you want
+ *
+ * 0 - if data
+ * 1 - if normal metadata
+ * 2 - if writing to the free space cache area
+ */
 int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
                        int metadata)
 {
@@ -533,11 +540,9 @@ int btrfs_congested_async(struct btrfs_fs_info *info, int iodone)
 static void run_one_async_start(struct btrfs_work *work)
 {
-        struct btrfs_fs_info *fs_info;
        struct async_submit_bio *async;
        async = container_of(work, struct  async_submit_bio, work);
-        fs_info = BTRFS_I(async->inode)->root->fs_info;
        async->submit_bio_start(async->inode, async->rw, async->bio,
                               async->mirror_num, async->bio_flags,
                               async->bio_offset);
@@ -850,12 +855,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
                                      u32 blocksize, u64 parent_transid)
 {
        struct extent_buffer *buf = NULL;
-        struct inode *btree_inode = root->fs_info->btree_inode;
-        struct extent_io_tree *io_tree;
        int ret;
-        io_tree = &BTRFS_I(btree_inode)->io_tree;
        buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
        if (!buf)
                return NULL;
@@ -1377,7 +1378,6 @@ static int bio_ready_for_csum(struct bio *bio)
        u64 start = 0;
        struct page *page;
        struct extent_io_tree *io_tree = NULL;
-        struct btrfs_fs_info *info = NULL;
        struct bio_vec *bvec;
        int i;
        int ret;
@@ -1396,7 +1396,6 @@ static int bio_ready_for_csum(struct bio *bio)
                buf_len = page->private >> 2;
                start = page_offset(page) + bvec->bv_offset;
                io_tree = &BTRFS_I(page->mapping->host)->io_tree;
-                info = BTRFS_I(page->mapping->host)->root->fs_info;
        }
        /* are we fully contained in this bio? */
        if (buf_len <= length)
@@ -1680,12 +1679,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        init_waitqueue_head(&fs_info->transaction_throttle);
        init_waitqueue_head(&fs_info->transaction_wait);
+        init_waitqueue_head(&fs_info->transaction_blocked_wait);
        init_waitqueue_head(&fs_info->async_submit_wait);
        __setup_root(4096, 4096, 4096, 4096, tree_root,
                     fs_info, BTRFS_ROOT_TREE_OBJECTID);
        bh = btrfs_read_dev_super(fs_devices->latest_bdev);
        if (!bh)
                goto fail_iput;
@@ -1775,6 +1774,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
                           fs_info->thread_pool_size,
                           &fs_info->generic_worker);
+        btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write",
+                           1, &fs_info->generic_worker);
        /*
         * endios are largely parallel and should have a very
@@ -1795,6 +1796,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        btrfs_start_workers(&fs_info->endio_meta_workers, 1);
        btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
        btrfs_start_workers(&fs_info->endio_write_workers, 1);
+        btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
        fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
        fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -1993,6 +1995,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        if (!(sb->s_flags & MS_RDONLY)) {
                down_read(&fs_info->cleanup_work_sem);
                btrfs_orphan_cleanup(fs_info->fs_root);
+                btrfs_orphan_cleanup(fs_info->tree_root);
                up_read(&fs_info->cleanup_work_sem);
        }
@@ -2035,6 +2038,7 @@ fail_sb_buffer:
        btrfs_stop_workers(&fs_info->endio_meta_workers);
        btrfs_stop_workers(&fs_info->endio_meta_write_workers);
        btrfs_stop_workers(&fs_info->endio_write_workers);
+        btrfs_stop_workers(&fs_info->endio_freespace_worker);
        btrfs_stop_workers(&fs_info->submit_workers);
 fail_iput:
        invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
@@ -2410,6 +2414,7 @@ int close_ctree(struct btrfs_root *root)
        fs_info->closing = 1;
        smp_mb();
+        btrfs_put_block_group_cache(fs_info);
        if (!(fs_info->sb->s_flags & MS_RDONLY)) {
                ret =  btrfs_commit_super(root);
                if (ret)
@@ -2456,6 +2461,7 @@ int close_ctree(struct btrfs_root *root)
        btrfs_stop_workers(&fs_info->endio_meta_workers);
        btrfs_stop_workers(&fs_info->endio_meta_write_workers);
        btrfs_stop_workers(&fs_info->endio_write_workers);
+        btrfs_stop_workers(&fs_info->endio_freespace_worker);
        btrfs_stop_workers(&fs_info->submit_workers);
        btrfs_close_devices(fs_info->fs_devices);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 0b81ecdb101c..0c097f3aec41 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -242,6 +242,12 @@ get_caching_control(struct btrfs_block_group_cache *cache)
                return NULL;
        }
+        /* We're loading it the fast way, so we don't have a caching_ctl. */
+        if (!cache->caching_ctl) {
+                spin_unlock(&cache->lock);
+                return NULL;
+        }
        ctl = cache->caching_ctl;
        atomic_inc(&ctl->count);
        spin_unlock(&cache->lock);
@@ -421,7 +427,9 @@ err:
        return 0;
 }
-static int cache_block_group(struct btrfs_block_group_cache *cache)
+static int cache_block_group(struct btrfs_block_group_cache *cache,
+                             struct btrfs_trans_handle *trans,
+                             int load_cache_only)
 {
        struct btrfs_fs_info *fs_info = cache->fs_info;
        struct btrfs_caching_control *caching_ctl;
@@ -432,6 +440,36 @@ static int cache_block_group(struct btrfs_block_group_cache *cache)
        if (cache->cached != BTRFS_CACHE_NO)
                return 0;
+        /*
+         * We can't do the read from on-disk cache during a commit since we need
+         * to have the normal tree locking.
+         */
+        if (!trans->transaction->in_commit) {
+                spin_lock(&cache->lock);
+                if (cache->cached != BTRFS_CACHE_NO) {
+                        spin_unlock(&cache->lock);
+                        return 0;
+                }
+                cache->cached = BTRFS_CACHE_STARTED;
+                spin_unlock(&cache->lock);
+                ret = load_free_space_cache(fs_info, cache);
+                spin_lock(&cache->lock);
+                if (ret == 1) {
+                        cache->cached = BTRFS_CACHE_FINISHED;
+                        cache->last_byte_to_unpin = (u64)-1;
+                } else {
+                        cache->cached = BTRFS_CACHE_NO;
+                }
+                spin_unlock(&cache->lock);
+                if (ret == 1)
+                        return 0;
+        }
+        if (load_cache_only)
+                return 0;
        caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL);
        BUG_ON(!caching_ctl);
@@ -509,7 +547,7 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
        rcu_read_lock();
        list_for_each_entry_rcu(found, head, list) {
-                if (found->flags == flags) {
+                if (found->flags & flags) {
                        rcu_read_unlock();
                        return found;
                }
@@ -542,6 +580,15 @@ static u64 div_factor(u64 num, int factor)
        return num;
 }
+static u64 div_factor_fine(u64 num, int factor)
+{
+        if (factor == 100)
+                return num;
+        num *= factor;
+        do_div(num, 100);
+        return num;
+}
 u64 btrfs_find_block_group(struct btrfs_root *root,
                           u64 search_start, u64 search_hint, int owner)
 {
@@ -2687,6 +2734,109 @@ next_block_group(struct btrfs_root *root,
        return cache;
 }
+static int cache_save_setup(struct btrfs_block_group_cache *block_group,
+                            struct btrfs_trans_handle *trans,
+                            struct btrfs_path *path)
+{
+        struct btrfs_root *root = block_group->fs_info->tree_root;
+        struct inode *inode = NULL;
+        u64 alloc_hint = 0;
+        int num_pages = 0;
+        int retries = 0;
+        int ret = 0;
+        /*
+         * If this block group is smaller than 100 megs don't bother caching the
+         * block group.
+         */
+        if (block_group->key.offset < (100 * 1024 * 1024)) {
+                spin_lock(&block_group->lock);
+                block_group->disk_cache_state = BTRFS_DC_WRITTEN;
+                spin_unlock(&block_group->lock);
+                return 0;
+        }
+again:
+        inode = lookup_free_space_inode(root, block_group, path);
+        if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
+                ret = PTR_ERR(inode);
+                btrfs_release_path(root, path);
+                goto out;
+        }
+        if (IS_ERR(inode)) {
+                BUG_ON(retries);
+                retries++;
+                if (block_group->ro)
+                        goto out_free;
+                ret = create_free_space_inode(root, trans, block_group, path);
+                if (ret)
+                        goto out_free;
+                goto again;
+        }
+        /*
+         * We want to set the generation to 0, that way if anything goes wrong
+         * from here on out we know not to trust this cache when we load up next
+         * time.
+         */
+        BTRFS_I(inode)->generation = 0;
+        ret = btrfs_update_inode(trans, root, inode);
+        WARN_ON(ret);
+        if (i_size_read(inode) > 0) {
+                ret = btrfs_truncate_free_space_cache(root, trans, path,
+                                                      inode);
+                if (ret)
+                        goto out_put;
+        }
+        spin_lock(&block_group->lock);
+        if (block_group->cached != BTRFS_CACHE_FINISHED) {
+                spin_unlock(&block_group->lock);
+                goto out_put;
+        }
+        spin_unlock(&block_group->lock);
+        num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024);
+        if (!num_pages)
+                num_pages = 1;
+        /*
+         * Just to make absolutely sure we have enough space, we're going to
+         * preallocate 16 pages worth of space per gigabyte of block group (and
+         * at least 16 pages).  In practice we ought to use at most 8, but we
+         * need extra space so we can add our header and have a terminator
+         * between the extents and the bitmaps.
+         */
+        num_pages *= 16;
+        num_pages *= PAGE_CACHE_SIZE;
+        ret = btrfs_check_data_free_space(inode, num_pages);
+        if (ret)
+                goto out_put;
+        ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
+                                              num_pages, num_pages,
+                                              &alloc_hint);
+        btrfs_free_reserved_data_space(inode, num_pages);
+out_put:
+        iput(inode);
+out_free:
+        btrfs_release_path(root, path);
+out:
+        spin_lock(&block_group->lock);
+        if (ret)
+                block_group->disk_cache_state = BTRFS_DC_ERROR;
+        else
+                block_group->disk_cache_state = BTRFS_DC_SETUP;
+        spin_unlock(&block_group->lock);
+        return ret;
+}
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root)
 {
@@ -2699,6 +2849,25 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
        if (!path)
                return -ENOMEM;
+again:
+        while (1) {
+                cache = btrfs_lookup_first_block_group(root->fs_info, last);
+                while (cache) {
+                        if (cache->disk_cache_state == BTRFS_DC_CLEAR)
+                                break;
+                        cache = next_block_group(root, cache);
+                }
+                if (!cache) {
+                        if (last == 0)
+                                break;
+                        last = 0;
+                        continue;
+                }
+                err = cache_save_setup(cache, trans, path);
+                last = cache->key.objectid + cache->key.offset;
+                btrfs_put_block_group(cache);
+        }
        while (1) {
                if (last == 0) {
                        err = btrfs_run_delayed_refs(trans, root,
@@ -2708,6 +2877,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                cache = btrfs_lookup_first_block_group(root->fs_info, last);
                while (cache) {
+                        if (cache->disk_cache_state == BTRFS_DC_CLEAR) {
+                                btrfs_put_block_group(cache);
+                                goto again;
+                        }
                        if (cache->dirty)
                                break;
                        cache = next_block_group(root, cache);
@@ -2719,6 +2893,8 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                        continue;
                }
+                if (cache->disk_cache_state == BTRFS_DC_SETUP)
+                        cache->disk_cache_state = BTRFS_DC_NEED_WRITE;
                cache->dirty = 0;
                last = cache->key.objectid + cache->key.offset;
@@ -2727,6 +2903,52 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                btrfs_put_block_group(cache);
        }
+        while (1) {
+                /*
+                 * I don't think this is needed since we're just marking our
+                 * preallocated extent as written, but just in case it can't
+                 * hurt.
+                 */
+                if (last == 0) {
+                        err = btrfs_run_delayed_refs(trans, root,
+                                                     (unsigned long)-1);
+                        BUG_ON(err);
+                }
+                cache = btrfs_lookup_first_block_group(root->fs_info, last);
+                while (cache) {
+                        /*
+                         * Really this shouldn't happen, but it could if we
+                         * couldn't write the entire preallocated extent and
+                         * splitting the extent resulted in a new block.
+                         */
+                        if (cache->dirty) {
+                                btrfs_put_block_group(cache);
+                                goto again;
+                        }
+                        if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
+                                break;
+                        cache = next_block_group(root, cache);
+                }
+                if (!cache) {
+                        if (last == 0)
+                                break;
+                        last = 0;
+                        continue;
+                }
+                btrfs_write_out_cache(root, trans, cache, path);
+                /*
+                 * If we didn't have an error then the cache state is still
+                 * NEED_WRITE, so we can set it to WRITTEN.
+                 */
+                if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
+                        cache->disk_cache_state = BTRFS_DC_WRITTEN;
+                last = cache->key.objectid + cache->key.offset;
+                btrfs_put_block_group(cache);
+        }
        btrfs_free_path(path);
        return 0;
 }
@@ -2762,6 +2984,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
        if (found) {
                spin_lock(&found->lock);
                found->total_bytes += total_bytes;
+                found->disk_total += total_bytes * factor;
                found->bytes_used += bytes_used;
                found->disk_used += bytes_used * factor;
                found->full = 0;
@@ -2781,6 +3004,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
                                BTRFS_BLOCK_GROUP_SYSTEM |
                                BTRFS_BLOCK_GROUP_METADATA);
        found->total_bytes = total_bytes;
+        found->disk_total = total_bytes * factor;
        found->bytes_used = bytes_used;
        found->disk_used = bytes_used * factor;
        found->bytes_pinned = 0;
@@ -2882,11 +3106,16 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
        struct btrfs_space_info *data_sinfo;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        u64 used;
-        int ret = 0, committed = 0;
+        int ret = 0, committed = 0, alloc_chunk = 1;
        /* make sure bytes are sectorsize aligned */
        bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
+        if (root == root->fs_info->tree_root) {
+                alloc_chunk = 0;
+                committed = 1;
+        }
        data_sinfo = BTRFS_I(inode)->space_info;
        if (!data_sinfo)
                goto alloc;
@@ -2905,7 +3134,7 @@ again:
                 * if we don't have enough free bytes in this space then we need
                 * to alloc a new chunk.
                 */
-                if (!data_sinfo->full) {
+                if (!data_sinfo->full && alloc_chunk) {
                        u64 alloc_target;
                        data_sinfo->force_alloc = 1;
@@ -2997,10 +3226,11 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
        rcu_read_unlock();
 }
-static int should_alloc_chunk(struct btrfs_space_info *sinfo,
+static int should_alloc_chunk(struct btrfs_root *root,
-                              u64 alloc_bytes)
+                              struct btrfs_space_info *sinfo, u64 alloc_bytes)
 {
        u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
+        u64 thresh;
        if (sinfo->bytes_used + sinfo->bytes_reserved +
            alloc_bytes + 256 * 1024 * 1024 < num_bytes)
@@ -3010,6 +3240,12 @@ static int should_alloc_chunk(struct btrfs_space_info *sinfo,
            alloc_bytes < div_factor(num_bytes, 8))
                return 0;
+        thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+        thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
+        if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
+                return 0;
        return 1;
 }
@@ -3041,13 +3277,21 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
                goto out;
        }
-        if (!force && !should_alloc_chunk(space_info, alloc_bytes)) {
+        if (!force && !should_alloc_chunk(extent_root, space_info,
+                                          alloc_bytes)) {
                spin_unlock(&space_info->lock);
                goto out;
        }
        spin_unlock(&space_info->lock);
        /*
+         * If we have mixed data/metadata chunks we want to make sure we keep
+         * allocating mixed chunks instead of individual chunks.
+         */
+        if (btrfs_mixed_space_info(space_info))
+                flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
+        /*
         * if we're doing a data chunk, go ahead and make sure that
         * we keep a reasonable number of metadata chunks allocated in the
         * FS as well.
@@ -3072,55 +3316,25 @@ out:
        return ret;
 }
-static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
-                                struct btrfs_root *root,
-                                struct btrfs_space_info *sinfo, u64 num_bytes)
-{
-        int ret;
-        int end_trans = 0;
-        if (sinfo->full)
-                return 0;
-        spin_lock(&sinfo->lock);
-        ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024);
-        spin_unlock(&sinfo->lock);
-        if (!ret)
-                return 0;
-        if (!trans) {
-                trans = btrfs_join_transaction(root, 1);
-                BUG_ON(IS_ERR(trans));
-                end_trans = 1;
-        }
-        ret = do_chunk_alloc(trans, root->fs_info->extent_root,
-                             num_bytes + 2 * 1024 * 1024,
-                             get_alloc_profile(root, sinfo->flags), 0);
-        if (end_trans)
-                btrfs_end_transaction(trans, root);
-        return ret == 1 ? 1 : 0;
-}
 /*
 * shrink metadata reservation for delalloc
 */
 static int shrink_delalloc(struct btrfs_trans_handle *trans,
-                           struct btrfs_root *root, u64 to_reclaim)
+                           struct btrfs_root *root, u64 to_reclaim, int sync)
 {
        struct btrfs_block_rsv *block_rsv;
+        struct btrfs_space_info *space_info;
        u64 reserved;
        u64 max_reclaim;
        u64 reclaimed = 0;
        int pause = 1;
-        int ret;
+        int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
        block_rsv = &root->fs_info->delalloc_block_rsv;
-        spin_lock(&block_rsv->lock);
+        space_info = block_rsv->space_info;
-        reserved = block_rsv->reserved;
-        spin_unlock(&block_rsv->lock);
+        smp_mb();
+        reserved = space_info->bytes_reserved;
        if (reserved == 0)
                return 0;
@@ -3128,104 +3342,169 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
        max_reclaim = min(reserved, to_reclaim);
        while (1) {
-                ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0);
+                /* have the flusher threads jump in and do some IO */
-                if (!ret) {
+                smp_mb();
-                        __set_current_state(TASK_INTERRUPTIBLE);
+                nr_pages = min_t(unsigned long, nr_pages,
-                        schedule_timeout(pause);
+                       root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
-                        pause <<= 1;
+                writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
-                        if (pause > HZ / 10)
-                                pause = HZ / 10;
-                } else {
-                        pause = 1;
-                }
-                spin_lock(&block_rsv->lock);
+                spin_lock(&space_info->lock);
-                if (reserved > block_rsv->reserved)
+                if (reserved > space_info->bytes_reserved)
-                        reclaimed = reserved - block_rsv->reserved;
+                        reclaimed += reserved - space_info->bytes_reserved;
-                reserved = block_rsv->reserved;
+                reserved = space_info->bytes_reserved;
-                spin_unlock(&block_rsv->lock);
+                spin_unlock(&space_info->lock);
                if (reserved == 0 || reclaimed >= max_reclaim)
                        break;
                if (trans && trans->transaction->blocked)
                        return -EAGAIN;
+                __set_current_state(TASK_INTERRUPTIBLE);
+                schedule_timeout(pause);
+                pause <<= 1;
+                if (pause > HZ / 10)
+                        pause = HZ / 10;
        }
        return reclaimed >= to_reclaim;
 }
-static int should_retry_reserve(struct btrfs_trans_handle *trans,
+/*
-                                struct btrfs_root *root,
+ * Try to reserve orig_bytes of metadata space from block_rsv->space_info.
-                                struct btrfs_block_rsv *block_rsv,
+ *
-                                u64 num_bytes, int *retries)
+ * If the space is not free right away, orig_bytes is still added to
+ * bytes_reserved to hold our place while we try to free space up; that
+ * placeholder is dropped again if we end up failing.
+ *
+ * With flush == 0 we fail after the first attempt.  Otherwise we shrink
+ * delalloc, retry, and finally commit the transaction once if the caller did
+ * not pass in a transaction handle.  Returns 0 or a negative errno.
+ */
+static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
+                                  struct btrfs_root *root,
+                                  struct btrfs_block_rsv *block_rsv,
+                                  u64 orig_bytes, int flush)
 {
        struct btrfs_space_info *space_info = block_rsv->space_info;
-        int ret;
+        u64 unused;
+        u64 num_bytes = orig_bytes;
+        int retries = 0;
+        int ret = 0;
+        bool reserved = false;
+        bool committed = false;
-        if ((*retries) > 2)
+again:
-                return -ENOSPC;
+        ret = -ENOSPC;
+        if (reserved)
+                num_bytes = 0;
-        ret = maybe_allocate_chunk(trans, root, space_info, num_bytes);
+        spin_lock(&space_info->lock);
-        if (ret)
+        unused = space_info->bytes_used + space_info->bytes_reserved +
-                return 1;
+                 space_info->bytes_pinned + space_info->bytes_readonly +
+                 space_info->bytes_may_use;
-        if (trans && trans->transaction->in_commit)
+        /*
-                return -ENOSPC;
+         * The idea here is that if we've not already over-reserved the block
+         * group then we can go ahead and save our reservation first and then
+         * start flushing if we need to.  Otherwise if we've already
+         * overcommitted lets start flushing stuff first and then come back and
+         * try to make our reservation.
+         */
+        if (unused <= space_info->total_bytes) {
+                unused = space_info->total_bytes - unused;
+                if (unused >= num_bytes) {
+                        if (!reserved)
+                                space_info->bytes_reserved += orig_bytes;
+                        ret = 0;
+                } else {
+                        /*
+                         * Ok set num_bytes to orig_bytes since we aren't
+                         * overcommitted, this way we only try and reclaim what
+                         * we need.
+                         */
+                        num_bytes = orig_bytes;
+                }
+        } else {
+                /*
+                 * Ok we're over committed, set num_bytes to the overcommitted
+                 * amount plus the amount of bytes that we need for this
+                 * reservation.
+                 */
+                num_bytes = unused - space_info->total_bytes +
+                        (orig_bytes * (retries + 1));
+        }
-        ret = shrink_delalloc(trans, root, num_bytes);
+        /*
-        if (ret)
+         * Couldn't make our reservation, save our place so while we're trying
-                return ret;
+         * to reclaim space we can actually use it instead of somebody else
+         * stealing it from us.
+         */
+        if (ret && !reserved) {
+                space_info->bytes_reserved += orig_bytes;
+                reserved = true;
+        }
-        spin_lock(&space_info->lock);
-        if (space_info->bytes_pinned < num_bytes)
-                ret = 1;
        spin_unlock(&space_info->lock);
-        if (ret)
-                return -ENOSPC;
-        (*retries)++;
-        if (trans)
+        if (!ret)
-                return -EAGAIN;
+                return 0;
-        trans = btrfs_join_transaction(root, 1);
+        if (!flush)
-        BUG_ON(IS_ERR(trans));
+                goto out;
-        ret = btrfs_commit_transaction(trans, root);
-        BUG_ON(ret);
-        return 1;
+        /*
-}
+         * We do synchronous shrinking since we don't actually unreserve
+         * metadata until after the IO is completed.
+         */
+        ret = shrink_delalloc(trans, root, num_bytes, 1);
+        if (ret > 0)
+                return 0;
+        else if (ret < 0)
+                goto out;
-static int reserve_metadata_bytes(struct btrfs_block_rsv *block_rsv,
+        /*
-                                  u64 num_bytes)
+         * So if we were overcommitted it's possible that somebody else flushed
-{
+         * out enough space and we simply didn't have enough space to reclaim,
-        struct btrfs_space_info *space_info = block_rsv->space_info;
+         * so go back around and try again.
-        u64 unused;
+         */
-        int ret = -ENOSPC;
+        if (retries < 2) {
+                retries++;
+                goto again;
+        }
        spin_lock(&space_info->lock);
-        unused = space_info->bytes_used + space_info->bytes_reserved +
+        /*
-                 space_info->bytes_pinned + space_info->bytes_readonly;
+         * Not enough space to be reclaimed, don't bother committing the
+         * transaction.
+         */
+        if (space_info->bytes_pinned < orig_bytes)
+                ret = -ENOSPC;
+        spin_unlock(&space_info->lock);
+        if (ret)
+                goto out;
-        if (unused < space_info->total_bytes)
+        ret = -EAGAIN;
-                unused = space_info->total_bytes - unused;
+        if (trans || committed)
-        else
+                goto out;
-                unused = 0;
-        if (unused >= num_bytes) {
+        ret = -ENOSPC;
-                if (block_rsv->priority >= 10) {
+        trans = btrfs_join_transaction(root, 1);
-                        space_info->bytes_reserved += num_bytes;
+        if (IS_ERR(trans))
-                        ret = 0;
+                goto out;
-                } else {
+        ret = btrfs_commit_transaction(trans, root);
-                        if ((unused + block_rsv->reserved) *
+        if (!ret) {
-                            block_rsv->priority >=
+                trans = NULL;
-                            (num_bytes + block_rsv->reserved) * 10) {
+                committed = true;
-                                space_info->bytes_reserved += num_bytes;
+                goto again;
-                                ret = 0;
+        }
-                        }
-                }
+out:
+        if (reserved) {
+                spin_lock(&space_info->lock);
+                space_info->bytes_reserved -= orig_bytes;
+                spin_unlock(&space_info->lock);
        }
-        spin_unlock(&space_info->lock);
        return ret;
 }
@@ -3327,18 +3606,14 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
 {
        struct btrfs_block_rsv *block_rsv;
        struct btrfs_fs_info *fs_info = root->fs_info;
-        u64 alloc_target;
        block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
        if (!block_rsv)
                return NULL;
        btrfs_init_block_rsv(block_rsv);
-        alloc_target = btrfs_get_alloc_profile(root, 0);
        block_rsv->space_info = __find_space_info(fs_info,
                                                  BTRFS_BLOCK_GROUP_METADATA);
        return block_rsv;
 }
@@ -3369,23 +3644,19 @@ void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
 int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
                        struct btrfs_root *root,
                        struct btrfs_block_rsv *block_rsv,
-                        u64 num_bytes, int *retries)
+                        u64 num_bytes)
 {
        int ret;
        if (num_bytes == 0)
                return 0;
-again:
-        ret = reserve_metadata_bytes(block_rsv, num_bytes);
+        ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1);
        if (!ret) {
                block_rsv_add_bytes(block_rsv, num_bytes, 1);
                return 0;
        }
-        ret = should_retry_reserve(trans, root, block_rsv, num_bytes, retries);
-        if (ret > 0)
-                goto again;
        return ret;
 }
@@ -3420,7 +3691,8 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
                return 0;
        if (block_rsv->refill_used) {
-                ret = reserve_metadata_bytes(block_rsv, num_bytes);
+                ret = reserve_metadata_bytes(trans, root, block_rsv,
+                                             num_bytes, 0);
                if (!ret) {
                        block_rsv_add_bytes(block_rsv, num_bytes, 0);
                        return 0;
@@ -3499,6 +3771,8 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
        sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
        spin_lock(&sinfo->lock);
+        if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA)
+                data_used = 0;
        meta_used = sinfo->bytes_used;
        spin_unlock(&sinfo->lock);
@@ -3526,7 +3800,8 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
        block_rsv->size = num_bytes;
        num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
-                    sinfo->bytes_reserved + sinfo->bytes_readonly;
+                    sinfo->bytes_reserved + sinfo->bytes_readonly +
+                    sinfo->bytes_may_use;
        if (sinfo->total_bytes > num_bytes) {
                num_bytes = sinfo->total_bytes - num_bytes;
@@ -3597,7 +3872,7 @@ static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items)
 int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *root,
-                                 int num_items, int *retries)
+                                 int num_items)
 {
        u64 num_bytes;
        int ret;
@@ -3607,7 +3882,7 @@ int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
        num_bytes = calc_trans_metadata_size(root, num_items);
        ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
-                                  num_bytes, retries);
+                                  num_bytes);
        if (!ret) {
                trans->bytes_reserved += num_bytes;
                trans->block_rsv = &root->fs_info->trans_block_rsv;
@@ -3681,14 +3956,13 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
        struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
        u64 to_reserve;
        int nr_extents;
-        int retries = 0;
        int ret;
        if (btrfs_transaction_in_commit(root->fs_info))
                schedule_timeout(1);
        num_bytes = ALIGN(num_bytes, root->sectorsize);
-again:
        spin_lock(&BTRFS_I(inode)->accounting_lock);
        nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
        if (nr_extents > BTRFS_I(inode)->reserved_extents) {
@@ -3698,18 +3972,14 @@ again:
                nr_extents = 0;
                to_reserve = 0;
        }
+        spin_unlock(&BTRFS_I(inode)->accounting_lock);
        to_reserve += calc_csum_metadata_size(inode, num_bytes);
-        ret = reserve_metadata_bytes(block_rsv, to_reserve);
+        ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
-        if (ret) {
+        if (ret)
-                spin_unlock(&BTRFS_I(inode)->accounting_lock);
-                ret = should_retry_reserve(NULL, root, block_rsv, to_reserve,
-                                           &retries);
-                if (ret > 0)
-                        goto again;
                return ret;
-        }
+        spin_lock(&BTRFS_I(inode)->accounting_lock);
        BTRFS_I(inode)->reserved_extents += nr_extents;
        atomic_inc(&BTRFS_I(inode)->outstanding_extents);
        spin_unlock(&BTRFS_I(inode)->accounting_lock);
@@ -3717,7 +3987,7 @@ again:
        block_rsv_add_bytes(block_rsv, to_reserve, 1);
        if (block_rsv->size > 512 * 1024 * 1024)
-                shrink_delalloc(NULL, root, to_reserve);
+                shrink_delalloc(NULL, root, to_reserve, 0);
        return 0;
 }
@@ -3776,12 +4046,12 @@ static int update_block_group(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              u64 bytenr, u64 num_bytes, int alloc)
 {
-        struct btrfs_block_group_cache *cache;
+        struct btrfs_block_group_cache *cache = NULL;
        struct btrfs_fs_info *info = root->fs_info;
-        int factor;
        u64 total = num_bytes;
        u64 old_val;
        u64 byte_in_group;
+        int factor;
        /* block accounting for super block */
        spin_lock(&info->delalloc_lock);
@@ -3803,11 +4073,25 @@ static int update_block_group(struct btrfs_trans_handle *trans,
                        factor = 2;
                else
                        factor = 1;
+                /*
+                 * If this block group has free space cache written out, we
+                 * need to make sure to load it if we are removing space.  This
+                 * is because we need the unpinning stage to actually add the
+                 * space back to the block group, otherwise we will leak space.
+                 */
+                if (!alloc && cache->cached == BTRFS_CACHE_NO)
+                        cache_block_group(cache, trans, 1);
                byte_in_group = bytenr - cache->key.objectid;
                WARN_ON(byte_in_group > cache->key.offset);
                spin_lock(&cache->space_info->lock);
                spin_lock(&cache->lock);
+                if (btrfs_super_cache_generation(&info->super_copy) != 0 &&
+                    cache->disk_cache_state < BTRFS_DC_CLEAR)
+                        cache->disk_cache_state = BTRFS_DC_CLEAR;
                cache->dirty = 1;
                old_val = btrfs_block_group_used(&cache->item);
                num_bytes = min(total, cache->key.offset - byte_in_group);
@@ -4554,6 +4838,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
        bool found_uncached_bg = false;
        bool failed_cluster_refill = false;
        bool failed_alloc = false;
+        bool use_cluster = true;
        u64 ideal_cache_percent = 0;
        u64 ideal_cache_offset = 0;
@@ -4568,16 +4853,24 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
                return -ENOSPC;
        }
+        /*
+         * If the space info is for both data and metadata it means we have a
+         * small filesystem and we can't use the clustering stuff.
+         */
+        if (btrfs_mixed_space_info(space_info))
+                use_cluster = false;
        if (orig_root->ref_cows || empty_size)
                allowed_chunk_alloc = 1;
-        if (data & BTRFS_BLOCK_GROUP_METADATA) {
+        if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
                last_ptr = &root->fs_info->meta_alloc_cluster;
                if (!btrfs_test_opt(root, SSD))
                        empty_cluster = 64 * 1024;
        }
-        if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) {
+        if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
+            btrfs_test_opt(root, SSD)) {
                last_ptr = &root->fs_info->data_alloc_cluster;
        }
@@ -4641,6 +4934,10 @@ have_block_group:
                if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
                        u64 free_percent;
+                        ret = cache_block_group(block_group, trans, 1);
+                        if (block_group->cached == BTRFS_CACHE_FINISHED)
+                                goto have_block_group;
                        free_percent = btrfs_block_group_used(&block_group->item);
                        free_percent *= 100;
                        free_percent = div64_u64(free_percent,
@@ -4661,7 +4958,7 @@ have_block_group:
                        if (loop > LOOP_CACHING_NOWAIT ||
                            (loop > LOOP_FIND_IDEAL &&
                             atomic_read(&space_info->caching_threads) < 2)) {
-                                ret = cache_block_group(block_group);
+                                ret = cache_block_group(block_group, trans, 0);
                                BUG_ON(ret);
                        }
                        found_uncached_bg = true;
@@ -5218,7 +5515,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
        u64 num_bytes = ins->offset;
        block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
-        cache_block_group(block_group);
+        cache_block_group(block_group, trans, 0);
        caching_ctl = get_caching_control(block_group);
        if (!caching_ctl) {
@@ -5308,7 +5605,8 @@ use_block_rsv(struct btrfs_trans_handle *trans,
        block_rsv = get_block_rsv(trans, root);
        if (block_rsv->size == 0) {
-                ret = reserve_metadata_bytes(block_rsv, blocksize);
+                ret = reserve_metadata_bytes(trans, root, block_rsv,
+                                             blocksize, 0);
                if (ret)
                        return ERR_PTR(ret);
                return block_rsv;
@@ -5318,11 +5616,6 @@ use_block_rsv(struct btrfs_trans_handle *trans,
        if (!ret)
                return block_rsv;
-        WARN_ON(1);
-        printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n",
-                block_rsv->size, block_rsv->reserved,
-                block_rsv->freed[0], block_rsv->freed[1]);
        return ERR_PTR(-ENOSPC);
 }
@@ -5421,7 +5714,6 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
        u64 generation;
        u64 refs;
        u64 flags;
-        u64 last = 0;
        u32 nritems;
        u32 blocksize;
        struct btrfs_key key;
@@ -5489,7 +5781,6 @@ reada:
                                           generation);
                if (ret)
                        break;
-                last = bytenr + blocksize;
                nread++;
        }
        wc->reada_slot = slot;
@@ -7813,6 +8104,40 @@ out:
        return ret;
 }
+/*
+ * Walk the block groups of @info and, for each one that has ->iref set,
+ * clear ->iref and ->inode under the block group lock and iput() the inode
+ * that was stored there.  After a hit the search resumes just past that
+ * block group; a pass that finds nothing restarts from offset 0, and the
+ * walk ends once a pass that started at 0 finds nothing.
+ */
+void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
+{
+        struct btrfs_block_group_cache *bg;
+        u64 search_start = 0;
+        for (;;) {
+                struct inode *cache_inode;
+                /* stop on the first group with iref set, its lock still held */
+                for (bg = btrfs_lookup_first_block_group(info, search_start);
+                     bg; bg = next_block_group(info->tree_root, bg)) {
+                        spin_lock(&bg->lock);
+                        if (bg->iref)
+                                break;
+                        spin_unlock(&bg->lock);
+                }
+                if (!bg) {
+                        /* a full pass from 0 found nothing: we are done */
+                        if (search_start == 0)
+                                return;
+                        search_start = 0;
+                        continue;
+                }
+                cache_inode = bg->inode;
+                bg->iref = 0;
+                bg->inode = NULL;
+                spin_unlock(&bg->lock);
+                iput(cache_inode);
+                search_start = bg->key.objectid + bg->key.offset;
+                btrfs_put_block_group(bg);
+        }
+}
 int btrfs_free_block_groups(struct btrfs_fs_info *info)
 {
        struct btrfs_block_group_cache *block_group;
@@ -7896,6 +8221,8 @@ int btrfs_read_block_groups(struct btrfs_root *root)
        struct btrfs_key key;
        struct btrfs_key found_key;
        struct extent_buffer *leaf;
+        int need_clear = 0;
+        u64 cache_gen;
        root = info->extent_root;
        key.objectid = 0;
@@ -7905,6 +8232,15 @@ int btrfs_read_block_groups(struct btrfs_root *root)
        if (!path)
                return -ENOMEM;
+        cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy);
+        if (cache_gen != 0 &&
+            btrfs_super_generation(&root->fs_info->super_copy) != cache_gen)
+                need_clear = 1;
+        if (btrfs_test_opt(root, CLEAR_CACHE))
+                need_clear = 1;
+        if (!btrfs_test_opt(root, SPACE_CACHE) && cache_gen)
+                printk(KERN_INFO "btrfs: disk space caching is enabled\n");
        while (1) {
                ret = find_first_block_group(root, path, &key);
                if (ret > 0)
@@ -7927,6 +8263,9 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                INIT_LIST_HEAD(&cache->list);
                INIT_LIST_HEAD(&cache->cluster_list);
+                if (need_clear)
+                        cache->disk_cache_state = BTRFS_DC_CLEAR;
                /*
                 * we only want to have 32k of ram per block group for keeping
                 * track of free space, and if we pass 1/2 of that we want to
@@ -8031,6 +8370,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
        cache->key.offset = size;
        cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
        cache->sectorsize = root->sectorsize;
+        cache->fs_info = root->fs_info;
        /*
         * we only want to have 32k of ram per block group for keeping track
@@ -8087,8 +8427,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
        struct btrfs_path *path;
        struct btrfs_block_group_cache *block_group;
        struct btrfs_free_cluster *cluster;
+        struct btrfs_root *tree_root = root->fs_info->tree_root;
        struct btrfs_key key;
+        struct inode *inode;
        int ret;
+        int factor;
        root = root->fs_info->extent_root;
@@ -8097,6 +8440,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
        BUG_ON(!block_group->ro);
        memcpy(&key, &block_group->key, sizeof(key));
+        if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
+                                  BTRFS_BLOCK_GROUP_RAID1 |
+                                  BTRFS_BLOCK_GROUP_RAID10))
+                factor = 2;
+        else
+                factor = 1;
        /* make sure this block group isn't part of an allocation cluster */
        cluster = &root->fs_info->data_alloc_cluster;
@@ -8116,6 +8465,40 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
        path = btrfs_alloc_path();
        BUG_ON(!path);
+        inode = lookup_free_space_inode(root, block_group, path);
+        if (!IS_ERR(inode)) {
+                btrfs_orphan_add(trans, inode);
+                clear_nlink(inode);
+                /* One for the block group's ref */
+                spin_lock(&block_group->lock);
+                if (block_group->iref) {
+                        block_group->iref = 0;
+                        block_group->inode = NULL;
+                        spin_unlock(&block_group->lock);
+                        iput(inode);
+                } else {
+                        spin_unlock(&block_group->lock);
+                }
+                /* One for our lookup ref */
+                iput(inode);
+        }
+        key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+        key.offset = block_group->key.objectid;
+        key.type = 0;
+        ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
+        if (ret < 0)
+                goto out;
+        if (ret > 0)
+                btrfs_release_path(tree_root, path);
+        if (ret == 0) {
+                ret = btrfs_del_item(trans, tree_root, path);
+                if (ret)
+                        goto out;
+                btrfs_release_path(tree_root, path);
+        }
        spin_lock(&root->fs_info->block_group_cache_lock);
        rb_erase(&block_group->cache_node,
                 &root->fs_info->block_group_cache_tree);
@@ -8137,8 +8520,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
        spin_lock(&block_group->space_info->lock);
        block_group->space_info->total_bytes -= block_group->key.offset;
        block_group->space_info->bytes_readonly -= block_group->key.offset;
+        block_group->space_info->disk_total -= block_group->key.offset * factor;
        spin_unlock(&block_group->space_info->lock);
+        memcpy(&key, &block_group->key, sizeof(key));
        btrfs_clear_space_info_full(root->fs_info);
        btrfs_put_block_group(block_group);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d74e6af9b53a..eac10e3260a9 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -104,7 +104,7 @@ void extent_io_tree_init(struct extent_io_tree *tree,
                          struct address_space *mapping, gfp_t mask)
 {
        tree->state = RB_ROOT;
-        tree->buffer = RB_ROOT;
+        INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC);
        tree->ops = NULL;
        tree->dirty_bytes = 0;
        spin_lock_init(&tree->lock);
@@ -235,50 +235,6 @@ static inline struct rb_node *tree_search(struct extent_io_tree *tree,
        return ret;
 }
-static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree,
-                                          u64 offset, struct rb_node *node)
-{
-        struct rb_root *root = &tree->buffer;
-        struct rb_node **p = &root->rb_node;
-        struct rb_node *parent = NULL;
-        struct extent_buffer *eb;
-        while (*p) {
-                parent = *p;
-                eb = rb_entry(parent, struct extent_buffer, rb_node);
-                if (offset < eb->start)
-                        p = &(*p)->rb_left;
-                else if (offset > eb->start)
-                        p = &(*p)->rb_right;
-                else
-                        return eb;
-        }
-        rb_link_node(node, parent, p);
-        rb_insert_color(node, root);
-        return NULL;
-}
-static struct extent_buffer *buffer_search(struct extent_io_tree *tree,
-                                           u64 offset)
-{
-        struct rb_root *root = &tree->buffer;
-        struct rb_node *n = root->rb_node;
-        struct extent_buffer *eb;
-        while (n) {
-                eb = rb_entry(n, struct extent_buffer, rb_node);
-                if (offset < eb->start)
-                        n = n->rb_left;
-                else if (offset > eb->start)
-                        n = n->rb_right;
-                else
-                        return eb;
-        }
-        return NULL;
-}
 static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
                     struct extent_state *other)
 {
@@ -1901,10 +1857,8 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
        struct page *page = bvec->bv_page;
        struct extent_io_tree *tree = bio->bi_private;
        u64 start;
-        u64 end;
        start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
-        end = start + bvec->bv_len - 1;
        bio->bi_private = NULL;
@@ -2204,7 +2158,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
        u64 last_byte = i_size_read(inode);
        u64 block_start;
        u64 iosize;
-        u64 unlock_start;
        sector_t sector;
        struct extent_state *cached_state = NULL;
        struct extent_map *em;
@@ -2329,7 +2282,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                if (tree->ops && tree->ops->writepage_end_io_hook)
                        tree->ops->writepage_end_io_hook(page, start,
                                                         page_end, NULL, 1);
-                unlock_start = page_end + 1;
                goto done;
        }
@@ -2340,7 +2292,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                        if (tree->ops && tree->ops->writepage_end_io_hook)
                                tree->ops->writepage_end_io_hook(page, cur,
                                                         page_end, NULL, 1);
-                        unlock_start = page_end + 1;
                        break;
                }
                em = epd->get_extent(inode, page, pg_offset, cur,
@@ -2387,7 +2338,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                        cur += iosize;
                        pg_offset += iosize;
-                        unlock_start = cur;
                        continue;
                }
                /* leave this out until we have a page_mkwrite call */
@@ -2473,7 +2423,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
        pgoff_t index;
        pgoff_t end;            /* Inclusive */
        int scanned = 0;
-        int range_whole = 0;
        pagevec_init(&pvec, 0);
        if (wbc->range_cyclic) {
@@ -2482,8 +2431,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
        } else {
                index = wbc->range_start >> PAGE_CACHE_SHIFT;
                end = wbc->range_end >> PAGE_CACHE_SHIFT;
-                if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
-                        range_whole = 1;
                scanned = 1;
        }
 retry:
@@ -2823,6 +2770,8 @@ int extent_prepare_write(struct extent_io_tree *tree,
                                         NULL, 1,
                                         end_bio_extent_preparewrite, 0,
                                         0, 0);
+                        if (ret && !err)
+                                err = ret;
                        iocount++;
                        block_start = block_start + iosize;
                } else {
@@ -3104,6 +3053,39 @@ static void __free_extent_buffer(struct extent_buffer *eb)
        kmem_cache_free(extent_buffer_cache, eb);
 }
+/*
+ * Call page_cache_release() on the pages of @eb, walking from the buffer's
+ * last page down to (and including) page number @start_idx.  Does nothing
+ * when @eb has no first page or when @start_idx is not below the number of
+ * pages the buffer spans.
+ */
+static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
+                                             unsigned long start_idx)
+{
+        unsigned long i;
+        if (!eb->first_page)
+                return;
+        /* count down: page (i - 1) is handled while i is above start_idx */
+        for (i = num_extent_pages(eb->start, eb->len); i > start_idx; i--) {
+                struct page *p = extent_buffer_page(eb, i - 1);
+                if (p)
+                        page_cache_release(p);
+        }
+}
+/*
+ * Helper for releasing the extent buffer: releases the buffer's pages,
+ * starting from page index 0, via btrfs_release_extent_buffer_page() and
+ * then frees @eb itself with __free_extent_buffer().
+ */
+static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
+{
+        btrfs_release_extent_buffer_page(eb, 0);
+        __free_extent_buffer(eb);
+}
 struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
                                          u64 start, unsigned long len,
                                          struct page *page0,
@@ -3117,16 +3099,16 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
        struct page *p;
        struct address_space *mapping = tree->mapping;
        int uptodate = 1;
+        int ret;
-        spin_lock(&tree->buffer_lock);
+        rcu_read_lock();
-        eb = buffer_search(tree, start);
+        eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
-        if (eb) {
+        if (eb && atomic_inc_not_zero(&eb->refs)) {
-                atomic_inc(&eb->refs);
+                rcu_read_unlock();
-                spin_unlock(&tree->buffer_lock);
                mark_page_accessed(eb->first_page);
                return eb;
        }
-        spin_unlock(&tree->buffer_lock);
+        rcu_read_unlock();
        eb = __alloc_extent_buffer(tree, start, len, mask);
        if (!eb)
@@ -3165,26 +3147,31 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
        if (uptodate)
                set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
+        ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
+        if (ret)
+                goto free_eb;
        spin_lock(&tree->buffer_lock);
-        exists = buffer_tree_insert(tree, start, &eb->rb_node);
+        ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb);
-        if (exists) {
+        if (ret == -EEXIST) {
+                exists = radix_tree_lookup(&tree->buffer,
+                                                start >> PAGE_CACHE_SHIFT);
                /* add one reference for the caller */
                atomic_inc(&exists->refs);
                spin_unlock(&tree->buffer_lock);
+                radix_tree_preload_end();
                goto free_eb;
        }
        /* add one reference for the tree */
        atomic_inc(&eb->refs);
        spin_unlock(&tree->buffer_lock);
+        radix_tree_preload_end();
        return eb;
 free_eb:
        if (!atomic_dec_and_test(&eb->refs))
                return exists;
-        for (index = 1; index < i; index++)
+        btrfs_release_extent_buffer(eb);
-                page_cache_release(extent_buffer_page(eb, index));
-        page_cache_release(extent_buffer_page(eb, 0));
-        __free_extent_buffer(eb);
        return exists;
 }
@@ -3194,16 +3181,16 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
 {
        struct extent_buffer *eb;
-        spin_lock(&tree->buffer_lock);
+        rcu_read_lock();
-        eb = buffer_search(tree, start);
+        eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
-        if (eb)
+        if (eb && atomic_inc_not_zero(&eb->refs)) {
-                atomic_inc(&eb->refs);
+                rcu_read_unlock();
-        spin_unlock(&tree->buffer_lock);
-        if (eb)
                mark_page_accessed(eb->first_page);
+                return eb;
+        }
+        rcu_read_unlock();
-        return eb;
+        return NULL;
 }
 void free_extent_buffer(struct extent_buffer *eb)
@@ -3833,34 +3820,45 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
        }
 }
+/*
+ * RCU callback: map @head back to the extent_buffer that embeds it and
+ * release that buffer.
+ */
+static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
+{
+        btrfs_release_extent_buffer(container_of(head, struct extent_buffer,
+                                                 rcu_head));
+}
 int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
 {
        u64 start = page_offset(page);
        struct extent_buffer *eb;
        int ret = 1;
-        unsigned long i;
-        unsigned long num_pages;
        spin_lock(&tree->buffer_lock);
-        eb = buffer_search(tree, start);
+        eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
        if (!eb)
                goto out;
-        if (atomic_read(&eb->refs) > 1) {
+        if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
                ret = 0;
                goto out;
        }
-        if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
+        /*
+         * set @eb->refs to 0 if it is already 1, and then release the @eb.
+         * Or go back.
+         */
+        if (atomic_cmpxchg(&eb->refs, 1, 0) != 1) {
                ret = 0;
                goto out;
        }
-        /* at this point we can safely release the extent buffer */
-        num_pages = num_extent_pages(eb->start, eb->len);
+        radix_tree_delete(&tree->buffer, start >> PAGE_CACHE_SHIFT);
-        for (i = 0; i < num_pages; i++)
-                page_cache_release(extent_buffer_page(eb, i));
-        rb_erase(&eb->rb_node, &tree->buffer);
-        __free_extent_buffer(eb);
 out:
        spin_unlock(&tree->buffer_lock);
+        /* at this point we can safely release the extent buffer */
+        if (atomic_read(&eb->refs) == 0)
+                call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
        return ret;
 }
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 5691c7b590da..1c6d4f342ef7 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -85,7 +85,7 @@ struct extent_io_ops {
 struct extent_io_tree {
        struct rb_root state;
-        struct rb_root buffer;
+        struct radix_tree_root buffer;
        struct address_space *mapping;
        u64 dirty_bytes;
        spinlock_t lock;
@@ -123,7 +123,7 @@ struct extent_buffer {
        unsigned long bflags;
        atomic_t refs;
        struct list_head leak_list;
-        struct rb_node rb_node;
+        struct rcu_head rcu_head;
        /* the spinlock is used to protect most operations */
        spinlock_t lock;
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 454ca52d6451..23cb8da3ff66 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -335,7 +335,7 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
                goto out;
        }
        if (IS_ERR(rb_node)) {
-                em = ERR_PTR(PTR_ERR(rb_node));
+                em = ERR_CAST(rb_node);
                goto out;
        }
        em = rb_entry(rb_node, struct extent_map, rb_node);
@@ -384,7 +384,7 @@ struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
                goto out;
        }
        if (IS_ERR(rb_node)) {
-                em = ERR_PTR(PTR_ERR(rb_node));
+                em = ERR_CAST(rb_node);
                goto out;
        }
        em = rb_entry(rb_node, struct extent_map, rb_node);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index f488fac04d99..22ee0dc2e6b8 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -23,10 +23,761 @@
 #include "ctree.h"
 #include "free-space-cache.h"
 #include "transaction.h"
+#include "disk-io.h"
 #define BITS_PER_BITMAP         (PAGE_CACHE_SIZE * 8)
 #define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
+static void recalculate_thresholds(struct btrfs_block_group_cache
+                                   *block_group);
+static int link_free_space(struct btrfs_block_group_cache *block_group,
+                           struct btrfs_free_space *info);
+/*
+ * Return a referenced inode for the free space cache of @block_group.
+ *
+ * An inode already cached on the block group is reused.  Otherwise the free
+ * space header item keyed by the block group's objectid is searched for in
+ * @root, the inode key stored in it is handed to btrfs_iget(), and the result
+ * is cached on the block group unless the filesystem is closing.
+ *
+ * @path is used for the tree search; it is released on the not-found path and
+ * once the header has been read.
+ *
+ * Returns the inode, or an ERR_PTR: the btrfs_search_slot() error, -ENOENT
+ * when there is no header item or the inode is missing or bad, or the error
+ * pointer btrfs_iget() returned.
+ */
+struct inode *lookup_free_space_inode(struct btrfs_root *root,
+                                      struct btrfs_block_group_cache
+                                      *block_group, struct btrfs_path *path)
+{
+        struct btrfs_free_space_header *hdr;
+        struct btrfs_disk_key raw_location;
+        struct btrfs_key inode_location;
+        struct btrfs_key search_key;
+        struct extent_buffer *eb;
+        struct inode *inode;
+        int err;
+        /* Fast path: the block group already carries its cache inode. */
+        spin_lock(&block_group->lock);
+        inode = block_group->inode ? igrab(block_group->inode) : NULL;
+        spin_unlock(&block_group->lock);
+        if (inode)
+                return inode;
+        search_key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+        search_key.type = 0;
+        search_key.offset = block_group->key.objectid;
+        err = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
+        if (err) {
+                if (err < 0)
+                        return ERR_PTR(err);
+                /* No exact match: this block group has no header item. */
+                btrfs_release_path(root, path);
+                return ERR_PTR(-ENOENT);
+        }
+        /* Pull the inode's key out of the header, then drop the path. */
+        eb = path->nodes[0];
+        hdr = btrfs_item_ptr(eb, path->slots[0],
+                             struct btrfs_free_space_header);
+        btrfs_free_space_key(eb, hdr, &raw_location);
+        btrfs_disk_key_to_cpu(&inode_location, &raw_location);
+        btrfs_release_path(root, path);
+        inode = btrfs_iget(root->fs_info->sb, &inode_location, root, NULL);
+        if (IS_ERR(inode))
+                return inode;
+        if (!inode)
+                return ERR_PTR(-ENOENT);
+        if (is_bad_inode(inode)) {
+                iput(inode);
+                return ERR_PTR(-ENOENT);
+        }
+        /* Cache it on the block group with a reference of its own. */
+        spin_lock(&block_group->lock);
+        if (!root->fs_info->closing) {
+                block_group->inode = igrab(inode);
+                block_group->iref = 1;
+        }
+        spin_unlock(&block_group->lock);
+        return inode;
+}
+/*
+ * Create the on-disk objects backing the free space cache of @block_group:
+ * an empty inode item, and a free space header item that stores the key of
+ * that inode.
+ *
+ * The inode item is zeroed and then set up as a regular 0600 file with zero
+ * size and nbytes, uid/gid 0, one link, and the NOCOMPRESS | PREALLOC |
+ * NODATASUM flags.  The header item is keyed by
+ * (BTRFS_FREE_SPACE_OBJECTID, 0, block group objectid) and is zeroed apart
+ * from the stored inode key.
+ *
+ * @path is released before returning, except when finding a free objectid or
+ * inserting the inode item fails.
+ *
+ * Returns 0 on success, otherwise the error value of the helper that failed.
+ */
+int create_free_space_inode(struct btrfs_root *root,
+                            struct btrfs_trans_handle *trans,
+                            struct btrfs_block_group_cache *block_group,
+                            struct btrfs_path *path)
+{
+        struct btrfs_free_space_header *hdr;
+        struct btrfs_inode_item *ii;
+        struct btrfs_disk_key inode_key;
+        struct btrfs_key hdr_key;
+        struct extent_buffer *eb;
+        u64 ino;
+        int err;
+        err = btrfs_find_free_objectid(trans, root, 0, &ino);
+        if (err < 0)
+                return err;
+        err = btrfs_insert_empty_inode(trans, root, path, ino);
+        if (err)
+                return err;
+        /* Remember the new inode item's key; the header stores it below. */
+        eb = path->nodes[0];
+        btrfs_item_key(eb, &inode_key, path->slots[0]);
+        ii = btrfs_item_ptr(eb, path->slots[0], struct btrfs_inode_item);
+        memset_extent_buffer(eb, 0, (unsigned long)ii, sizeof(*ii));
+        btrfs_set_inode_generation(eb, ii, trans->transid);
+        btrfs_set_inode_transid(eb, ii, trans->transid);
+        btrfs_set_inode_size(eb, ii, 0);
+        btrfs_set_inode_nbytes(eb, ii, 0);
+        btrfs_set_inode_nlink(eb, ii, 1);
+        btrfs_set_inode_uid(eb, ii, 0);
+        btrfs_set_inode_gid(eb, ii, 0);
+        btrfs_set_inode_mode(eb, ii, S_IFREG | 0600);
+        btrfs_set_inode_flags(eb, ii, BTRFS_INODE_NOCOMPRESS |
+                              BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM);
+        btrfs_set_inode_block_group(eb, ii, block_group->key.objectid);
+        btrfs_mark_buffer_dirty(eb);
+        btrfs_release_path(root, path);
+        /* Now the header item that points at the inode. */
+        hdr_key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+        hdr_key.type = 0;
+        hdr_key.offset = block_group->key.objectid;
+        err = btrfs_insert_empty_item(trans, root, path, &hdr_key,
+                                      sizeof(struct btrfs_free_space_header));
+        if (err < 0) {
+                btrfs_release_path(root, path);
+                return err;
+        }
+        eb = path->nodes[0];
+        hdr = btrfs_item_ptr(eb, path->slots[0],
+                             struct btrfs_free_space_header);
+        memset_extent_buffer(eb, 0, (unsigned long)hdr, sizeof(*hdr));
+        btrfs_set_free_space_key(eb, hdr, &inode_key);
+        btrfs_mark_buffer_dirty(eb);
+        btrfs_release_path(root, path);
+        return 0;
+}
+/*
+ * Truncate the free space cache @inode to zero length inside @trans.
+ *
+ * The transaction handle is switched to the root's orphan block reserve and
+ * is left pointing at it on return.  The reserve is checked first; if that
+ * check fails nothing is truncated.  Otherwise i_size is set to 0, the page
+ * cache is truncated, the inode's items from BTRFS_EXTENT_DATA_KEY at offset 0
+ * are removed and the inode item is updated.
+ *
+ * @path is not used.
+ *
+ * Returns 0 on success or the error from the reserve check, the item
+ * truncation (after a WARN) or btrfs_update_inode().
+ */
+int btrfs_truncate_free_space_cache(struct btrfs_root *root,
+                                    struct btrfs_trans_handle *trans,
+                                    struct btrfs_path *path,
+                                    struct inode *inode)
+{
+        loff_t old_isize;
+        int err;
+        trans->block_rsv = root->orphan_block_rsv;
+        err = btrfs_block_rsv_check(trans, root, root->orphan_block_rsv,
+                                    0, 5);
+        if (err)
+                return err;
+        old_isize = i_size_read(inode);
+        btrfs_i_size_write(inode, 0);
+        truncate_pagecache(inode, old_isize, 0);
+        /*
+         * No orphan item is needed: truncating the free space cache is never
+         * split across transactions.
+         */
+        err = btrfs_truncate_inode_items(trans, root, inode,
+                                         0, BTRFS_EXTENT_DATA_KEY);
+        if (!err)
+                return btrfs_update_inode(trans, root, inode);
+        WARN_ON(1);
+        return err;
+}
+/*
+ * Start synchronous readahead on the cache file, from page 0, passing the
+ * index of the file's last page as the request size.
+ *
+ * The readahead state is a temporary heap allocation that is freed again
+ * before returning.  Callers must not pass an empty inode: the last-page
+ * computation subtracts one from i_size.
+ *
+ * Returns 0, or -ENOMEM if the readahead state cannot be allocated.
+ */
+static int readahead_cache(struct inode *inode)
+{
+        struct address_space *mapping = inode->i_mapping;
+        struct file_ra_state *ra_state;
+        unsigned long last_page;
+        ra_state = kzalloc(sizeof(*ra_state), GFP_NOFS);
+        if (!ra_state)
+                return -ENOMEM;
+        file_ra_state_init(ra_state, mapping);
+        last_page = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+        page_cache_sync_readahead(mapping, ra_state, NULL, 0, last_page);
+        kfree(ra_state);
+        return 0;
+}
+/*
+ * Populate the in-memory free space tree of @block_group from its on-disk
+ * free space cache.
+ *
+ * Layout of the cache file as read here: page 0 starts with one u32 checksum
+ * per page of the file, followed by a u64 generation; free space entries
+ * follow on page 0 and continue on later pages; after the entries comes one
+ * full page per bitmap entry, in the order the bitmap entries appeared.
+ *
+ * Every page is checksummed before use (page 0 from the first entry onwards).
+ * On any inconsistency found while walking the pages, everything loaded so
+ * far is dropped and the block group is marked BTRFS_DC_CLEAR.
+ *
+ * Returns 1 when the cache was loaded, 0 when there is no usable cache, and
+ * the negative error from btrfs_search_slot() if the header search failed.
+ */
+int load_free_space_cache(struct btrfs_fs_info *fs_info,
+                          struct btrfs_block_group_cache *block_group)
+{
+        struct btrfs_root *root = fs_info->tree_root;
+        struct inode *inode;
+        struct btrfs_free_space_header *header;
+        struct extent_buffer *leaf;
+        struct page *page;
+        struct btrfs_path *path;
+        u32 *checksums = NULL, *crc;
+        char *disk_crcs = NULL;
+        struct btrfs_key key;
+        struct list_head bitmaps;
+        u64 num_entries;
+        u64 num_bitmaps;
+        u64 generation;
+        u32 cur_crc = ~(u32)0;
+        pgoff_t index = 0;
+        unsigned long first_page_offset;
+        int num_checksums;
+        int ret = 0;
+        /*
+         * If we're unmounting then just return, since this does a search on the
+         * normal root and not the commit root and we could deadlock.
+         */
+        smp_mb();
+        if (fs_info->closing)
+                return 0;
+        /*
+         * If this block group has been marked to be cleared for one reason or
+         * another then we can't trust the on disk cache, so just return.
+         */
+        spin_lock(&block_group->lock);
+        if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
+                spin_unlock(&block_group->lock);
+                return 0;
+        }
+        spin_unlock(&block_group->lock);
+        INIT_LIST_HEAD(&bitmaps);
+        path = btrfs_alloc_path();
+        if (!path)
+                return 0;
+        inode = lookup_free_space_inode(root, block_group, path);
+        if (IS_ERR(inode)) {
+                btrfs_free_path(path);
+                return 0;
+        }
+        /* Nothing in the space cache, goodbye */
+        if (!i_size_read(inode)) {
+                btrfs_free_path(path);
+                goto out;
+        }
+        key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+        key.offset = block_group->key.objectid;
+        key.type = 0;
+        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+        if (ret) {
+                /*
+                 * A positive return means the header item was not found.
+                 * Don't let that escape as 1, which is this function's
+                 * "cache loaded" value.
+                 */
+                if (ret > 0)
+                        ret = 0;
+                btrfs_free_path(path);
+                goto out;
+        }
+        leaf = path->nodes[0];
+        header = btrfs_item_ptr(leaf, path->slots[0],
+                                struct btrfs_free_space_header);
+        num_entries = btrfs_free_space_entries(leaf, header);
+        num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
+        generation = btrfs_free_space_generation(leaf, header);
+        btrfs_free_path(path);
+        if (BTRFS_I(inode)->generation != generation) {
+                printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
+                       " not match free space cache generation (%llu) for "
+                       "block group %llu\n",
+                       (unsigned long long)BTRFS_I(inode)->generation,
+                       (unsigned long long)generation,
+                       (unsigned long long)block_group->key.objectid);
+                goto out;
+        }
+        if (!num_entries)
+                goto out;
+        /* Setup everything for doing checksumming */
+        num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
+        checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
+        if (!checksums)
+                goto out;
+        first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
+        /*
+         * The checksums, the generation and at least the first entry all live
+         * in page 0.  If they cannot fit, the copies and the checksum length
+         * below would run past the page, so treat the cache as unusable.
+         */
+        if (first_page_offset + sizeof(struct btrfs_free_space_entry) >
+            PAGE_CACHE_SIZE)
+                goto out;
+        disk_crcs = kzalloc(first_page_offset, GFP_NOFS);
+        if (!disk_crcs)
+                goto out;
+        ret = readahead_cache(inode);
+        if (ret) {
+                ret = 0;
+                goto out;
+        }
+        /* One iteration per page of the cache file. */
+        while (1) {
+                struct btrfs_free_space_entry *entry;
+                struct btrfs_free_space *e;
+                void *addr;
+                unsigned long offset = 0;
+                unsigned long start_offset = 0;
+                int need_loop = 0;
+                if (!num_entries && !num_bitmaps)
+                        break;
+                /* Entries on page 0 start after the checksums + generation. */
+                if (index == 0) {
+                        start_offset = first_page_offset;
+                        offset = start_offset;
+                }
+                page = grab_cache_page(inode->i_mapping, index);
+                if (!page) {
+                        ret = 0;
+                        goto free_cache;
+                }
+                if (!PageUptodate(page)) {
+                        btrfs_readpage(NULL, page);
+                        lock_page(page);
+                        if (!PageUptodate(page)) {
+                                unlock_page(page);
+                                page_cache_release(page);
+                                printk(KERN_ERR "btrfs: error reading free "
+                                       "space cache: %llu\n",
+                                       (unsigned long long)
+                                       block_group->key.objectid);
+                                goto free_cache;
+                        }
+                }
+                addr = kmap(page);
+                if (index == 0) {
+                        u64 *gen;
+                        /* Keep a private copy of the on-disk checksums. */
+                        memcpy(disk_crcs, addr, first_page_offset);
+                        gen = addr + (sizeof(u32) * num_checksums);
+                        if (*gen != BTRFS_I(inode)->generation) {
+                                printk(KERN_ERR "btrfs: space cache generation"
+                                       " (%llu) does not match inode (%llu) "
+                                       "for block group %llu\n",
+                                       (unsigned long long)*gen,
+                                       (unsigned long long)
+                                       BTRFS_I(inode)->generation,
+                                       (unsigned long long)
+                                       block_group->key.objectid);
+                                kunmap(page);
+                                unlock_page(page);
+                                page_cache_release(page);
+                                goto free_cache;
+                        }
+                        crc = (u32 *)disk_crcs;
+                }
+                entry = addr + start_offset;
+                /* First lets check our crc before we do anything fun */
+                cur_crc = ~(u32)0;
+                cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc,
+                                          PAGE_CACHE_SIZE - start_offset);
+                btrfs_csum_final(cur_crc, (char *)&cur_crc);
+                if (cur_crc != *crc) {
+                        printk(KERN_ERR "btrfs: crc mismatch for page %lu in "
+                               "block group %llu\n", index,
+                               (unsigned long long)block_group->key.objectid);
+                        kunmap(page);
+                        unlock_page(page);
+                        page_cache_release(page);
+                        goto free_cache;
+                }
+                crc++;
+                /* Consume the entries stored on this page, if any are left. */
+                while (1) {
+                        if (!num_entries)
+                                break;
+                        need_loop = 1;
+                        e = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
+                        if (!e) {
+                                kunmap(page);
+                                unlock_page(page);
+                                page_cache_release(page);
+                                goto free_cache;
+                        }
+                        e->offset = le64_to_cpu(entry->offset);
+                        e->bytes = le64_to_cpu(entry->bytes);
+                        /* A zero-length entry means the cache is bogus. */
+                        if (!e->bytes) {
+                                kunmap(page);
+                                kfree(e);
+                                unlock_page(page);
+                                page_cache_release(page);
+                                goto free_cache;
+                        }
+                        if (entry->type == BTRFS_FREE_SPACE_EXTENT) {
+                                spin_lock(&block_group->tree_lock);
+                                ret = link_free_space(block_group, e);
+                                spin_unlock(&block_group->tree_lock);
+                                BUG_ON(ret);
+                        } else {
+                                e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
+                                if (!e->bitmap) {
+                                        kunmap(page);
+                                        kfree(e);
+                                        unlock_page(page);
+                                        page_cache_release(page);
+                                        goto free_cache;
+                                }
+                                spin_lock(&block_group->tree_lock);
+                                ret = link_free_space(block_group, e);
+                                block_group->total_bitmaps++;
+                                recalculate_thresholds(block_group);
+                                spin_unlock(&block_group->tree_lock);
+                                /* Its bitmap page is filled in further down. */
+                                list_add_tail(&e->list, &bitmaps);
+                        }
+                        num_entries--;
+                        offset += sizeof(struct btrfs_free_space_entry);
+                        if (offset + sizeof(struct btrfs_free_space_entry) >=
+                            PAGE_CACHE_SIZE)
+                                break;
+                        entry++;
+                }
+                /*
+                 * We read an entry out of this page, we need to move on to the
+                 * next page.
+                 */
+                if (need_loop) {
+                        kunmap(page);
+                        goto next;
+                }
+                /*
+                 * This is a bitmap page.  The header may claim more bitmaps
+                 * than there were bitmap entries; without this check the list
+                 * head itself would be treated as an entry.
+                 */
+                if (list_empty(&bitmaps)) {
+                        kunmap(page);
+                        unlock_page(page);
+                        page_cache_release(page);
+                        goto free_cache;
+                }
+                /*
+                 * We add the bitmaps at the end of the entries in order that
+                 * the bitmap entries are added to the cache.
+                 */
+                e = list_entry(bitmaps.next, struct btrfs_free_space, list);
+                list_del_init(&e->list);
+                memcpy(e->bitmap, addr, PAGE_CACHE_SIZE);
+                kunmap(page);
+                num_bitmaps--;
+next:
+                unlock_page(page);
+                page_cache_release(page);
+                index++;
+        }
+        ret = 1;
+out:
+        kfree(checksums);
+        kfree(disk_crcs);
+        iput(inode);
+        return ret;
+free_cache:
+        /* This cache is bogus, make sure it gets cleared */
+        spin_lock(&block_group->lock);
+        block_group->disk_cache_state = BTRFS_DC_CLEAR;
+        spin_unlock(&block_group->lock);
+        btrfs_remove_free_space_cache(block_group);
+        goto out;
+}
+/*
+ * Write the in-memory free space tree of @block_group into its cache inode
+ * and update the free space header item.
+ *
+ * @root is only used to reach fs_info; all work is done against the tree
+ * root.  @path is used for the inode lookup and the header item search.
+ *
+ * File layout produced: page 0 starts with one u32 checksum per page and a
+ * u64 generation (the transaction id), followed by free space entries that
+ * continue on later pages; then one page per bitmap; any remaining pages are
+ * zeroed.
+ *
+ * Returns 1 when the cache was written and the header updated, 0 otherwise.
+ * On the failure paths that reach out_free the block group is marked
+ * BTRFS_DC_ERROR and the inode generation is reset to 0.
+ */
+int btrfs_write_out_cache(struct btrfs_root *root,
+                          struct btrfs_trans_handle *trans,
+                          struct btrfs_block_group_cache *block_group,
+                          struct btrfs_path *path)
+{
+        struct btrfs_free_space_header *header;
+        struct extent_buffer *leaf;
+        struct inode *inode;
+        struct rb_node *node;
+        struct list_head *pos, *n;
+        struct page *page;
+        struct extent_state *cached_state = NULL;
+        struct list_head bitmap_list;
+        struct btrfs_key key;
+        u64 bytes = 0;
+        u32 *crc, *checksums;
+        pgoff_t index = 0, last_index = 0;
+        unsigned long first_page_offset;
+        int num_checksums;
+        int entries = 0;
+        int bitmaps = 0;
+        int ret = 0;
+        root = root->fs_info->tree_root;
+        INIT_LIST_HEAD(&bitmap_list);
+        /* Nothing to do unless the cache has at least been set up. */
+        spin_lock(&block_group->lock);
+        if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
+                spin_unlock(&block_group->lock);
+                return 0;
+        }
+        spin_unlock(&block_group->lock);
+        inode = lookup_free_space_inode(root, block_group, path);
+        if (IS_ERR(inode))
+                return 0;
+        if (!i_size_read(inode)) {
+                iput(inode);
+                return 0;
+        }
+        last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+        filemap_write_and_wait(inode->i_mapping);
+        btrfs_wait_ordered_range(inode, inode->i_size &
+                                 ~(root->sectorsize - 1), (u64)-1);
+        /* We need a checksum per page. */
+        num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
+        crc = checksums  = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
+        if (!crc) {
+                iput(inode);
+                return 0;
+        }
+        /* Since the first page has all of our checksums and our generation we
+         * need to calculate the offset into the page that we can start writing
+         * our entries.
+         */
+        first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
+        /*
+         * The first entry is written at first_page_offset unconditionally, so
+         * the checksums, the generation and one entry must fit in page 0.
+         * Otherwise we would write past the mapped page; fail the write out
+         * before any page is locked.
+         */
+        if (first_page_offset + sizeof(struct btrfs_free_space_entry) >
+            PAGE_CACHE_SIZE)
+                goto out_free;
+        node = rb_first(&block_group->free_space_offset);
+        if (!node)
+                goto out_free;
+        /*
+         * Lock all pages first so we can lock the extent safely.
+         *
+         * NOTE: Because we hold the ref the entire time we're going to write to
+         * the page find_get_page should never fail, so we don't do a check
+         * after find_get_page at this point.  Just putting this here so people
+         * know and don't freak out.
+         */
+        while (index <= last_index) {
+                page = grab_cache_page(inode->i_mapping, index);
+                if (!page) {
+                        pgoff_t i = 0;
+                        /* Undo: unlock and drop both refs on earlier pages. */
+                        while (i < index) {
+                                page = find_get_page(inode->i_mapping, i);
+                                unlock_page(page);
+                                page_cache_release(page);
+                                page_cache_release(page);
+                                i++;
+                        }
+                        goto out_free;
+                }
+                index++;
+        }
+        index = 0;
+        lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
+                         0, &cached_state, GFP_NOFS);
+        /*
+         * Write out the extent entries
+         *
+         * NOTE(review): nothing here or in the bitmap loop stops index from
+         * running past last_index when the tree holds more entries than the
+         * file has pages for; find_get_page() is only safe for the pages
+         * locked above.  Confirm the caller sizes the file generously.
+         */
+        do {
+                struct btrfs_free_space_entry *entry;
+                void *addr;
+                unsigned long offset = 0;
+                unsigned long start_offset = 0;
+                if (index == 0) {
+                        start_offset = first_page_offset;
+                        offset = start_offset;
+                }
+                page = find_get_page(inode->i_mapping, index);
+                addr = kmap(page);
+                entry = addr + start_offset;
+                memset(addr, 0, PAGE_CACHE_SIZE);
+                /* Fill this page with as many entries as fit. */
+                while (1) {
+                        struct btrfs_free_space *e;
+                        e = rb_entry(node, struct btrfs_free_space, offset_index);
+                        entries++;
+                        entry->offset = cpu_to_le64(e->offset);
+                        entry->bytes = cpu_to_le64(e->bytes);
+                        if (e->bitmap) {
+                                entry->type = BTRFS_FREE_SPACE_BITMAP;
+                                /* The bitmap page itself is written later. */
+                                list_add_tail(&e->list, &bitmap_list);
+                                bitmaps++;
+                        } else {
+                                entry->type = BTRFS_FREE_SPACE_EXTENT;
+                        }
+                        node = rb_next(node);
+                        if (!node)
+                                break;
+                        offset += sizeof(struct btrfs_free_space_entry);
+                        if (offset + sizeof(struct btrfs_free_space_entry) >=
+                            PAGE_CACHE_SIZE)
+                                break;
+                        entry++;
+                }
+                /* Checksum what we wrote (page 0: from the first entry on). */
+                *crc = ~(u32)0;
+                *crc = btrfs_csum_data(root, addr + start_offset, *crc,
+                                       PAGE_CACHE_SIZE - start_offset);
+                kunmap(page);
+                btrfs_csum_final(*crc, (char *)crc);
+                crc++;
+                bytes += PAGE_CACHE_SIZE;
+                ClearPageChecked(page);
+                set_page_extent_mapped(page);
+                SetPageUptodate(page);
+                set_page_dirty(page);
+                /*
+                 * We need to release our reference we got for grab_cache_page,
+                 * except for the first page which will hold our checksums, we
+                 * do that below.
+                 */
+                if (index != 0) {
+                        unlock_page(page);
+                        page_cache_release(page);
+                }
+                page_cache_release(page);
+                index++;
+        } while (node);
+        /* Write out the bitmaps */
+        list_for_each_safe(pos, n, &bitmap_list) {
+                void *addr;
+                struct btrfs_free_space *entry =
+                        list_entry(pos, struct btrfs_free_space, list);
+                page = find_get_page(inode->i_mapping, index);
+                addr = kmap(page);
+                memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
+                *crc = ~(u32)0;
+                *crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE);
+                kunmap(page);
+                btrfs_csum_final(*crc, (char *)crc);
+                crc++;
+                bytes += PAGE_CACHE_SIZE;
+                ClearPageChecked(page);
+                set_page_extent_mapped(page);
+                SetPageUptodate(page);
+                set_page_dirty(page);
+                unlock_page(page);
+                page_cache_release(page);
+                page_cache_release(page);
+                list_del_init(&entry->list);
+                index++;
+        }
+        /* Zero out the rest of the pages just to make sure */
+        while (index <= last_index) {
+                void *addr;
+                page = find_get_page(inode->i_mapping, index);
+                addr = kmap(page);
+                memset(addr, 0, PAGE_CACHE_SIZE);
+                kunmap(page);
+                ClearPageChecked(page);
+                set_page_extent_mapped(page);
+                SetPageUptodate(page);
+                set_page_dirty(page);
+                unlock_page(page);
+                page_cache_release(page);
+                page_cache_release(page);
+                bytes += PAGE_CACHE_SIZE;
+                index++;
+        }
+        btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state);
+        /* Write the checksums and trans id to the first page */
+        {
+                void *addr;
+                u64 *gen;
+                page = find_get_page(inode->i_mapping, 0);
+                addr = kmap(page);
+                memcpy(addr, checksums, sizeof(u32) * num_checksums);
+                gen = addr + (sizeof(u32) * num_checksums);
+                *gen = trans->transid;
+                kunmap(page);
+                ClearPageChecked(page);
+                set_page_extent_mapped(page);
+                SetPageUptodate(page);
+                set_page_dirty(page);
+                /* Page 0 was kept locked until now; drop both refs. */
+                unlock_page(page);
+                page_cache_release(page);
+                page_cache_release(page);
+        }
+        BTRFS_I(inode)->generation = trans->transid;
+        unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
+                             i_size_read(inode) - 1, &cached_state, GFP_NOFS);
+        filemap_write_and_wait(inode->i_mapping);
+        /* Record entry/bitmap counts and generation in the header item. */
+        key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+        key.offset = block_group->key.objectid;
+        key.type = 0;
+        ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
+        if (ret < 0) {
+                ret = 0;
+                clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
+                                 EXTENT_DIRTY | EXTENT_DELALLOC |
+                                 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
+                goto out_free;
+        }
+        leaf = path->nodes[0];
+        if (ret > 0) {
+                struct btrfs_key found_key;
+                /* No exact match: check whether the previous slot is ours. */
+                BUG_ON(!path->slots[0]);
+                path->slots[0]--;
+                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+                if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
+                    found_key.offset != block_group->key.objectid) {
+                        ret = 0;
+                        clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
+                                         EXTENT_DIRTY | EXTENT_DELALLOC |
+                                         EXTENT_DO_ACCOUNTING, 0, 0, NULL,
+                                         GFP_NOFS);
+                        btrfs_release_path(root, path);
+                        goto out_free;
+                }
+        }
+        header = btrfs_item_ptr(leaf, path->slots[0],
+                                struct btrfs_free_space_header);
+        btrfs_set_free_space_entries(leaf, header, entries);
+        btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
+        btrfs_set_free_space_generation(leaf, header, trans->transid);
+        btrfs_mark_buffer_dirty(leaf);
+        btrfs_release_path(root, path);
+        ret = 1;
+out_free:
+        if (ret == 0) {
+                /* Failed: drop what we touched and flag the cache as bad. */
+                invalidate_inode_pages2_range(inode->i_mapping, 0, index);
+                spin_lock(&block_group->lock);
+                block_group->disk_cache_state = BTRFS_DC_ERROR;
+                spin_unlock(&block_group->lock);
+                BTRFS_I(inode)->generation = 0;
+        }
+        kfree(checksums);
+        btrfs_update_inode(trans, root, inode);
+        iput(inode);
+        return ret;
+}
 static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
                                          u64 offset)
 {
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 890a8e79011b..e49ca5c321b5 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -27,6 +27,24 @@ struct btrfs_free_space {
        struct list_head list;
 };
+struct inode *lookup_free_space_inode(struct btrfs_root *root,
+                                      struct btrfs_block_group_cache
+                                      *block_group, struct btrfs_path *path);
+int create_free_space_inode(struct btrfs_root *root,
+                            struct btrfs_trans_handle *trans,
+                            struct btrfs_block_group_cache *block_group,
+                            struct btrfs_path *path);
+int btrfs_truncate_free_space_cache(struct btrfs_root *root,
+                                    struct btrfs_trans_handle *trans,
+                                    struct btrfs_path *path,
+                                    struct inode *inode);
+int load_free_space_cache(struct btrfs_fs_info *fs_info,
+                          struct btrfs_block_group_cache *block_group);
+int btrfs_write_out_cache(struct btrfs_root *root,
+                          struct btrfs_trans_handle *trans,
+                          struct btrfs_block_group_cache *block_group,
+                          struct btrfs_path *path);
 int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
                         u64 bytenr, u64 size);
 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 64f99cf69ce0..558cac2dfa54 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -319,8 +319,6 @@ static noinline int compress_file_range(struct inode *inode,
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_trans_handle *trans;
        u64 num_bytes;
-        u64 orig_start;
-        u64 disk_num_bytes;
        u64 blocksize = root->sectorsize;
        u64 actual_end;
        u64 isize = i_size_read(inode);
@@ -335,8 +333,6 @@ static noinline int compress_file_range(struct inode *inode,
        int i;
        int will_compress;
-        orig_start = start;
        actual_end = min_t(u64, isize, end + 1);
 again:
        will_compress = 0;
@@ -371,7 +367,6 @@ again:
        total_compressed = min(total_compressed, max_uncompressed);
        num_bytes = (end - start + blocksize) & ~(blocksize - 1);
        num_bytes = max(blocksize,  num_bytes);
-        disk_num_bytes = num_bytes;
        total_in = 0;
        ret = 0;
@@ -467,7 +462,6 @@ again:
                if (total_compressed >= total_in) {
                        will_compress = 0;
                } else {
-                        disk_num_bytes = total_compressed;
                        num_bytes = total_in;
                }
        }
@@ -757,20 +751,17 @@ static noinline int cow_file_range(struct inode *inode,
        u64 disk_num_bytes;
        u64 cur_alloc_size;
        u64 blocksize = root->sectorsize;
-        u64 actual_end;
-        u64 isize = i_size_read(inode);
        struct btrfs_key ins;
        struct extent_map *em;
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
        int ret = 0;
+        BUG_ON(root == root->fs_info->tree_root);
        trans = btrfs_join_transaction(root, 1);
        BUG_ON(!trans);
        btrfs_set_trans_block_group(trans, inode);
        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
-        actual_end = min_t(u64, isize, end + 1);
        num_bytes = (end - start + blocksize) & ~(blocksize - 1);
        num_bytes = max(blocksize,  num_bytes);
        disk_num_bytes = num_bytes;
@@ -1035,10 +1026,16 @@ static noinline int run_delalloc_nocow(struct inode *inode,
        int type;
        int nocow;
        int check_prev = 1;
+        bool nolock = false;
        path = btrfs_alloc_path();
        BUG_ON(!path);
-        trans = btrfs_join_transaction(root, 1);
+        if (root == root->fs_info->tree_root) {
+                nolock = true;
+                trans = btrfs_join_transaction_nolock(root, 1);
+        } else {
+                trans = btrfs_join_transaction(root, 1);
+        }
        BUG_ON(!trans);
        cow_start = (u64)-1;
@@ -1211,8 +1208,13 @@ out_check:
                BUG_ON(ret);
        }
-        ret = btrfs_end_transaction(trans, root);
+        if (nolock) {
-        BUG_ON(ret);
+                ret = btrfs_end_transaction_nolock(trans, root);
+                BUG_ON(ret);
+        } else {
+                ret = btrfs_end_transaction(trans, root);
+                BUG_ON(ret);
+        }
        btrfs_free_path(path);
        return 0;
 }
@@ -1289,6 +1291,8 @@ static int btrfs_set_bit_hook(struct inode *inode,
        if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
                struct btrfs_root *root = BTRFS_I(inode)->root;
                u64 len = state->end + 1 - state->start;
+                int do_list = (root->root_key.objectid !=
+                               BTRFS_ROOT_TREE_OBJECTID);
                if (*bits & EXTENT_FIRST_DELALLOC)
                        *bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1298,7 +1302,7 @@ static int btrfs_set_bit_hook(struct inode *inode,
                spin_lock(&root->fs_info->delalloc_lock);
                BTRFS_I(inode)->delalloc_bytes += len;
                root->fs_info->delalloc_bytes += len;
-                if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
+                if (do_list && list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
                        list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
                                      &root->fs_info->delalloc_inodes);
                }
@@ -1321,6 +1325,8 @@ static int btrfs_clear_bit_hook(struct inode *inode,
        if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
                struct btrfs_root *root = BTRFS_I(inode)->root;
                u64 len = state->end + 1 - state->start;
+                int do_list = (root->root_key.objectid !=
+                               BTRFS_ROOT_TREE_OBJECTID);
                if (*bits & EXTENT_FIRST_DELALLOC)
                        *bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1330,14 +1336,15 @@ static int btrfs_clear_bit_hook(struct inode *inode,
                if (*bits & EXTENT_DO_ACCOUNTING)
                        btrfs_delalloc_release_metadata(inode, len);
-                if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID)
+                if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
+                    && do_list)
                        btrfs_free_reserved_data_space(inode, len);
                spin_lock(&root->fs_info->delalloc_lock);
                root->fs_info->delalloc_bytes -= len;
                BTRFS_I(inode)->delalloc_bytes -= len;
-                if (BTRFS_I(inode)->delalloc_bytes == 0 &&
+                if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
                    !list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
                        list_del_init(&BTRFS_I(inode)->delalloc_inodes);
                }
@@ -1372,7 +1379,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
        if (map_length < length + size)
                return 1;
-        return 0;
+        return ret;
 }
 /*
@@ -1426,7 +1433,10 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
        skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
-        ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
+        if (root == root->fs_info->tree_root)
+                ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
+        else
+                ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
        BUG_ON(ret);
        if (!(rw & REQ_WRITE)) {
@@ -1662,6 +1672,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
        struct extent_state *cached_state = NULL;
        int compressed = 0;
        int ret;
+        bool nolock = false;
        ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
                                             end - start + 1);
@@ -1669,11 +1680,17 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
                return 0;
        BUG_ON(!ordered_extent);
+        nolock = (root == root->fs_info->tree_root);
        if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
                BUG_ON(!list_empty(&ordered_extent->list));
                ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
                if (!ret) {
-                        trans = btrfs_join_transaction(root, 1);
+                        if (nolock)
+                                trans = btrfs_join_transaction_nolock(root, 1);
+                        else
+                                trans = btrfs_join_transaction(root, 1);
+                        BUG_ON(!trans);
                        btrfs_set_trans_block_group(trans, inode);
                        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
                        ret = btrfs_update_inode(trans, root, inode);
@@ -1686,7 +1703,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
                         ordered_extent->file_offset + ordered_extent->len - 1,
                         0, &cached_state, GFP_NOFS);
-        trans = btrfs_join_transaction(root, 1);
+        if (nolock)
+                trans = btrfs_join_transaction_nolock(root, 1);
+        else
+                trans = btrfs_join_transaction(root, 1);
        btrfs_set_trans_block_group(trans, inode);
        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -1700,6 +1720,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
                                                ordered_extent->len);
                BUG_ON(ret);
        } else {
+                BUG_ON(root == root->fs_info->tree_root);
                ret = insert_reserved_file_extent(trans, inode,
                                                ordered_extent->file_offset,
                                                ordered_extent->start,
@@ -1724,9 +1745,15 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
        ret = btrfs_update_inode(trans, root, inode);
        BUG_ON(ret);
 out:
-        btrfs_delalloc_release_metadata(inode, ordered_extent->len);
+        if (nolock) {
-        if (trans)
+                if (trans)
-                btrfs_end_transaction(trans, root);
+                        btrfs_end_transaction_nolock(trans, root);
+        } else {
+                btrfs_delalloc_release_metadata(inode, ordered_extent->len);
+                if (trans)
+                        btrfs_end_transaction(trans, root);
+        }
        /* once for us */
        btrfs_put_ordered_extent(ordered_extent);
        /* once for the tree */
@@ -2237,7 +2264,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
 {
        struct btrfs_path *path;
        struct extent_buffer *leaf;
-        struct btrfs_item *item;
        struct btrfs_key key, found_key;
        struct btrfs_trans_handle *trans;
        struct inode *inode;
@@ -2275,7 +2301,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
                /* pull out the item */
                leaf = path->nodes[0];
-                item = btrfs_item_nr(leaf, path->slots[0]);
                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
                /* make sure the item matches what we want */
@@ -2651,7 +2676,8 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
        ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
                                           dir, index);
-        BUG_ON(ret);
+        if (ret == -ENOENT)
+                ret = 0;
 err:
        btrfs_free_path(path);
        if (ret)
@@ -2672,8 +2698,8 @@ static int check_path_shared(struct btrfs_root *root,
 {
        struct extent_buffer *eb;
        int level;
-        int ret;
        u64 refs = 1;
+        int uninitialized_var(ret);
        for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
                if (!path->nodes[level])
@@ -2686,7 +2712,7 @@ static int check_path_shared(struct btrfs_root *root,
                if (refs > 1)
                        return 1;
        }
-        return 0;
+        return ret; /* XXX callers? */
 }
 /*
@@ -3196,7 +3222,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
        BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
-        if (root->ref_cows)
+        if (root->ref_cows || root == root->fs_info->tree_root)
                btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
        path = btrfs_alloc_path();
@@ -3344,7 +3370,8 @@ delete:
                } else {
                        break;
                }
-                if (found_extent && root->ref_cows) {
+                if (found_extent && (root->ref_cows ||
+                                     root == root->fs_info->tree_root)) {
                        btrfs_set_path_blocking(path);
                        ret = btrfs_free_extent(trans, root, extent_start,
                                                extent_num_bytes, 0,
@@ -3675,7 +3702,8 @@ void btrfs_evict_inode(struct inode *inode)
        int ret;
        truncate_inode_pages(&inode->i_data, 0);
-        if (inode->i_nlink && btrfs_root_refs(&root->root_item) != 0)
+        if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
+                               root == root->fs_info->tree_root))
                goto no_delete;
        if (is_bad_inode(inode)) {
@@ -3888,7 +3916,14 @@ static void inode_tree_del(struct inode *inode)
        }
        spin_unlock(&root->inode_lock);
-        if (empty && btrfs_root_refs(&root->root_item) == 0) {
+        /*
+         * Free space cache has inodes in the tree root, but the tree root has a
+         * root_refs of 0, so this could end up dropping the tree root as a
+         * snapshot, so we need the extra !root->fs_info->tree_root check to
+         * make sure we don't drop it.
+         */
+        if (empty && btrfs_root_refs(&root->root_item) == 0 &&
+            root != root->fs_info->tree_root) {
                synchronize_srcu(&root->fs_info->subvol_srcu);
                spin_lock(&root->inode_lock);
                empty = RB_EMPTY_ROOT(&root->inode_tree);
@@ -4282,14 +4317,24 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_trans_handle *trans;
        int ret = 0;
+        bool nolock = false;
        if (BTRFS_I(inode)->dummy_inode)
                return 0;
+        smp_mb();
+        nolock = (root->fs_info->closing && root == root->fs_info->tree_root);
        if (wbc->sync_mode == WB_SYNC_ALL) {
-                trans = btrfs_join_transaction(root, 1);
+                if (nolock)
+                        trans = btrfs_join_transaction_nolock(root, 1);
+                else
+                        trans = btrfs_join_transaction(root, 1);
                btrfs_set_trans_block_group(trans, inode);
-                ret = btrfs_commit_transaction(trans, root);
+                if (nolock)
+                        ret = btrfs_end_transaction_nolock(trans, root);
+                else
+                        ret = btrfs_commit_transaction(trans, root);
        }
        return ret;
 }
@@ -5645,7 +5690,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_dio_private *dip;
        struct bio_vec *bvec = bio->bi_io_vec;
-        u64 start;
        int skip_sum;
        int write = rw & REQ_WRITE;
        int ret = 0;
@@ -5671,7 +5715,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
        dip->inode = inode;
        dip->logical_offset = file_offset;
-        start = dip->logical_offset;
        dip->bytes = 0;
        do {
                dip->bytes += bvec->bv_len;
@@ -6308,6 +6351,21 @@ void btrfs_destroy_inode(struct inode *inode)
                spin_unlock(&root->fs_info->ordered_extent_lock);
        }
+        if (root == root->fs_info->tree_root) {
+                struct btrfs_block_group_cache *block_group;
+                block_group = btrfs_lookup_block_group(root->fs_info,
+                                                BTRFS_I(inode)->block_group);
+                if (block_group && block_group->inode == inode) {
+                        spin_lock(&block_group->lock);
+                        block_group->inode = NULL;
+                        spin_unlock(&block_group->lock);
+                        btrfs_put_block_group(block_group);
+                } else if (block_group) {
+                        btrfs_put_block_group(block_group);
+                }
+        }
        spin_lock(&root->orphan_lock);
        if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
                printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
@@ -6340,7 +6398,8 @@ int btrfs_drop_inode(struct inode *inode)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
-        if (btrfs_root_refs(&root->root_item) == 0)
+        if (btrfs_root_refs(&root->root_item) == 0 &&
+            root != root->fs_info->tree_root)
                return 1;
        else
                return generic_drop_inode(inode);
@@ -6609,7 +6668,8 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
        return 0;
 }
-int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput)
+int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
+                                   int sync)
 {
        struct btrfs_inode *binode;
        struct inode *inode = NULL;
@@ -6631,7 +6691,26 @@ int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput)
        spin_unlock(&root->fs_info->delalloc_lock);
        if (inode) {
-                write_inode_now(inode, 0);
+                if (sync) {
+                        filemap_write_and_wait(inode->i_mapping);
+                        /*
+                         * We have to do this because compression doesn't
+                         * actually set PG_writeback until it submits the pages
+                         * for IO, which happens in an async thread, so we could
+                         * race and not actually wait for any writeback pages
+                         * because they've not been submitted yet.  Technically
+                         * this could still be the case for the ordered stuff
+                         * since the async thread may not have started to do its
+                         * work yet.  If this becomes the case then we need to
+                         * figure out a way to make sure that in writepage we
+                         * wait for any async pages to be submitted before
+                         * returning so that fdatawait does what it's supposed to
+                         * do.
+                         */
+                        btrfs_wait_ordered_range(inode, 0, (u64)-1);
+                } else {
+                        filemap_flush(inode->i_mapping);
+                }
                if (delay_iput)
                        btrfs_add_delayed_iput(inode);
                else
@@ -6757,27 +6836,33 @@ out_unlock:
        return err;
 }
-int btrfs_prealloc_file_range(struct inode *inode, int mode,
+static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
-                              u64 start, u64 num_bytes, u64 min_size,
+                                       u64 start, u64 num_bytes, u64 min_size,
-                              loff_t actual_len, u64 *alloc_hint)
+                                       loff_t actual_len, u64 *alloc_hint,
+                                       struct btrfs_trans_handle *trans)
 {
-        struct btrfs_trans_handle *trans;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_key ins;
        u64 cur_offset = start;
        int ret = 0;
+        bool own_trans = true;
+        if (trans)
+                own_trans = false;
        while (num_bytes > 0) {
-                trans = btrfs_start_transaction(root, 3);
+                if (own_trans) {
-                if (IS_ERR(trans)) {
+                        trans = btrfs_start_transaction(root, 3);
-                        ret = PTR_ERR(trans);
+                        if (IS_ERR(trans)) {
-                        break;
+                                ret = PTR_ERR(trans);
+                                break;
+                        }
                }
                ret = btrfs_reserve_extent(trans, root, num_bytes, min_size,
                                           0, *alloc_hint, (u64)-1, &ins, 1);
                if (ret) {
-                        btrfs_end_transaction(trans, root);
+                        if (own_trans)
+                                btrfs_end_transaction(trans, root);
                        break;
                }
@@ -6810,11 +6895,30 @@ int btrfs_prealloc_file_range(struct inode *inode, int mode,
                ret = btrfs_update_inode(trans, root, inode);
                BUG_ON(ret);
-                btrfs_end_transaction(trans, root);
+                if (own_trans)
+                        btrfs_end_transaction(trans, root);
        }
        return ret;
 }
+/*
+ * Wrapper around __btrfs_prealloc_file_range() for callers that do not
+ * hold a transaction handle.
+ *
+ * A NULL handle is passed down, which makes the helper start and end its
+ * own transaction on every pass of its allocation loop.  The helper's
+ * return value is handed back unchanged.
+ */
+int btrfs_prealloc_file_range(struct inode *inode, int mode,
+                              u64 start, u64 num_bytes, u64 min_size,
+                              loff_t actual_len, u64 *alloc_hint)
+{
+        struct btrfs_trans_handle *no_trans = NULL;
+
+        return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
+                                           min_size, actual_len, alloc_hint,
+                                           no_trans);
+}
+/*
+ * Variant of btrfs_prealloc_file_range() for callers that already hold a
+ * transaction handle.
+ *
+ * @trans is forwarded to __btrfs_prealloc_file_range(); when it is
+ * non-NULL the helper neither starts nor ends a transaction itself, so
+ * the caller stays responsible for ending @trans.
+ */
+int btrfs_prealloc_file_range_trans(struct inode *inode,
+                                    struct btrfs_trans_handle *trans, int mode,
+                                    u64 start, u64 num_bytes, u64 min_size,
+                                    loff_t actual_len, u64 *alloc_hint)
+{
+        int ret;
+
+        ret = __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
+                                          min_size, actual_len, alloc_hint,
+                                          trans);
+        return ret;
+}
 static long btrfs_fallocate(struct inode *inode, int mode,
                            loff_t offset, loff_t len)
 {
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 9254b3d58dbe..463d91b4dd3a 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -224,7 +224,8 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
 static noinline int create_subvol(struct btrfs_root *root,
                                  struct dentry *dentry,
-                                  char *name, int namelen)
+                                  char *name, int namelen,
+                                  u64 *async_transid)
 {
        struct btrfs_trans_handle *trans;
        struct btrfs_key key;
@@ -338,13 +339,19 @@ static noinline int create_subvol(struct btrfs_root *root,
        d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
 fail:
-        err = btrfs_commit_transaction(trans, root);
+        if (async_transid) {
+                *async_transid = trans->transid;
+                err = btrfs_commit_transaction_async(trans, root, 1);
+        } else {
+                err = btrfs_commit_transaction(trans, root);
+        }
        if (err && !ret)
                ret = err;
        return ret;
 }
-static int create_snapshot(struct btrfs_root *root, struct dentry *dentry)
+static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
+                           char *name, int namelen, u64 *async_transid)
 {
        struct inode *inode;
        struct btrfs_pending_snapshot *pending_snapshot;
@@ -373,7 +380,14 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry)
        list_add(&pending_snapshot->list,
                 &trans->transaction->pending_snapshots);
-        ret = btrfs_commit_transaction(trans, root->fs_info->extent_root);
+        if (async_transid) {
+                *async_transid = trans->transid;
+                ret = btrfs_commit_transaction_async(trans,
+                                     root->fs_info->extent_root, 1);
+        } else {
+                ret = btrfs_commit_transaction(trans,
+                                               root->fs_info->extent_root);
+        }
        BUG_ON(ret);
        ret = pending_snapshot->error;
@@ -395,6 +409,76 @@ fail:
        return ret;
 }
+/*
+ * Sticky-bit deletion check, mirroring check_sticky() in fs/namei.c.
+ * Kept inline so the cost is minimal when the sticky bit is not in use.
+ *
+ * Returns 0 when @dir does not have S_ISVTX set, or when the current
+ * fsuid owns either @inode or @dir.  Otherwise returns non-zero unless
+ * the caller has CAP_FOWNER.
+ */
+static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode)
+{
+        uid_t owner = current_fsuid();
+
+        if (!(dir->i_mode & S_ISVTX))
+                return 0;
+        /* owning the victim or the directory is enough */
+        if (inode->i_uid == owner || dir->i_uid == owner)
+                return 0;
+        return !capable(CAP_FOWNER);
+}
+/*
+ * Mirror of may_delete() in fs/namei.c: decide whether @victim may be
+ * removed from directory @dir, and whether it is the kind of object the
+ * caller asked to remove (@isdir non-zero means a directory is expected).
+ *
+ * Checks, in the order they are applied below:
+ *  - a negative @victim yields -ENOENT;
+ *  - write and exec permission on @dir is required; per the fs/namei.c
+ *    original, read-only and immutable directories are rejected by that
+ *    permission check;
+ *  - nothing can be removed from an append-only @dir (-EPERM);
+ *  - the sticky-bit rule of btrfs_check_sticky() must pass (owner of
+ *    @dir, owner of @victim, or CAP_FOWNER), and @victim must not be
+ *    append-only, immutable or a swapfile (-EPERM);
+ *  - directory wanted but @victim is not one: -ENOTDIR;
+ *  - directory wanted but @victim is a root dentry: -EBUSY;
+ *  - non-directory wanted but @victim is a directory: -EISDIR;
+ *  - @dir is already dead: -ENOENT;
+ *  - @victim is flagged DCACHE_NFSFS_RENAMED (NFS sillyrename, handled
+ *    by nfs_async_unlink() according to the original comment): -EBUSY.
+ *
+ * Returns 0 when removal is allowed.
+ */
+static int btrfs_may_delete(struct inode *dir,struct dentry *victim,int isdir)
+{
+        struct inode *target = victim->d_inode;
+        int err;
+
+        if (!target)
+                return -ENOENT;
+        BUG_ON(victim->d_parent->d_inode != dir);
+        audit_inode_child(victim, dir);
+        err = inode_permission(dir, MAY_WRITE | MAY_EXEC);
+        if (err)
+                return err;
+        if (IS_APPEND(dir))
+                return -EPERM;
+        if (btrfs_check_sticky(dir, target))
+                return -EPERM;
+        if (IS_APPEND(target) || IS_IMMUTABLE(target) || IS_SWAPFILE(target))
+                return -EPERM;
+        if (isdir) {
+                if (!S_ISDIR(target->i_mode))
+                        return -ENOTDIR;
+                if (IS_ROOT(victim))
+                        return -EBUSY;
+        } else {
+                if (S_ISDIR(target->i_mode))
+                        return -EISDIR;
+        }
+        if (IS_DEADDIR(dir))
+                return -ENOENT;
+        return (victim->d_flags & DCACHE_NFSFS_RENAMED) ? -EBUSY : 0;
+}
 /* copy of may_create in fs/namei.c() */
 static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
 {
@@ -412,7 +496,8 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
 */
 static noinline int btrfs_mksubvol(struct path *parent,
                                   char *name, int namelen,
-                                   struct btrfs_root *snap_src)
+                                   struct btrfs_root *snap_src,
+                                   u64 *async_transid)
 {
        struct inode *dir  = parent->dentry->d_inode;
        struct dentry *dentry;
@@ -443,10 +528,11 @@ static noinline int btrfs_mksubvol(struct path *parent,
                goto out_up_read;
        if (snap_src) {
-                error = create_snapshot(snap_src, dentry);
+                error = create_snapshot(snap_src, dentry,
+                                        name, namelen, async_transid);
        } else {
                error = create_subvol(BTRFS_I(dir)->root, dentry,
-                                      name, namelen);
+                                      name, namelen, async_transid);
        }
        if (!error)
                fsnotify_mkdir(dir, dentry);
@@ -708,7 +794,6 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
        char *sizestr;
        char *devstr = NULL;
        int ret = 0;
-        int namelen;
        int mod = 0;
        if (root->fs_info->sb->s_flags & MS_RDONLY)
@@ -722,7 +807,6 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
                return PTR_ERR(vol_args);
        vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
-        namelen = strlen(vol_args->name);
        mutex_lock(&root->fs_info->volume_mutex);
        sizestr = vol_args->name;
@@ -801,11 +885,13 @@ out_unlock:
        return ret;
 }
-static noinline int btrfs_ioctl_snap_create(struct file *file,
+static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
-                                            void __user *arg, int subvol)
+                                                    char *name,
+                                                    unsigned long fd,
+                                                    int subvol,
+                                                    u64 *transid)
 {
        struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
-        struct btrfs_ioctl_vol_args *vol_args;
        struct file *src_file;
        int namelen;
        int ret = 0;
@@ -813,23 +899,18 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
        if (root->fs_info->sb->s_flags & MS_RDONLY)
                return -EROFS;
-        vol_args = memdup_user(arg, sizeof(*vol_args));
+        namelen = strlen(name);
-        if (IS_ERR(vol_args))
+        if (strchr(name, '/')) {
-                return PTR_ERR(vol_args);
-        vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
-        namelen = strlen(vol_args->name);
-        if (strchr(vol_args->name, '/')) {
                ret = -EINVAL;
                goto out;
        }
        if (subvol) {
-                ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen,
+                ret = btrfs_mksubvol(&file->f_path, name, namelen,
-                                     NULL);
+                                     NULL, transid);
        } else {
                struct inode *src_inode;
-                src_file = fget(vol_args->fd);
+                src_file = fget(fd);
                if (!src_file) {
                        ret = -EINVAL;
                        goto out;
@@ -843,12 +924,56 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
                        fput(src_file);
                        goto out;
                }
-                ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen,
+                ret = btrfs_mksubvol(&file->f_path, name, namelen,
-                                     BTRFS_I(src_inode)->root);
+                                     BTRFS_I(src_inode)->root,
+                                     transid);
                fput(src_file);
        }
 out:
+        return ret;
+}
+/*
+ * Shared handler for the subvolume/snapshot create ioctls.
+ *
+ * @file:   file the ioctl was issued on
+ * @arg:    user pointer to a struct btrfs_ioctl_async_vol_args when @async
+ *          is set, otherwise to a struct btrfs_ioctl_vol_args
+ * @subvol: non-zero to create a subvolume, zero to snapshot the root of
+ *          the file referenced by the fd in the argument
+ * @async:  non-zero for the asynchronous variant, which reports the
+ *          transaction id back through the transid field of @arg
+ *
+ * Returns 0 on success or a negative errno.  The kernel copy of the
+ * argument is freed on every path that gets past memdup_user().
+ */
+static noinline int btrfs_ioctl_snap_create(struct file *file,
+                                            void __user *arg, int subvol,
+                                            int async)
+{
+        struct btrfs_ioctl_vol_args *vol_args = NULL;
+        struct btrfs_ioctl_async_vol_args *async_vol_args = NULL;
+        char *name;
+        u64 fd;
+        u64 transid = 0;
+        int ret;
+        if (async) {
+                async_vol_args = memdup_user(arg, sizeof(*async_vol_args));
+                if (IS_ERR(async_vol_args))
+                        return PTR_ERR(async_vol_args);
+                name = async_vol_args->name;
+                fd = async_vol_args->fd;
+                /* the name comes from user space: force termination */
+                async_vol_args->name[BTRFS_SNAPSHOT_NAME_MAX] = '\0';
+        } else {
+                vol_args = memdup_user(arg, sizeof(*vol_args));
+                if (IS_ERR(vol_args))
+                        return PTR_ERR(vol_args);
+                name = vol_args->name;
+                fd = vol_args->fd;
+                /* the name comes from user space: force termination */
+                vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
+        }
+        /*
+         * create_subvol() and create_snapshot() switch to
+         * btrfs_commit_transaction_async() whenever they are given a
+         * transid pointer, so only pass one for the async ioctl.  The
+         * synchronous ioctl must keep waiting for the full commit.
+         */
+        ret = btrfs_ioctl_snap_create_transid(file, name, fd, subvol,
+                                              async ? &transid : NULL);
+        if (!ret && async) {
+                /* record the error instead of returning: args must be freed */
+                if (copy_to_user(arg +
+                                offsetof(struct btrfs_ioctl_async_vol_args,
+                                transid), &transid, sizeof(transid)))
+                        ret = -EFAULT;
+        }
        kfree(vol_args);
+        kfree(async_vol_args);
        return ret;
 }
@@ -1073,14 +1198,10 @@ static noinline int btrfs_ioctl_tree_search(struct file *file,
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
-        args = kmalloc(sizeof(*args), GFP_KERNEL);
+        args = memdup_user(argp, sizeof(*args));
-        if (!args)
+        if (IS_ERR(args))
-                return -ENOMEM;
+                return PTR_ERR(args);
-        if (copy_from_user(args, argp, sizeof(*args))) {
-                kfree(args);
-                return -EFAULT;
-        }
        inode = fdentry(file)->d_inode;
        ret = search_ioctl(inode, args);
        if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
@@ -1188,14 +1309,10 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
-        args = kmalloc(sizeof(*args), GFP_KERNEL);
+        args = memdup_user(argp, sizeof(*args));
-        if (!args)
+        if (IS_ERR(args))
-                return -ENOMEM;
+                return PTR_ERR(args);
-        if (copy_from_user(args, argp, sizeof(*args))) {
-                kfree(args);
-                return -EFAULT;
-        }
        inode = fdentry(file)->d_inode;
        if (args->treeid == 0)
@@ -1227,9 +1344,6 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
        int ret;
        int err = 0;
-        if (!capable(CAP_SYS_ADMIN))
-                return -EPERM;
        vol_args = memdup_user(arg, sizeof(*vol_args));
        if (IS_ERR(vol_args))
                return PTR_ERR(vol_args);
@@ -1259,13 +1373,51 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
        }
        inode = dentry->d_inode;
+        dest = BTRFS_I(inode)->root;
+        if (!capable(CAP_SYS_ADMIN)){
+                /*
+                 * Regular user.  Only allow this with a special mount
+                 * option, when the user has write+exec access to the
+                 * subvol root, and when rmdir(2) would have been
+                 * allowed.
+                 *
+                 * Note that this is _not_ a check that the subvol is
+                 * empty or doesn't contain data that we wouldn't
+                 * otherwise be able to delete.
+                 *
+                 * Users who want to delete empty subvols should try
+                 * rmdir(2).
+                 */
+                err = -EPERM;
+                if (!btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
+                        goto out_dput;
+                /*
+                 * Do not allow deletion if the parent dir is the same
+                 * as the dir to be deleted.  That means the ioctl
+                 * must be called on the dentry referencing the root
+                 * of the subvol, not a random directory contained
+                 * within it.
+                 */
+                err = -EINVAL;
+                if (root == dest)
+                        goto out_dput;
+                err = inode_permission(inode, MAY_WRITE | MAY_EXEC);
+                if (err)
+                        goto out_dput;
+                /* check if subvolume may be deleted by a non-root user */
+                err = btrfs_may_delete(dir, dentry, 1);
+                if (err)
+                        goto out_dput;
+        }
        if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) {
                err = -EINVAL;
                goto out_dput;
        }
-        dest = BTRFS_I(inode)->root;
        mutex_lock(&inode->i_mutex);
        err = d_invalidate(dentry);
        if (err)
@@ -1304,7 +1456,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
                BUG_ON(ret);
        }
-        ret = btrfs_commit_transaction(trans, root);
+        ret = btrfs_end_transaction(trans, root);
        BUG_ON(ret);
        inode->i_flags |= S_DEAD;
 out_up_write:
@@ -1502,11 +1654,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
        path->reada = 2;
        if (inode < src) {
-                mutex_lock(&inode->i_mutex);
+                mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
-                mutex_lock(&src->i_mutex);
+                mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD);
        } else {
-                mutex_lock(&src->i_mutex);
+                mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT);
-                mutex_lock(&inode->i_mutex);
+                mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
        }
        /* determine range to clone */
@@ -1530,13 +1682,15 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
        while (1) {
                struct btrfs_ordered_extent *ordered;
                lock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
-                ordered = btrfs_lookup_first_ordered_extent(inode, off+len);
+                ordered = btrfs_lookup_first_ordered_extent(src, off+len);
-                if (BTRFS_I(src)->delalloc_bytes == 0 && !ordered)
+                if (!ordered &&
+                    !test_range_bit(&BTRFS_I(src)->io_tree, off, off+len,
+                                   EXTENT_DELALLOC, 0, NULL))
                        break;
                unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
                if (ordered)
                        btrfs_put_ordered_extent(ordered);
-                btrfs_wait_ordered_range(src, off, off+len);
+                btrfs_wait_ordered_range(src, off, len);
        }
        /* clone data */
@@ -1605,7 +1759,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
                        }
                        btrfs_release_path(root, path);
-                        if (key.offset + datal < off ||
+                        if (key.offset + datal <= off ||
                            key.offset >= off+len)
                                goto next;
@@ -1879,6 +2033,22 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
        return 0;
 }
+/*
+ * Summarize one list of block groups into a btrfs_ioctl_space_info record.
+ *
+ * @groups_list: list head whose entries are btrfs_block_group_cache
+ *               structures linked through their ->list member
+ * @space:       output record; it is reset first, then total_bytes and
+ *               used_bytes accumulate over every group on the list, while
+ *               flags ends up holding the flags of the last group visited
+ *               (0 when the list is empty)
+ */
+static void get_block_group_info(struct list_head *groups_list,
+                                 struct btrfs_ioctl_space_info *space)
+{
+        struct btrfs_block_group_cache *bg;
+
+        space->flags = 0;
+        space->used_bytes = 0;
+        space->total_bytes = 0;
+
+        list_for_each_entry(bg, groups_list, list) {
+                space->total_bytes += bg->key.offset;
+                space->used_bytes += btrfs_block_group_used(&bg->item);
+                space->flags = bg->flags;
+        }
+}
 long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
 {
        struct btrfs_ioctl_space_args space_args;
@@ -1887,27 +2057,56 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
        struct btrfs_ioctl_space_info *dest_orig;
        struct btrfs_ioctl_space_info *user_dest;
        struct btrfs_space_info *info;
+        u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
+                       BTRFS_BLOCK_GROUP_SYSTEM,
+                       BTRFS_BLOCK_GROUP_METADATA,
+                       BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA};
+        int num_types = 4;
        int alloc_size;
        int ret = 0;
        int slot_count = 0;
+        int i, c;
        if (copy_from_user(&space_args,
                           (struct btrfs_ioctl_space_args __user *)arg,
                           sizeof(space_args)))
                return -EFAULT;
-        /* first we count slots */
+        for (i = 0; i < num_types; i++) {
-        rcu_read_lock();
+                struct btrfs_space_info *tmp;
-        list_for_each_entry_rcu(info, &root->fs_info->space_info, list)
-                slot_count++;
+                info = NULL;
-        rcu_read_unlock();
+                rcu_read_lock();
+                list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
+                                        list) {
+                        if (tmp->flags == types[i]) {
+                                info = tmp;
+                                break;
+                        }
+                }
+                rcu_read_unlock();
+                if (!info)
+                        continue;
+                down_read(&info->groups_sem);
+                for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
+                        if (!list_empty(&info->block_groups[c]))
+                                slot_count++;
+                }
+                up_read(&info->groups_sem);
+        }
        /* space_slots == 0 means they are asking for a count */
        if (space_args.space_slots == 0) {
                space_args.total_spaces = slot_count;
                goto out;
        }
+        slot_count = min_t(int, space_args.space_slots, slot_count);
        alloc_size = sizeof(*dest) * slot_count;
        /* we generally have at most 6 or so space infos, one for each raid
         * level.  So, a whole page should be more than enough for everyone
         */
@@ -1921,27 +2120,34 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
        dest_orig = dest;
        /* now we have a buffer to copy into */
-        rcu_read_lock();
+        for (i = 0; i < num_types; i++) {
-        list_for_each_entry_rcu(info, &root->fs_info->space_info, list) {
+                struct btrfs_space_info *tmp;
-                /* make sure we don't copy more than we allocated
-                 * in our buffer
+                info = NULL;
-                 */
+                rcu_read_lock();
-                if (slot_count == 0)
+                list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
-                        break;
+                                        list) {
-                slot_count--;
+                        if (tmp->flags == types[i]) {
+                                info = tmp;
-                /* make sure userland has enough room in their buffer */
+                                break;
-                if (space_args.total_spaces >= space_args.space_slots)
+                        }
-                        break;
+                }
+                rcu_read_unlock();
-                space.flags = info->flags;
+                if (!info)
-                space.total_bytes = info->total_bytes;
+                        continue;
-                space.used_bytes = info->bytes_used;
+                down_read(&info->groups_sem);
-                memcpy(dest, &space, sizeof(space));
+                /*
+                 * Never emit more than slot_count entries.  slot_count was
+                 * clamped to the number of slots userspace asked for and
+                 * alloc_size was computed from it, so writing one entry per
+                 * non-empty list without a bound could run past the end of
+                 * the buffer that dest_orig points at (presumably
+                 * alloc_size bytes -- the allocation is not visible in this
+                 * hunk).
+                 */
+                for (c = 0; c < BTRFS_NR_RAID_TYPES &&
+                            dest - dest_orig < slot_count; c++) {
-                dest++;
+                        if (!list_empty(&info->block_groups[c])) {
-                space_args.total_spaces++;
+                                get_block_group_info(&info->block_groups[c],
+                                                     &space);
+                                memcpy(dest, &space, sizeof(space));
+                                dest++;
+                                space_args.total_spaces++;
+                        }
+                }
+                up_read(&info->groups_sem);
        }
-        rcu_read_unlock();
        user_dest = (struct btrfs_ioctl_space_info *)
                (arg + sizeof(struct btrfs_ioctl_space_args));
@@ -1984,6 +2190,36 @@ long btrfs_ioctl_trans_end(struct file *file)
        return 0;
 }
+/*
+ * Handler for BTRFS_IOC_START_SYNC.
+ *
+ * Starts a transaction, remembers its transid and hands the transaction to
+ * btrfs_commit_transaction_async().  When @argp is non-NULL the transid is
+ * copied out to it as a u64.
+ *
+ * Returns 0 on success, the error encoded in the transaction handle if the
+ * transaction could not be started, or -EFAULT if the copy to userspace
+ * fails.
+ */
+static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp)
+{
+        struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
+        struct btrfs_trans_handle *trans;
+        u64 transid;
+
+        trans = btrfs_start_transaction(root, 0);
+        /* the handle may be an ERR_PTR (e.g. -ENOMEM); don't dereference it */
+        if (IS_ERR(trans))
+                return PTR_ERR(trans);
+        transid = trans->transid;
+        btrfs_commit_transaction_async(trans, root, 0);
+
+        if (argp)
+                if (copy_to_user(argp, &transid, sizeof(transid)))
+                        return -EFAULT;
+        return 0;
+}
+/*
+ * Handler for BTRFS_IOC_WAIT_SYNC.
+ *
+ * Reads a u64 transid from @argp when one is supplied and passes it to
+ * btrfs_wait_for_commit(); a NULL @argp means transid 0.
+ *
+ * Returns -EFAULT if the transid cannot be copied in, otherwise whatever
+ * btrfs_wait_for_commit() returns.
+ */
+static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp)
+{
+        struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
+        u64 transid = 0;  /* 0 == current trans, kept when argp is NULL */
+
+        if (argp && copy_from_user(&transid, argp, sizeof(transid)))
+                return -EFAULT;
+
+        return btrfs_wait_for_commit(root, transid);
+}
 long btrfs_ioctl(struct file *file, unsigned int
                cmd, unsigned long arg)
 {
@@ -1998,9 +2234,11 @@ long btrfs_ioctl(struct file *file, unsigned int
        case FS_IOC_GETVERSION:
                return btrfs_ioctl_getversion(file, argp);
        case BTRFS_IOC_SNAP_CREATE:
-                return btrfs_ioctl_snap_create(file, argp, 0);
+                return btrfs_ioctl_snap_create(file, argp, 0, 0);
+        case BTRFS_IOC_SNAP_CREATE_ASYNC:
+                return btrfs_ioctl_snap_create(file, argp, 0, 1);
        case BTRFS_IOC_SUBVOL_CREATE:
-                return btrfs_ioctl_snap_create(file, argp, 1);
+                return btrfs_ioctl_snap_create(file, argp, 1, 0);
        case BTRFS_IOC_SNAP_DESTROY:
                return btrfs_ioctl_snap_destroy(file, argp);
        case BTRFS_IOC_DEFAULT_SUBVOL:
@@ -2034,6 +2272,10 @@ long btrfs_ioctl(struct file *file, unsigned int
        case BTRFS_IOC_SYNC:
                btrfs_sync_fs(file->f_dentry->d_sb, 1);
                return 0;
+        case BTRFS_IOC_START_SYNC:
+                return btrfs_ioctl_start_sync(file, argp);
+        case BTRFS_IOC_WAIT_SYNC:
+                return btrfs_ioctl_wait_sync(file, argp);
        }
        return -ENOTTY;
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 424694aa517f..17c99ebdf960 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -22,14 +22,21 @@
 #define BTRFS_IOCTL_MAGIC 0x94
 #define BTRFS_VOL_NAME_MAX 255
-#define BTRFS_PATH_NAME_MAX 4087
 /* this should be 4k */
+#define BTRFS_PATH_NAME_MAX 4087
 struct btrfs_ioctl_vol_args {
        __s64 fd;
        char name[BTRFS_PATH_NAME_MAX + 1];
 };
+/*
+ * Argument block for BTRFS_IOC_SNAP_CREATE_ASYNC.  With two 8-byte fields
+ * in front of it, the BTRFS_SNAPSHOT_NAME_MAX + 1 = 4080 byte name buffer
+ * brings the structure to 4096 bytes in total.
+ */
+#define BTRFS_SNAPSHOT_NAME_MAX 4079
+struct btrfs_ioctl_async_vol_args {
+        __s64 fd;
+        /* NOTE(review): presumably the transaction id of the async commit -- confirm */
+        __u64 transid;
+        char name[BTRFS_SNAPSHOT_NAME_MAX + 1];
+};
 #define BTRFS_INO_LOOKUP_PATH_MAX 4080
 struct btrfs_ioctl_ino_lookup_args {
        __u64 treeid;
@@ -178,4 +185,8 @@ struct btrfs_ioctl_space_args {
 #define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64)
 #define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \
                                    struct btrfs_ioctl_space_args)
+#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64)
+#define BTRFS_IOC_WAIT_SYNC  _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
+#define BTRFS_IOC_SNAP_CREATE_ASYNC _IOW(BTRFS_IOCTL_MAGIC, 23, \
+                                   struct btrfs_ioctl_async_vol_args)
 #endif
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index e56c72bc5add..f4621f6deca1 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -526,7 +526,6 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
 {
        u64 end;
        u64 orig_end;
-        u64 wait_end;
        struct btrfs_ordered_extent *ordered;
        int found;
@@ -537,7 +536,6 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
                if (orig_end > INT_LIMIT(loff_t))
                        orig_end = INT_LIMIT(loff_t);
        }
-        wait_end = orig_end;
 again:
        /* start IO across the range first to instantiate any delalloc
         * extents
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b37d723b9d4a..045c9c2b2d7e 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -29,6 +29,7 @@
 #include "locking.h"
 #include "btrfs_inode.h"
 #include "async-thread.h"
+#include "free-space-cache.h"
 /*
 * backref_node, mapping_node and tree_block start with this
@@ -178,8 +179,6 @@ struct reloc_control {
        u64 search_start;
        u64 extents_found;
-        int block_rsv_retries;
        unsigned int stage:8;
        unsigned int create_reloc_tree:1;
        unsigned int merge_reloc_tree:1;
@@ -2133,7 +2132,6 @@ int prepare_to_merge(struct reloc_control *rc, int err)
        LIST_HEAD(reloc_roots);
        u64 num_bytes = 0;
        int ret;
-        int retries = 0;
        mutex_lock(&root->fs_info->trans_mutex);
        rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
@@ -2143,7 +2141,7 @@ again:
        if (!err) {
                num_bytes = rc->merging_rsv_size;
                ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv,
-                                          num_bytes, &retries);
+                                          num_bytes);
                if (ret)
                        err = ret;
        }
@@ -2155,7 +2153,6 @@ again:
                        btrfs_end_transaction(trans, rc->extent_root);
                        btrfs_block_rsv_release(rc->extent_root,
                                                rc->block_rsv, num_bytes);
-                        retries = 0;
                        goto again;
                }
        }
@@ -2405,15 +2402,13 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans,
        num_bytes = calcu_metadata_size(rc, node, 1) * 2;
        trans->block_rsv = rc->block_rsv;
-        ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes,
+        ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes);
-                                  &rc->block_rsv_retries);
        if (ret) {
                if (ret == -EAGAIN)
                        rc->commit_transaction = 1;
                return ret;
        }
-        rc->block_rsv_retries = 0;
        return 0;
 }
@@ -3099,6 +3094,8 @@ static int add_tree_block(struct reloc_control *rc,
                BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0));
                ret = get_ref_objectid_v0(rc, path, extent_key,
                                          &ref_owner, NULL);
+                if (ret < 0)
+                        return ret;
                BUG_ON(ref_owner >= BTRFS_MAX_LEVEL);
                level = (int)ref_owner;
                /* FIXME: get real generation */
@@ -3191,6 +3188,54 @@ static int block_use_full_backref(struct reloc_control *rc,
        return ret;
 }
+/*
+ * Truncate the free space cache held by an inode in the tree root.
+ *
+ * @fs_info: filesystem the cache inode belongs to
+ * @inode:   cache inode to truncate, or NULL to look it up by @ino
+ * @ino:     objectid of the cache inode; only used when @inode is NULL
+ *
+ * Once an inode is in hand (passed in or looked up) it is always released
+ * with iput() before returning, so a caller-supplied reference is consumed.
+ *
+ * Returns -ENOENT when the inode cannot be looked up or is bad, -ENOMEM when
+ * no path can be allocated, the error from btrfs_join_transaction() when the
+ * transaction cannot be joined, and otherwise the result of
+ * btrfs_truncate_free_space_cache().
+ */
+static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
+                                    struct inode *inode, u64 ino)
+{
+        struct btrfs_key key;
+        struct btrfs_path *path;
+        struct btrfs_root *root = fs_info->tree_root;
+        struct btrfs_trans_handle *trans;
+        unsigned long nr;
+        int ret = 0;
+
+        if (inode)
+                goto truncate;
+
+        key.objectid = ino;
+        key.type = BTRFS_INODE_ITEM_KEY;
+        key.offset = 0;
+
+        inode = btrfs_iget(fs_info->sb, &key, root, NULL);
+        if (!inode || IS_ERR(inode) || is_bad_inode(inode)) {
+                if (inode && !IS_ERR(inode))
+                        iput(inode);
+                return -ENOENT;
+        }
+
+truncate:
+        path = btrfs_alloc_path();
+        if (!path) {
+                ret = -ENOMEM;
+                goto out;
+        }
+
+        trans = btrfs_join_transaction(root, 0);
+        if (IS_ERR(trans)) {
+                /* report the failure instead of returning a stale 0 */
+                ret = PTR_ERR(trans);
+                btrfs_free_path(path);
+                goto out;
+        }
+
+        ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
+
+        btrfs_free_path(path);
+        nr = trans->blocks_used;
+        btrfs_end_transaction(trans, root);
+        btrfs_btree_balance_dirty(root, nr);
+out:
+        iput(inode);
+        return ret;
+}
 /*
 * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY
 * this function scans fs tree to find blocks reference the data extent
@@ -3217,15 +3262,27 @@ static int find_data_references(struct reloc_control *rc,
        int counted;
        int ret;
-        path = btrfs_alloc_path();
-        if (!path)
-                return -ENOMEM;
        ref_root = btrfs_extent_data_ref_root(leaf, ref);
        ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref);
        ref_offset = btrfs_extent_data_ref_offset(leaf, ref);
        ref_count = btrfs_extent_data_ref_count(leaf, ref);
+        /*
+         * This is an extent belonging to the free space cache; let's just
+         * delete it and redo the search.
+         */
+        if (ref_root == BTRFS_ROOT_TREE_OBJECTID) {
+                ret = delete_block_group_cache(rc->extent_root->fs_info,
+                                               NULL, ref_objectid);
+                if (ret != -ENOENT)
+                        return ret;
+                ret = 0;
+        }
+        path = btrfs_alloc_path();
+        if (!path)
+                return -ENOMEM;
        root = read_fs_root(rc->extent_root->fs_info, ref_root);
        if (IS_ERR(root)) {
                err = PTR_ERR(root);
@@ -3554,8 +3611,7 @@ int prepare_to_relocate(struct reloc_control *rc)
         * is no reservation in transaction handle.
         */
        ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv,
-                                  rc->extent_root->nodesize * 256,
+                                  rc->extent_root->nodesize * 256);
-                                  &rc->block_rsv_retries);
        if (ret)
                return ret;
@@ -3567,7 +3623,6 @@ int prepare_to_relocate(struct reloc_control *rc)
        rc->extents_found = 0;
        rc->nodes_relocated = 0;
        rc->merging_rsv_size = 0;
-        rc->block_rsv_retries = 0;
        rc->create_reloc_tree = 1;
        set_reloc_control(rc);
@@ -3860,6 +3915,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
 {
        struct btrfs_fs_info *fs_info = extent_root->fs_info;
        struct reloc_control *rc;
+        struct inode *inode;
+        struct btrfs_path *path;
        int ret;
        int rw = 0;
        int err = 0;
@@ -3882,6 +3939,26 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
                rw = 1;
        }
+        path = btrfs_alloc_path();
+        if (!path) {
+                err = -ENOMEM;
+                goto out;
+        }
+        inode = lookup_free_space_inode(fs_info->tree_root, rc->block_group,
+                                        path);
+        btrfs_free_path(path);
+        if (!IS_ERR(inode))
+                ret = delete_block_group_cache(fs_info, inode, 0);
+        else
+                ret = PTR_ERR(inode);
+        if (ret && ret != -ENOENT) {
+                err = ret;
+                goto out;
+        }
        rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
        if (IS_ERR(rc->data_inode)) {
                err = PTR_ERR(rc->data_inode);
@@ -4143,7 +4220,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
                btrfs_add_ordered_sum(inode, ordered, sums);
        }
        btrfs_put_ordered_extent(ordered);
-        return 0;
+        return ret;
 }
 void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 2d958be761c8..6a1086e83ffc 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -181,7 +181,6 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
 int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid)
 {
        struct btrfs_root *dead_root;
-        struct btrfs_item *item;
        struct btrfs_root_item *ri;
        struct btrfs_key key;
        struct btrfs_key found_key;
@@ -214,7 +213,6 @@ again:
                        nritems = btrfs_header_nritems(leaf);
                        slot = path->slots[0];
                }
-                item = btrfs_item_nr(leaf, slot);
                btrfs_item_key_to_cpu(leaf, &key, slot);
                if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY)
                        goto next;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 144f8a5730f5..8299a25ffc8f 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -61,6 +61,8 @@ static void btrfs_put_super(struct super_block *sb)
        ret = close_ctree(root);
        sb->s_fs_info = NULL;
+        (void)ret; /* FIXME: need to fix VFS to return error? */
 }
 enum {
@@ -68,7 +70,8 @@ enum {
        Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
        Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
        Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit,
-        Opt_discard, Opt_err,
+        Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_err,
+        Opt_user_subvol_rm_allowed,
 };
 static match_table_t tokens = {
@@ -92,6 +95,9 @@ static match_table_t tokens = {
        {Opt_flushoncommit, "flushoncommit"},
        {Opt_ratio, "metadata_ratio=%d"},
        {Opt_discard, "discard"},
+        {Opt_space_cache, "space_cache"},
+        {Opt_clear_cache, "clear_cache"},
+        {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
        {Opt_err, NULL},
 };
@@ -235,6 +241,16 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                case Opt_discard:
                        btrfs_set_opt(info->mount_opt, DISCARD);
                        break;
+                case Opt_space_cache:
+                        printk(KERN_INFO "btrfs: enabling disk space caching\n");
+                        btrfs_set_opt(info->mount_opt, SPACE_CACHE);
+                        /*
+                         * Stop here: falling through would also set
+                         * CLEAR_CACHE whenever space_cache is requested.
+                         */
+                        break;
+                case Opt_clear_cache:
+                        printk(KERN_INFO "btrfs: force clearing of disk cache\n");
+                        btrfs_set_opt(info->mount_opt, CLEAR_CACHE);
+                        break;
+                case Opt_user_subvol_rm_allowed:
+                        btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
+                        break;
                case Opt_err:
                        printk(KERN_INFO "btrfs: unrecognized mount option "
                               "'%s'\n", p);
@@ -380,7 +396,7 @@ static struct dentry *get_default_root(struct super_block *sb,
 find_root:
        new_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
        if (IS_ERR(new_root))
-                return ERR_PTR(PTR_ERR(new_root));
+                return ERR_CAST(new_root);
        if (btrfs_root_refs(&new_root->root_item) == 0)
                return ERR_PTR(-ENOENT);
@@ -436,7 +452,6 @@ static int btrfs_fill_super(struct super_block *sb,
 {
        struct inode *inode;
        struct dentry *root_dentry;
-        struct btrfs_super_block *disk_super;
        struct btrfs_root *tree_root;
        struct btrfs_key key;
        int err;
@@ -458,7 +473,6 @@ static int btrfs_fill_super(struct super_block *sb,
                return PTR_ERR(tree_root);
        }
        sb->s_fs_info = tree_root;
-        disk_super = &tree_root->fs_info->super_copy;
        key.objectid = BTRFS_FIRST_FREE_OBJECTID;
        key.type = BTRFS_INODE_ITEM_KEY;
@@ -560,8 +574,8 @@ static int btrfs_test_super(struct super_block *s, void *data)
 * Note:  This is based on get_sb_bdev from fs/super.c with a few additions
 *        for multiple device setup.  Make sure to keep it in sync.
 */
-static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
+static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
-                const char *dev_name, void *data, struct vfsmount *mnt)
+                const char *dev_name, void *data)
 {
        struct block_device *bdev = NULL;
        struct super_block *s;
@@ -571,7 +585,6 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
        char *subvol_name = NULL;
        u64 subvol_objectid = 0;
        int error = 0;
-        int found = 0;
        if (!(flags & MS_RDONLY))
                mode |= FMODE_WRITE;
@@ -580,7 +593,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
                                          &subvol_name, &subvol_objectid,
                                          &fs_devices);
        if (error)
-                return error;
+                return ERR_PTR(error);
        error = btrfs_scan_one_device(dev_name, mode, fs_type, &fs_devices);
        if (error)
@@ -607,7 +620,6 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
                        goto error_close_devices;
                }
-                found = 1;
                btrfs_close_devices(fs_devices);
        } else {
                char b[BDEVNAME_SIZE];
@@ -629,7 +641,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
        if (IS_ERR(root)) {
                error = PTR_ERR(root);
                deactivate_locked_super(s);
-                goto error;
+                goto error_free_subvol_name;
        }
        /* if they gave us a subvolume name bind mount into that */
        if (strcmp(subvol_name, ".")) {
@@ -643,24 +655,21 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
                        deactivate_locked_super(s);
                        error = PTR_ERR(new_root);
                        dput(root);
-                        goto error_close_devices;
+                        goto error_free_subvol_name;
                }
                if (!new_root->d_inode) {
                        dput(root);
                        dput(new_root);
                        deactivate_locked_super(s);
                        error = -ENXIO;
-                        goto error_close_devices;
+                        goto error_free_subvol_name;
                }
                dput(root);
                root = new_root;
        }
-        mnt->mnt_sb = s;
-        mnt->mnt_root = root;
        kfree(subvol_name);
-        return 0;
+        return root;
 error_s:
        error = PTR_ERR(s);
@@ -668,8 +677,7 @@ error_close_devices:
        btrfs_close_devices(fs_devices);
 error_free_subvol_name:
        kfree(subvol_name);
-error:
+        return ERR_PTR(error);
-        return error;
 }
 static int btrfs_remount(struct super_block *sb, int *flags, char *data)
@@ -716,18 +724,25 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
        struct list_head *head = &root->fs_info->space_info;
        struct btrfs_space_info *found;
        u64 total_used = 0;
+        u64 total_used_data = 0;
        int bits = dentry->d_sb->s_blocksize_bits;
        __be32 *fsid = (__be32 *)root->fs_info->fsid;
        rcu_read_lock();
-        list_for_each_entry_rcu(found, head, list)
+        list_for_each_entry_rcu(found, head, list) {
+                if (found->flags & (BTRFS_BLOCK_GROUP_METADATA |
+                                    BTRFS_BLOCK_GROUP_SYSTEM))
+                        total_used_data += found->disk_total;
+                else
+                        total_used_data += found->disk_used;
                total_used += found->disk_used;
+        }
        rcu_read_unlock();
        buf->f_namelen = BTRFS_NAME_LEN;
        buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
        buf->f_bfree = buf->f_blocks - (total_used >> bits);
-        buf->f_bavail = buf->f_bfree;
+        buf->f_bavail = buf->f_blocks - (total_used_data >> bits);
        buf->f_bsize = dentry->d_sb->s_blocksize;
        buf->f_type = BTRFS_SUPER_MAGIC;
@@ -746,7 +761,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 static struct file_system_type btrfs_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "btrfs",
-        .get_sb         = btrfs_get_sb,
+        .mount          = btrfs_mount,
        .kill_sb        = kill_anon_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 66e4c66cc63b..1fffbc017bdf 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -163,6 +163,7 @@ enum btrfs_trans_type {
        TRANS_START,
        TRANS_JOIN,
        TRANS_USERSPACE,
+        TRANS_JOIN_NOLOCK,
 };
 static int may_wait_transaction(struct btrfs_root *root, int type)
@@ -179,14 +180,14 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
 {
        struct btrfs_trans_handle *h;
        struct btrfs_transaction *cur_trans;
-        int retries = 0;
        int ret;
 again:
        h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
        if (!h)
                return ERR_PTR(-ENOMEM);
-        mutex_lock(&root->fs_info->trans_mutex);
+        if (type != TRANS_JOIN_NOLOCK)
+                mutex_lock(&root->fs_info->trans_mutex);
        if (may_wait_transaction(root, type))
                wait_current_trans(root);
@@ -195,7 +196,8 @@ again:
        cur_trans = root->fs_info->running_transaction;
        cur_trans->use_count++;
-        mutex_unlock(&root->fs_info->trans_mutex);
+        if (type != TRANS_JOIN_NOLOCK)
+                mutex_unlock(&root->fs_info->trans_mutex);
        h->transid = cur_trans->transid;
        h->transaction = cur_trans;
@@ -212,8 +214,7 @@ again:
        }
        if (num_items > 0) {
-                ret = btrfs_trans_reserve_metadata(h, root, num_items,
+                ret = btrfs_trans_reserve_metadata(h, root, num_items);
-                                                   &retries);
                if (ret == -EAGAIN) {
                        btrfs_commit_transaction(h, root);
                        goto again;
@@ -224,9 +225,11 @@ again:
                }
        }
-        mutex_lock(&root->fs_info->trans_mutex);
+        if (type != TRANS_JOIN_NOLOCK)
+                mutex_lock(&root->fs_info->trans_mutex);
        record_root_in_trans(h, root);
-        mutex_unlock(&root->fs_info->trans_mutex);
+        if (type != TRANS_JOIN_NOLOCK)
+                mutex_unlock(&root->fs_info->trans_mutex);
        if (!current->journal_info && type != TRANS_USERSPACE)
                current->journal_info = h;
@@ -244,6 +247,12 @@ struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
        return start_transaction(root, 0, TRANS_JOIN);
 }
+/*
+ * Join the running transaction through start_transaction() using the
+ * TRANS_JOIN_NOLOCK type, for which start_transaction() skips its
+ * mutex_lock()/mutex_unlock() calls on fs_info->trans_mutex.
+ *
+ * @num_blocks is not used; 0 items are always requested.
+ *
+ * Returns whatever start_transaction() returns.
+ * NOTE(review): presumably the caller must already serialize against
+ * trans_mutex users -- confirm against the call sites.
+ */
+struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root,
+                                                          int num_blocks)
+{
+        return start_transaction(root, 0, TRANS_JOIN_NOLOCK);
+}
 struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
                                                         int num_blocks)
 {
@@ -270,6 +279,58 @@ static noinline int wait_for_commit(struct btrfs_root *root,
        return 0;
 }
+/*
+ * Wait for a transaction to finish committing.
+ *
+ * @root:    root whose fs_info holds the transaction list
+ * @transid: id of the transaction to wait for, or 0 to pick one from the
+ *           list (see below)
+ *
+ * With a non-zero @transid: return 0 at once if it is not newer than
+ * fs_info->last_trans_committed, otherwise look for an entry with exactly
+ * that id on fs_info->trans_list and fail with -EINVAL if there is none.
+ *
+ * With @transid == 0: walk trans_list in reverse and take the first entry
+ * that has in_commit set; return 0 without waiting if that entry already
+ * has commit_done set or if no entry has in_commit set.
+ *
+ * Returns 0 or -EINVAL.
+ */
+int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
+{
+        struct btrfs_transaction *cur_trans = NULL, *t;
+        int ret;
+        mutex_lock(&root->fs_info->trans_mutex);
+        ret = 0;
+        if (transid) {
+                if (transid <= root->fs_info->last_trans_committed)
+                        goto out_unlock;
+                /* find specified transaction */
+                list_for_each_entry(t, &root->fs_info->trans_list, list) {
+                        if (t->transid == transid) {
+                                cur_trans = t;
+                                break;
+                        }
+                        /* stop at the first entry with a larger id */
+                        if (t->transid > transid)
+                                break;
+                }
+                ret = -EINVAL;
+                if (!cur_trans)
+                        goto out_unlock;  /* bad transid */
+        } else {
+                /* find newest transaction that is committing | committed */
+                list_for_each_entry_reverse(t, &root->fs_info->trans_list,
+                                            list) {
+                        if (t->in_commit) {
+                                if (t->commit_done)
+                                        goto out_unlock;
+                                cur_trans = t;
+                                break;
+                        }
+                }
+                if (!cur_trans)
+                        goto out_unlock;  /* nothing committing|committed */
+        }
+        /*
+         * Take a use_count reference before dropping trans_mutex for the
+         * wait; it is dropped again with put_transaction() once the mutex
+         * has been retaken.
+         */
+        cur_trans->use_count++;
+        mutex_unlock(&root->fs_info->trans_mutex);
+        wait_for_commit(root, cur_trans);
+        mutex_lock(&root->fs_info->trans_mutex);
+        put_transaction(cur_trans);
+        ret = 0;
+out_unlock:
+        mutex_unlock(&root->fs_info->trans_mutex);
+        return ret;
+}
 #if 0
 /*
 * rate limit against the drop_snapshot code.  This helps to slow down new
@@ -348,7 +409,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
 }
 static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
-                          struct btrfs_root *root, int throttle)
+                          struct btrfs_root *root, int throttle, int lock)
 {
        struct btrfs_transaction *cur_trans = trans->transaction;
        struct btrfs_fs_info *info = root->fs_info;
@@ -376,26 +437,29 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
        btrfs_trans_release_metadata(trans, root);
-        if (!root->fs_info->open_ioctl_trans &&
+        if (lock && !root->fs_info->open_ioctl_trans &&
            should_end_transaction(trans, root))
                trans->transaction->blocked = 1;
-        if (cur_trans->blocked && !cur_trans->in_commit) {
+        if (lock && cur_trans->blocked && !cur_trans->in_commit) {
                if (throttle)
                        return btrfs_commit_transaction(trans, root);
                else
                        wake_up_process(info->transaction_kthread);
        }
-        mutex_lock(&info->trans_mutex);
+        if (lock)
+                mutex_lock(&info->trans_mutex);
        WARN_ON(cur_trans != info->running_transaction);
        WARN_ON(cur_trans->num_writers < 1);
        cur_trans->num_writers--;
+        smp_mb();
        if (waitqueue_active(&cur_trans->writer_wait))
                wake_up(&cur_trans->writer_wait);
        put_transaction(cur_trans);
-        mutex_unlock(&info->trans_mutex);
+        if (lock)
+                mutex_unlock(&info->trans_mutex);
        if (current->journal_info == trans)
                current->journal_info = NULL;
@@ -411,13 +475,19 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 int btrfs_end_transaction(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root)
 {
-        return __btrfs_end_transaction(trans, root, 0);
+        return __btrfs_end_transaction(trans, root, 0, 1);
 }
 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root)
 {
-        return __btrfs_end_transaction(trans, root, 1);
+        return __btrfs_end_transaction(trans, root, 1, 1);
+}
+/*
+ * End a transaction handle without taking fs_info->trans_mutex.
+ *
+ * Calls __btrfs_end_transaction() with throttle = 0 and lock = 0.  With
+ * lock = 0 the visible parts of that helper skip marking the transaction
+ * blocked, skip the commit / transaction kthread wakeup, and do not take
+ * trans_mutex around the num_writers decrement.
+ */
+int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
+                                 struct btrfs_root *root)
+{
+        return __btrfs_end_transaction(trans, root, 0, 0);
 }
 /*
@@ -836,7 +906,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
        struct extent_buffer *tmp;
        struct extent_buffer *old;
        int ret;
-        int retries = 0;
        u64 to_reserve = 0;
        u64 index = 0;
        u64 objectid;
@@ -858,7 +927,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
        if (to_reserve > 0) {
                ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv,
-                                          to_reserve, &retries);
+                                          to_reserve);
                if (ret) {
                        pending->error = ret;
                        goto fail;
@@ -966,6 +1035,8 @@ static void update_super_roots(struct btrfs_root *root)
        super->root = root_item->bytenr;
        super->generation = root_item->generation;
        super->root_level = root_item->level;
+        if (super->cache_generation != 0 || btrfs_test_opt(root, SPACE_CACHE))
+                super->cache_generation = root_item->generation;
 }
 int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
@@ -988,11 +1059,127 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info)
        return ret;
 }
+/*
+ * wait for the current transaction commit to start and block subsequent
+ * transaction joins
+ *
+ * Sleeps on fs_info->transaction_blocked_wait until trans->in_commit is set;
+ * btrfs_commit_transaction() wakes that queue after setting in_commit.
+ * trans_mutex is dropped around schedule() and re-taken afterwards, so the
+ * caller must hold it on entry and still holds it on return.
+ */
+static void wait_current_trans_commit_start(struct btrfs_root *root,
+                                            struct btrfs_transaction *trans)
+{
+        DEFINE_WAIT(wait);
+        /* fast path: commit already started, nothing to wait for */
+        if (trans->in_commit)
+                return;
+        while (1) {
+                /* get on the wait queue first, then re-check the condition */
+                prepare_to_wait(&root->fs_info->transaction_blocked_wait, &wait,
+                                TASK_UNINTERRUPTIBLE);
+                if (trans->in_commit) {
+                        finish_wait(&root->fs_info->transaction_blocked_wait,
+                                    &wait);
+                        break;
+                }
+                /* sleep without the mutex, then take it back before looping */
+                mutex_unlock(&root->fs_info->trans_mutex);
+                schedule();
+                mutex_lock(&root->fs_info->trans_mutex);
+                finish_wait(&root->fs_info->transaction_blocked_wait, &wait);
+        }
+}
+/*
+ * wait for the current transaction commit to start and then become
+ * unblocked.  caller holds ref.
+ *
+ * Returns once trans->commit_done is set, or once trans->in_commit is set
+ * and trans->blocked is clear.  Sleeps on fs_info->transaction_wait.
+ * trans_mutex is dropped around schedule() and re-taken afterwards, so the
+ * caller must hold it on entry and still holds it on return.
+ */
+static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
+                                         struct btrfs_transaction *trans)
+{
+        DEFINE_WAIT(wait);
+        /* fast path: already committed, or committing and unblocked */
+        if (trans->commit_done || (trans->in_commit && !trans->blocked))
+                return;
+        while (1) {
+                /* get on the wait queue first, then re-check the condition */
+                prepare_to_wait(&root->fs_info->transaction_wait, &wait,
+                                TASK_UNINTERRUPTIBLE);
+                if (trans->commit_done ||
+                    (trans->in_commit && !trans->blocked)) {
+                        finish_wait(&root->fs_info->transaction_wait,
+                                    &wait);
+                        break;
+                }
+                /* sleep without the mutex, then take it back before looping */
+                mutex_unlock(&root->fs_info->trans_mutex);
+                schedule();
+                mutex_lock(&root->fs_info->trans_mutex);
+                finish_wait(&root->fs_info->transaction_wait,
+                            &wait);
+        }
+}
+/*
+ * commit transactions asynchronously. once btrfs_commit_transaction_async
+ * returns, any subsequent transaction will not be allowed to join.
+ *
+ * One btrfs_async_commit is allocated per request by
+ * btrfs_commit_transaction_async() and freed by do_async_commit().
+ */
+struct btrfs_async_commit {
+        /* handle from btrfs_join_transaction(); committed by the work item */
+        struct btrfs_trans_handle *newtrans;
+        /* root passed on to btrfs_commit_transaction() */
+        struct btrfs_root *root;
+        /* delayed work item whose callback is do_async_commit() */
+        struct delayed_work work;
+};
+/*
+ * Delayed-work callback: commit the handle stored in the enclosing
+ * btrfs_async_commit request, then free the request.  The return value of
+ * btrfs_commit_transaction() is not examined.
+ */
+static void do_async_commit(struct work_struct *work)
+{
+        struct btrfs_async_commit *req;
+        /* @work is the work_struct embedded in req->work (a delayed_work) */
+        req = container_of(work, struct btrfs_async_commit, work.work);
+        btrfs_commit_transaction(req->newtrans, req->root);
+        kfree(req);
+}
+/*
+ * Start committing the transaction behind @trans from a work item.
+ *
+ * A second handle is joined to the transaction and handed to
+ * do_async_commit(); @trans itself is ended here.  The function then waits,
+ * under trans_mutex, until the commit has started (in_commit set) or, when
+ * @wait_for_unblock is non-zero, until it has started and become unblocked
+ * (or has finished).
+ *
+ * If the request structure cannot be allocated, the commit is done
+ * synchronously with @trans instead of crashing the kernel.  @trans is
+ * therefore handed off on every path (ended, or passed to
+ * btrfs_commit_transaction() as __btrfs_end_transaction() does when
+ * throttling), and callers need no extra cleanup.
+ *
+ * Returns 0, or the result of btrfs_commit_transaction() on the fallback
+ * path.
+ */
+int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
+                                   struct btrfs_root *root,
+                                   int wait_for_unblock)
+{
+        struct btrfs_async_commit *ac;
+        struct btrfs_transaction *cur_trans;
+        ac = kmalloc(sizeof(*ac), GFP_NOFS);
+        if (!ac) {
+                /* out of memory: degrade to a blocking commit, do not BUG */
+                return btrfs_commit_transaction(trans, root);
+        }
+        INIT_DELAYED_WORK(&ac->work, do_async_commit);
+        ac->root = root;
+        /*
+         * NOTE(review): the join result is used unchecked; confirm that
+         * btrfs_join_transaction() cannot fail for this call.
+         */
+        ac->newtrans = btrfs_join_transaction(root, 0);
+        /* take transaction reference */
+        mutex_lock(&root->fs_info->trans_mutex);
+        cur_trans = trans->transaction;
+        cur_trans->use_count++;
+        mutex_unlock(&root->fs_info->trans_mutex);
+        btrfs_end_transaction(trans, root);
+        schedule_delayed_work(&ac->work, 0);
+        /* wait for transaction to start and unblock */
+        mutex_lock(&root->fs_info->trans_mutex);
+        if (wait_for_unblock)
+                wait_current_trans_commit_start_and_unblock(root, cur_trans);
+        else
+                wait_current_trans_commit_start(root, cur_trans);
+        /* drop the reference taken above */
+        put_transaction(cur_trans);
+        mutex_unlock(&root->fs_info->trans_mutex);
+        return 0;
+}
+/*
+ * btrfs_transaction state sequence:
+ *    in_commit = 0, blocked = 0  (initial)
+ *    in_commit = 1, blocked = 1
+ *    blocked = 0
+ *    commit_done = 1
+ */
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root)
 {
        unsigned long joined = 0;
-        unsigned long timeout = 1;
        struct btrfs_transaction *cur_trans;
        struct btrfs_transaction *prev_trans = NULL;
        DEFINE_WAIT(wait);
@@ -1039,6 +1226,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        trans->transaction->in_commit = 1;
        trans->transaction->blocked = 1;
+        wake_up(&root->fs_info->transaction_blocked_wait);
        if (cur_trans->list.prev != &root->fs_info->trans_list) {
                prev_trans = list_entry(cur_trans->list.prev,
                                        struct btrfs_transaction, list);
@@ -1063,11 +1252,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                        snap_pending = 1;
                WARN_ON(cur_trans != trans->transaction);
-                if (cur_trans->num_writers > 1)
-                        timeout = MAX_SCHEDULE_TIMEOUT;
-                else if (should_grow)
-                        timeout = 1;
                mutex_unlock(&root->fs_info->trans_mutex);
                if (flush_on_commit || snap_pending) {
@@ -1089,8 +1273,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                                TASK_UNINTERRUPTIBLE);
                smp_mb();
-                if (cur_trans->num_writers > 1 || should_grow)
+                if (cur_trans->num_writers > 1)
-                        schedule_timeout(timeout);
+                        schedule_timeout(MAX_SCHEDULE_TIMEOUT);
+                else if (should_grow)
+                        schedule_timeout(1);
                mutex_lock(&root->fs_info->trans_mutex);
                finish_wait(&cur_trans->writer_wait, &wait);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index e104986d0bfd..f104b57ad4ef 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -87,12 +87,17 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
 int btrfs_end_transaction(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root);
+int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
+                                 struct btrfs_root *root);
 struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
                                                   int num_items);
 struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
                                                  int num_blocks);
+struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root,
+                                                          int num_blocks);
 struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
                                                         int num_blocks);
+int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid);
 int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root);
 int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
@@ -104,6 +109,9 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
 int btrfs_clean_old_snapshots(struct btrfs_root *root);
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root);
+int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
+                                   struct btrfs_root *root,
+                                   int wait_for_unblock);
 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root);
 int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
index f7ac8e013ed7..992ab425599d 100644
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -36,7 +36,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
        int ret = 0;
        int wret;
        int level;
-        int orig_level;
        int is_extent = 0;
        int next_key_ret = 0;
        u64 last_ret = 0;
@@ -64,7 +63,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
                return -ENOMEM;
        level = btrfs_header_level(root->node);
-        orig_level = level;
        if (level == 0)
                goto out;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index fb102a9aee9c..a29f19384a27 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -786,7 +786,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
 {
        struct inode *dir;
        int ret;
-        struct btrfs_key location;
        struct btrfs_inode_ref *ref;
        struct btrfs_dir_item *di;
        struct inode *inode;
@@ -795,10 +794,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
        unsigned long ref_ptr;
        unsigned long ref_end;
-        location.objectid = key->objectid;
-        location.type = BTRFS_INODE_ITEM_KEY;
-        location.offset = 0;
        /*
         * it is possible that we didn't log all the parent directories
         * for a given inode.  If we don't find the dir, just don't
@@ -1583,7 +1578,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
        struct btrfs_path *path;
        struct btrfs_root *root = wc->replay_dest;
        struct btrfs_key key;
-        u32 item_size;
        int level;
        int i;
        int ret;
@@ -1601,7 +1595,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
        nritems = btrfs_header_nritems(eb);
        for (i = 0; i < nritems; i++) {
                btrfs_item_key_to_cpu(eb, &key, i);
-                item_size = btrfs_item_size_nr(eb, i);
                /* inode keys are done during the first stage */
                if (key.type == BTRFS_INODE_ITEM_KEY &&
@@ -1668,7 +1661,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
                                   struct walk_control *wc)
 {
        u64 root_owner;
-        u64 root_gen;
        u64 bytenr;
        u64 ptr_gen;
        struct extent_buffer *next;
@@ -1698,7 +1690,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
                parent = path->nodes[*level];
                root_owner = btrfs_header_owner(parent);
-                root_gen = btrfs_header_generation(parent);
                next = btrfs_find_create_tree_block(root, bytenr, blocksize);
@@ -1749,7 +1740,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
                                 struct walk_control *wc)
 {
        u64 root_owner;
-        u64 root_gen;
        int i;
        int slot;
        int ret;
@@ -1757,8 +1747,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
        for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
                slot = path->slots[i];
                if (slot + 1 < btrfs_header_nritems(path->nodes[i])) {
-                        struct extent_buffer *node;
-                        node = path->nodes[i];
                        path->slots[i]++;
                        *level = i;
                        WARN_ON(*level == 0);
@@ -1771,7 +1759,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
                                parent = path->nodes[*level + 1];
                        root_owner = btrfs_header_owner(parent);
-                        root_gen = btrfs_header_generation(parent);
                        wc->process_func(root, path->nodes[*level], wc,
                                 btrfs_header_generation(path->nodes[*level]));
                        if (wc->free) {
@@ -2273,7 +2260,7 @@ fail:
        }
        btrfs_end_log_trans(root);
-        return 0;
+        return err;
 }
 /* see comments for btrfs_del_dir_entries_in_log */
@@ -2729,7 +2716,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
        struct btrfs_key max_key;
        struct btrfs_root *log = root->log_root;
        struct extent_buffer *src = NULL;
-        u32 size;
        int err = 0;
        int ret;
        int nritems;
@@ -2793,7 +2779,6 @@ again:
                        break;
                src = path->nodes[0];
-                size = btrfs_item_size_nr(src, path->slots[0]);
                if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
                        ins_nr++;
                        goto next_slot;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e25e46a8b4e2..cc04dc1445d6 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1898,7 +1898,6 @@ int btrfs_balance(struct btrfs_root *dev_root)
        u64 size_to_free;
        struct btrfs_path *path;
        struct btrfs_key key;
-        struct btrfs_chunk *chunk;
        struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root;
        struct btrfs_trans_handle *trans;
        struct btrfs_key found_key;
@@ -1962,9 +1961,6 @@ int btrfs_balance(struct btrfs_root *dev_root)
                if (found_key.objectid != key.objectid)
                        break;
-                chunk = btrfs_item_ptr(path->nodes[0],
-                                       path->slots[0],
-                                       struct btrfs_chunk);
                /* chunk zero is special */
                if (found_key.offset == 0)
                        break;
@@ -3031,8 +3027,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
                }
                bio->bi_sector = multi->stripes[dev_nr].physical >> 9;
                dev = multi->stripes[dev_nr].dev;
-                BUG_ON(rw == WRITE && !dev->writeable);
+                if (dev && dev->bdev && (rw != WRITE || dev->writeable)) {
-                if (dev && dev->bdev) {
                        bio->bi_bdev = dev->bdev;
                        if (async_submit)
                                schedule_bio(root, dev, rw, bio);
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 88ecbb215878..698fdd2c739c 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -178,7 +178,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
        struct inode *inode = dentry->d_inode;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_path *path;
-        struct btrfs_item *item;
        struct extent_buffer *leaf;
        struct btrfs_dir_item *di;
        int ret = 0, slot, advance;
@@ -234,7 +233,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
                }
                advance = 1;
-                item = btrfs_item_nr(leaf, slot);
                btrfs_item_key_to_cpu(leaf, &found_key, slot);
                /* check to make sure this item is what we want */
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index 3e2b90eaa239..b9cd5445f71c 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -199,8 +199,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
        int nr_pages = 0;
        struct page *in_page = NULL;
        struct page *out_page = NULL;
-        int out_written = 0;
-        int in_read = 0;
        unsigned long bytes_left;
        *out_pages = 0;
@@ -233,9 +231,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
        workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
        workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE);
-        out_written = 0;
-        in_read = 0;
        while (workspace->def_strm.total_in < len) {
                ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
                if (ret != Z_OK) {
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index d6e0e0421891..08b460ae0539 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -635,7 +635,7 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
 /*
 * mount: join the ceph cluster, and open root directory.
 */
-static int ceph_mount(struct ceph_fs_client *fsc, struct vfsmount *mnt,
+static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
                      const char *path)
 {
        int err;
@@ -678,16 +678,14 @@ static int ceph_mount(struct ceph_fs_client *fsc, struct vfsmount *mnt,
                }
        }
-        mnt->mnt_root = root;
-        mnt->mnt_sb = fsc->sb;
        fsc->mount_state = CEPH_MOUNT_MOUNTED;
        dout("mount success\n");
-        err = 0;
+        mutex_unlock(&fsc->client->mount_mutex);
+        return root;
 out:
        mutex_unlock(&fsc->client->mount_mutex);
-        return err;
+        return ERR_PTR(err);
 fail:
        if (first) {
@@ -777,41 +775,45 @@ static int ceph_register_bdi(struct super_block *sb,
        return err;
 }
-static int ceph_get_sb(struct file_system_type *fs_type,
+static struct dentry *ceph_mount(struct file_system_type *fs_type,
-                       int flags, const char *dev_name, void *data,
+                       int flags, const char *dev_name, void *data)
-                       struct vfsmount *mnt)
 {
        struct super_block *sb;
        struct ceph_fs_client *fsc;
+        struct dentry *res;
        int err;
        int (*compare_super)(struct super_block *, void *) = ceph_compare_super;
        const char *path = NULL;
        struct ceph_mount_options *fsopt = NULL;
        struct ceph_options *opt = NULL;
-        dout("ceph_get_sb\n");
+        dout("ceph_mount\n");
        err = parse_mount_options(&fsopt, &opt, flags, data, dev_name, &path);
-        if (err < 0)
+        if (err < 0) {
+                res = ERR_PTR(err);
                goto out_final;
+        }
        /* create client (which we may/may not use) */
        fsc = create_fs_client(fsopt, opt);
        if (IS_ERR(fsc)) {
-                err = PTR_ERR(fsc);
+                res = ERR_CAST(fsc);
                kfree(fsopt);
                kfree(opt);
                goto out_final;
        }
        err = ceph_mdsc_init(fsc);
-        if (err < 0)
+        if (err < 0) {
+                res = ERR_PTR(err);
                goto out;
+        }
        if (ceph_test_opt(fsc->client, NOSHARE))
                compare_super = NULL;
        sb = sget(fs_type, compare_super, ceph_set_super, fsc);
        if (IS_ERR(sb)) {
-                err = PTR_ERR(sb);
+                res = ERR_CAST(sb);
                goto out;
        }
@@ -823,16 +825,18 @@ static int ceph_get_sb(struct file_system_type *fs_type,
        } else {
                dout("get_sb using new client %p\n", fsc);
                err = ceph_register_bdi(sb, fsc);
-                if (err < 0)
+                if (err < 0) {
+                        res = ERR_PTR(err);
                        goto out_splat;
+                }
        }
-        err = ceph_mount(fsc, mnt, path);
+        res = ceph_real_mount(fsc, path);
-        if (err < 0)
+        if (IS_ERR(res))
                goto out_splat;
-        dout("root %p inode %p ino %llx.%llx\n", mnt->mnt_root,
+        dout("root %p inode %p ino %llx.%llx\n", res,
-             mnt->mnt_root->d_inode, ceph_vinop(mnt->mnt_root->d_inode));
+             res->d_inode, ceph_vinop(res->d_inode));
-        return 0;
+        return res;
 out_splat:
        ceph_mdsc_close_sessions(fsc->mdsc);
@@ -843,8 +847,8 @@ out:
        ceph_mdsc_destroy(fsc);
        destroy_fs_client(fsc);
 out_final:
-        dout("ceph_get_sb fail %d\n", err);
+        dout("ceph_mount fail %ld\n", PTR_ERR(res));
-        return err;
+        return res;
 }
 static void ceph_kill_sb(struct super_block *s)
@@ -860,7 +864,7 @@ static void ceph_kill_sb(struct super_block *s)
 static struct file_system_type ceph_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "ceph",
-        .get_sb         = ceph_get_sb,
+        /* ceph_mount() returns the root dentry, or an ERR_PTR on failure */
+        .mount          = ceph_mount,
        .kill_sb        = ceph_kill_sb,
        .fs_flags       = FS_RENAME_DOES_D_MOVE,
 };
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 917b7d449bb2..0ed213970ced 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -2,6 +2,9 @@ config CIFS
        tristate "CIFS support (advanced network filesystem, SMBFS successor)"
        depends on INET
        select NLS
+        select CRYPTO
+        select CRYPTO_MD5
+        select CRYPTO_ARC4
        help
          This is the client VFS module for the Common Internet File System
          (CIFS) protocol which is the successor to the Server Message Block
diff --git a/fs/cifs/TODO b/fs/cifs/TODO
index 5aff46c61e52..355abcdcda98 100644
--- a/fs/cifs/TODO
+++ b/fs/cifs/TODO
@@ -81,7 +81,7 @@ u) DOS attrs - returned as pseudo-xattr in Samba format (check VFAT and NTFS for
 v) mount check for unmatched uids
-w) Add support for new vfs entry points for setlease and fallocate 
+w) Add support for new vfs entry point for fallocate
 x) Fix Samba 3 server to handle Linux kernel aio so dbench with lots of 
 processes can proceed better in parallel (on the server)
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 525ba59a4105..e9a393c9c2ca 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -15,7 +15,7 @@
 *   the GNU Lesser General Public License for more details.
 *
 */
-#include <linux/radix-tree.h>
+#include <linux/rbtree.h>
 #ifndef _CIFS_FS_SB_H
 #define _CIFS_FS_SB_H
@@ -42,9 +42,9 @@
 #define CIFS_MOUNT_MULTIUSER    0x20000 /* multiuser mount */
 struct cifs_sb_info {
-        struct radix_tree_root tlink_tree;
+        struct rb_root tlink_tree;
-#define CIFS_TLINK_MASTER_TAG           0       /* is "master" (mount) tcon */
        spinlock_t tlink_tree_lock;
+        struct tcon_link *master_tlink;
        struct nls_table *local_nls;
        unsigned int rsize;
        unsigned int wsize;
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 7ac0056294cf..f856732161ab 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -43,18 +43,32 @@ extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8,
                       unsigned char *p24);
 static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
-                                const struct session_key *key, char *signature)
+                                struct TCP_Server_Info *server, char *signature)
 {
-        struct  MD5Context context;
+        /*
+         * Hash the session key response followed by the SMB (starting at
+         * the Protocol field, smb_buf_length bytes) with the server's md5
+         * descriptor and store the digest in @signature.
+         *
+         * Returns 0 on success, -EINVAL for a NULL argument, -1 when no
+         * md5 descriptor is set up, or the error reported by the crypto
+         * layer.  Every crypto step is checked so that a failed hash is
+         * never reported as a valid signature.
+         */
+        int rc;
-        if ((cifs_pdu == NULL) || (signature == NULL) || (key == NULL))
+        if (cifs_pdu == NULL || signature == NULL || server == NULL)
                 return -EINVAL;
-        cifs_MD5_init(&context);
+        if (!server->secmech.sdescmd5) {
-        cifs_MD5_update(&context, (char *)&key->data, key->len);
+                cERROR(1, "%s: Can't generate signature\n", __func__);
-        cifs_MD5_update(&context, cifs_pdu->Protocol, cifs_pdu->smb_buf_length);
+                return -1;
+        }
+        rc = crypto_shash_init(&server->secmech.sdescmd5->shash);
+        if (rc) {
+                cERROR(1, "%s: Could not init md5\n", __func__);
+                return rc;
+        }
+        rc = crypto_shash_update(&server->secmech.sdescmd5->shash,
+                server->session_key.response, server->session_key.len);
+        if (rc)
+                return rc;
+        rc = crypto_shash_update(&server->secmech.sdescmd5->shash,
+                cifs_pdu->Protocol, cifs_pdu->smb_buf_length);
+        if (rc)
+                return rc;
+        rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature);
-        cifs_MD5_final(signature, &context);
-        return 0;
+        return rc;
 }
@@ -79,8 +93,7 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
        server->sequence_number++;
        spin_unlock(&GlobalMid_Lock);
-        rc = cifs_calculate_signature(cifs_pdu, &server->session_key,
+        rc = cifs_calculate_signature(cifs_pdu, server, smb_signature);
-                                      smb_signature);
        if (rc)
                memset(cifs_pdu->Signature.SecuritySignature, 0, 8);
        else
@@ -90,16 +103,28 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
 }
 static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
-                                const struct session_key *key, char *signature)
+                                struct TCP_Server_Info *server, char *signature)
 {
-        struct  MD5Context context;
        int i;
+        int rc;
-        if ((iov == NULL) || (signature == NULL) || (key == NULL))
+        if (iov == NULL || signature == NULL || server == NULL)
                return -EINVAL;
-        cifs_MD5_init(&context);
+        if (!server->secmech.sdescmd5) {
-        cifs_MD5_update(&context, (char *)&key->data, key->len);
+                cERROR(1, "%s: Can't generate signature\n", __func__);
+                return -1;
+        }
+        rc = crypto_shash_init(&server->secmech.sdescmd5->shash);
+        if (rc) {
+                cERROR(1, "%s: Oould not init md5\n", __func__);
+                return rc;
+        }
+        crypto_shash_update(&server->secmech.sdescmd5->shash,
+                server->session_key.response, server->session_key.len);
        for (i = 0; i < n_vec; i++) {
                if (iov[i].iov_len == 0)
                        continue;
@@ -112,18 +137,18 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
                if (i == 0) {
                        if (iov[0].iov_len <= 8) /* cmd field at offset 9 */
                                break; /* nothing to sign or corrupt header */
-                        cifs_MD5_update(&context, iov[0].iov_base+4,
+                        crypto_shash_update(&server->secmech.sdescmd5->shash,
-                                  iov[0].iov_len-4);
+                                iov[i].iov_base + 4, iov[i].iov_len - 4);
                } else
-                        cifs_MD5_update(&context, iov[i].iov_base, iov[i].iov_len);
+                        crypto_shash_update(&server->secmech.sdescmd5->shash,
+                                iov[i].iov_base, iov[i].iov_len);
        }
-        cifs_MD5_final(signature, &context);
+        rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature);
-        return 0;
+        return rc;
 }
 int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
                   __u32 *pexpected_response_sequence_number)
 {
@@ -146,8 +171,7 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
        server->sequence_number++;
        spin_unlock(&GlobalMid_Lock);
-        rc = cifs_calc_signature2(iov, n_vec, &server->session_key,
+        rc = cifs_calc_signature2(iov, n_vec, server, smb_signature);
-                                      smb_signature);
        if (rc)
                memset(cifs_pdu->Signature.SecuritySignature, 0, 8);
        else
@@ -157,14 +181,14 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
 }
 int cifs_verify_signature(struct smb_hdr *cifs_pdu,
-                          const struct session_key *session_key,
+                          struct TCP_Server_Info *server,
                          __u32 expected_sequence_number)
 {
        unsigned int rc;
        char server_response_sig[8];
        char what_we_think_sig_should_be[20];
-        if (cifs_pdu == NULL || session_key == NULL)
+        if (cifs_pdu == NULL || server == NULL)
                return -EINVAL;
        if (cifs_pdu->Command == SMB_COM_NEGOTIATE)
@@ -193,7 +217,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
                                        cpu_to_le32(expected_sequence_number);
        cifs_pdu->Signature.Sequence.Reserved = 0;
-        rc = cifs_calculate_signature(cifs_pdu, session_key,
+        rc = cifs_calculate_signature(cifs_pdu, server,
                what_we_think_sig_should_be);
        if (rc)
@@ -209,18 +233,28 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
 }
-/* We fill in key by putting in 40 byte array which was allocated by caller */
+/*
+ * setup_ntlm_response - allocate and fill ses->auth_key for NTLM
+ * @ses: session whose password and server challenge (cryptkey) are used
+ *
+ * The buffer is CIFS_SESS_KEY_SIZE + CIFS_AUTH_RESP_SIZE bytes long: the
+ * session key slot comes first, the NTLM response follows it.  The response
+ * is calculated first, then the session key.
+ *
+ * Returns 0 on success, -EINVAL if @ses is NULL and -ENOMEM if the buffer
+ * cannot be allocated.
+ */
-int cifs_calculate_session_key(struct session_key *key, const char *rn,
+int setup_ntlm_response(struct cifsSesInfo *ses)
-                           const char *password)
 {
-        char temp_key[16];
+        unsigned int temp_len = CIFS_SESS_KEY_SIZE + CIFS_AUTH_RESP_SIZE;
-        if ((key == NULL) || (rn == NULL))
+        char temp_key[CIFS_SESS_KEY_SIZE];
+        if (!ses)
                 return -EINVAL;
-        E_md4hash(password, temp_key);
+        ses->auth_key.response = kmalloc(temp_len, GFP_KERNEL);
-        mdfour(key->data.ntlm, temp_key, 16);
+        if (!ses->auth_key.response) {
-        memcpy(key->data.ntlm+16, rn, CIFS_SESS_KEY_SIZE);
+                cERROR(1, "NTLM can't allocate (%u bytes) memory", temp_len);
-        key->len = 40;
+                return -ENOMEM;
+        }
+        ses->auth_key.len = temp_len;
+        /* the response is written after the session key slot */
+        SMBNTencrypt(ses->password, ses->server->cryptkey,
+                        ses->auth_key.response + CIFS_SESS_KEY_SIZE);
+        /* hash the password, then run mdfour over that hash into the front */
+        E_md4hash(ses->password, temp_key);
+        mdfour(ses->auth_key.response, temp_key, CIFS_SESS_KEY_SIZE);
         return 0;
 }
@@ -294,15 +328,15 @@ build_avpair_blob(struct cifsSesInfo *ses, const struct nls_table *nls_cp)
         * two times the unicode length of a server name +
         * size of a timestamp (which is 8 bytes).
         */
-        ses->tilen = size + 2 * (2 * dlen) + 2 * (2 * wlen) + 8;
+        ses->auth_key.len = size + 2 * (2 * dlen) + 2 * (2 * wlen) + 8;
-        ses->tiblob = kzalloc(ses->tilen, GFP_KERNEL);
+        ses->auth_key.response = kzalloc(ses->auth_key.len, GFP_KERNEL);
-        if (!ses->tiblob) {
+        if (!ses->auth_key.response) {
-                ses->tilen = 0;
+                ses->auth_key.len = 0;
                cERROR(1, "Challenge target info allocation failure");
                return -ENOMEM;
        }
-        blobptr = ses->tiblob;
+        blobptr = ses->auth_key.response;
        attrptr = (struct ntlmssp2_name *) blobptr;
        attrptr->type = cpu_to_le16(NTLMSSP_AV_NB_DOMAIN_NAME);
@@ -357,7 +391,7 @@ build_avpair_blob(struct cifsSesInfo *ses, const struct nls_table *nls_cp)
 * about target string i.e. for some, just user name might suffice.
 */
 static int
-find_domain_name(struct cifsSesInfo *ses)
+find_domain_name(struct cifsSesInfo *ses, const struct nls_table *nls_cp)
 {
        unsigned int attrsize;
        unsigned int type;
@@ -366,11 +400,11 @@ find_domain_name(struct cifsSesInfo *ses)
        unsigned char *blobend;
        struct ntlmssp2_name *attrptr;
-        if (!ses->tilen || !ses->tiblob)
+        if (!ses->auth_key.len || !ses->auth_key.response)
                return 0;
-        blobptr = ses->tiblob;
+        blobptr = ses->auth_key.response;
-        blobend = ses->tiblob + ses->tilen;
+        blobend = blobptr + ses->auth_key.len;
        while (blobptr + onesize < blobend) {
                attrptr = (struct ntlmssp2_name *) blobptr;
@@ -386,16 +420,13 @@ find_domain_name(struct cifsSesInfo *ses)
                        if (!attrsize)
                                break;
                        if (!ses->domainName) {
-                                struct nls_table *default_nls;
                                ses->domainName =
                                        kmalloc(attrsize + 1, GFP_KERNEL);
                                if (!ses->domainName)
                                                return -ENOMEM;
-                                default_nls = load_nls_default();
                                cifs_from_ucs2(ses->domainName,
                                        (__le16 *)blobptr, attrsize, attrsize,
-                                        default_nls, false);
+                                        nls_cp, false);
-                                unload_nls(default_nls);
                                break;
                        }
                }
@@ -405,82 +436,136 @@ find_domain_name(struct cifsSesInfo *ses)
        return 0;
 }
+/*
+ * calc_ntlmv2_hash - compute the NTLMv2 hash for a session
+ * @ses: session supplying the password, user name and domain/server name
+ * @ntlmv2_hash: output buffer for the hmac(md5) result (the visible caller
+ *               passes a 16 byte array)
+ * @nls_cp: code page used when converting the names to unicode
+ *
+ * hmac(md5) is keyed with the md4 hash of the password and then fed the
+ * upper-cased unicode user name followed by the unicode domain name or,
+ * when no domain name is set, the server name.
+ *
+ * Returns 0 on success, -ENOMEM when a conversion buffer cannot be
+ * allocated, -1 when the hmacmd5 descriptor is missing, otherwise the
+ * result of crypto_shash_init()/crypto_shash_final().
+ *
+ * NOTE(review): the return values of crypto_shash_setkey() and
+ * crypto_shash_update() are not checked here - confirm this is intended.
+ */
-static int calc_ntlmv2_hash(struct cifsSesInfo *ses,
+static int calc_ntlmv2_hash(struct cifsSesInfo *ses, char *ntlmv2_hash,
                             const struct nls_table *nls_cp)
 {
         int rc = 0;
         int len;
-        char nt_hash[16];
+        char nt_hash[CIFS_NTHASH_SIZE];
-        struct HMACMD5Context *pctxt;
         wchar_t *user;
         wchar_t *domain;
+        wchar_t *server;
-        pctxt = kmalloc(sizeof(struct HMACMD5Context), GFP_KERNEL);
+        if (!ses->server->secmech.sdeschmacmd5) {
+                cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n");
-        if (pctxt == NULL)
+                return -1;
-                return -ENOMEM;
+        }
         /* calculate md4 hash of password */
         E_md4hash(ses->password, nt_hash);
-        /* convert Domainname to unicode and uppercase */
+        crypto_shash_setkey(ses->server->secmech.hmacmd5, nt_hash,
-        hmac_md5_init_limK_to_64(nt_hash, 16, pctxt);
+                                CIFS_NTHASH_SIZE);
+        rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash);
+        if (rc) {
+                cERROR(1, "calc_ntlmv2_hash: could not init hmacmd5\n");
+                return rc;
+        }
         /* convert ses->userName to unicode and uppercase */
         len = strlen(ses->userName);
         user = kmalloc(2 + (len * 2), GFP_KERNEL);
-        if (user == NULL)
+        if (user == NULL) {
+                cERROR(1, "calc_ntlmv2_hash: user mem alloc failure\n");
+                rc = -ENOMEM;
                 goto calc_exit_2;
+        }
         len = cifs_strtoUCS((__le16 *)user, ses->userName, len, nls_cp);
         UniStrupr(user);
-        hmac_md5_update((char *)user, 2*len, pctxt);
+        crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
+                                (char *)user, 2 * len);
         /* convert ses->domainName to unicode and uppercase */
         if (ses->domainName) {
                 len = strlen(ses->domainName);
                 domain = kmalloc(2 + (len * 2), GFP_KERNEL);
-                if (domain == NULL)
+                if (domain == NULL) {
+                        cERROR(1, "calc_ntlmv2_hash: domain mem alloc failure");
+                        rc = -ENOMEM;
                         goto calc_exit_1;
+                }
                 len = cifs_strtoUCS((__le16 *)domain, ses->domainName, len,
                                         nls_cp);
+                /* the domain is hashed as converted; UniStrupr is not applied */
-                /* the following line was removed since it didn't work well
+                crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
-                   with lower cased domain name that passed as an option.
+                                        (char *)domain, 2 * len);
-                   Maybe converting the domain name earlier makes sense */
-                /* UniStrupr(domain); */
-                hmac_md5_update((char *)domain, 2*len, pctxt);
                 kfree(domain);
+        } else if (ses->serverName) {
+                /* no domain name: hash the server name in its place */
+                len = strlen(ses->serverName);
+                server = kmalloc(2 + (len * 2), GFP_KERNEL);
+                if (server == NULL) {
+                        cERROR(1, "calc_ntlmv2_hash: server mem alloc failure");
+                        rc = -ENOMEM;
+                        goto calc_exit_1;
+                }
+                len = cifs_strtoUCS((__le16 *)server, ses->serverName, len,
+                                        nls_cp);
+                crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
+                                        (char *)server, 2 * len);
+                kfree(server);
         }
+        rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
+                                        ntlmv2_hash);
 calc_exit_1:
         kfree(user);
 calc_exit_2:
-        /* BB FIXME what about bytes 24 through 40 of the signing key?
+        return rc;
-           compare with the NTLM example */
+}
-        hmac_md5_final(ses->ntlmv2_hash, pctxt);
+/*
+ * CalcNTLMv2_response - compute the first part of the NTLMv2 client response
+ * @ses: session; ses->auth_key.response must already hold the session key
+ *       slot followed by the ntlmv2 response blob
+ * @ntlmv2_hash: CIFS_HMAC_MD5_HASH_SIZE byte key from calc_ntlmv2_hash()
+ *
+ * The server challenge is copied 8 bytes into the response area, hmac(md5)
+ * keyed with @ntlmv2_hash is run from there to the end of the buffer, and
+ * the digest is stored at the start of the response area (right after the
+ * session key slot).
+ *
+ * Returns 0 on success, -1 if the hmacmd5 descriptor is missing, otherwise
+ * the error reported by the crypto layer.
+ */
+static int
+CalcNTLMv2_response(const struct cifsSesInfo *ses, char *ntlmv2_hash)
+{
+        int rc;
+        unsigned int offset = CIFS_SESS_KEY_SIZE + 8;
+        if (!ses->server->secmech.sdeschmacmd5) {
+                cERROR(1, "CalcNTLMv2_response: can't generate ntlmv2 hash\n");
+                return -1;
+        }
+        rc = crypto_shash_setkey(ses->server->secmech.hmacmd5,
+                                ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
+        if (rc) {
+                cERROR(1, "CalcNTLMv2_response: could not set hmacmd5 key");
+                return rc;
+        }
+        rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash);
+        if (rc) {
+                cERROR(1, "CalcNTLMv2_response: could not init hmacmd5");
+                return rc;
+        }
+        /* RawNTLMSSP keeps its challenge per session, others per server */
+        if (ses->server->secType == RawNTLMSSP)
+                memcpy(ses->auth_key.response + offset,
+                        ses->ntlmssp->cryptkey, CIFS_SERVER_CHALLENGE_SIZE);
+        else
+                memcpy(ses->auth_key.response + offset,
+                        ses->server->cryptkey, CIFS_SERVER_CHALLENGE_SIZE);
+        rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
+                ses->auth_key.response + offset, ses->auth_key.len - offset);
+        if (rc) {
+                cERROR(1, "CalcNTLMv2_response: could not update hmacmd5");
+                return rc;
+        }
+        rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
+                ses->auth_key.response + CIFS_SESS_KEY_SIZE);
-        kfree(pctxt);
         return rc;
 }
 int
-setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf,
+setup_ntlmv2_rsp(struct cifsSesInfo *ses, const struct nls_table *nls_cp)
-                      const struct nls_table *nls_cp)
 {
        int rc;
-        struct ntlmv2_resp *buf = (struct ntlmv2_resp *)resp_buf;
+        int baselen;
-        struct HMACMD5Context context;
+        unsigned int tilen;
+        struct ntlmv2_resp *buf;
-        buf->blob_signature = cpu_to_le32(0x00000101);
+        char ntlmv2_hash[16];
-        buf->reserved = 0;
+        unsigned char *tiblob = NULL; /* target info blob */
-        buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
-        get_random_bytes(&buf->client_chal, sizeof(buf->client_chal));
-        buf->reserved2 = 0;
        if (ses->server->secType == RawNTLMSSP) {
                if (!ses->domainName) {
-                        rc = find_domain_name(ses);
+                        rc = find_domain_name(ses, nls_cp);
                        if (rc) {
                                cERROR(1, "error %d finding domain name", rc);
                                goto setup_ntlmv2_rsp_ret;
@@ -490,51 +575,179 @@ setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf,
                rc = build_avpair_blob(ses, nls_cp);
                if (rc) {
                        cERROR(1, "error %d building av pair blob", rc);
-                        return rc;
+                        goto setup_ntlmv2_rsp_ret;
                }
        }
-        /* calculate buf->ntlmv2_hash */
+        baselen = CIFS_SESS_KEY_SIZE + sizeof(struct ntlmv2_resp);
-        rc = calc_ntlmv2_hash(ses, nls_cp);
+        tilen = ses->auth_key.len;
+        tiblob = ses->auth_key.response;
+        ses->auth_key.response = kmalloc(baselen + tilen, GFP_KERNEL);
+        if (!ses->auth_key.response) {
+                rc = ENOMEM;
+                ses->auth_key.len = 0;
+                cERROR(1, "%s: Can't allocate auth blob", __func__);
+                goto setup_ntlmv2_rsp_ret;
+        }
+        ses->auth_key.len += baselen;
+        buf = (struct ntlmv2_resp *)
+                        (ses->auth_key.response + CIFS_SESS_KEY_SIZE);
+        buf->blob_signature = cpu_to_le32(0x00000101);
+        buf->reserved = 0;
+        buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
+        get_random_bytes(&buf->client_chal, sizeof(buf->client_chal));
+        buf->reserved2 = 0;
+        memcpy(ses->auth_key.response + baselen, tiblob, tilen);
+        /* calculate ntlmv2_hash */
+        rc = calc_ntlmv2_hash(ses, ntlmv2_hash, nls_cp);
        if (rc) {
                cERROR(1, "could not get v2 hash rc %d", rc);
                goto setup_ntlmv2_rsp_ret;
        }
-        CalcNTLMv2_response(ses, resp_buf);
+        /* calculate first part of the client response (CR1) */
+        rc = CalcNTLMv2_response(ses, ntlmv2_hash);
+        if (rc) {
+                cERROR(1, "Could not calculate CR1  rc: %d", rc);
+                goto setup_ntlmv2_rsp_ret;
+        }
        /* now calculate the session key for NTLMv2 */
-        hmac_md5_init_limK_to_64(ses->ntlmv2_hash, 16, &context);
+        crypto_shash_setkey(ses->server->secmech.hmacmd5,
-        hmac_md5_update(resp_buf, 16, &context);
+                ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
-        hmac_md5_final(ses->auth_key.data.ntlmv2.key, &context);
+        rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash);
+        if (rc) {
+                cERROR(1, "%s: Could not init hmacmd5\n", __func__);
+                goto setup_ntlmv2_rsp_ret;
+        }
-        memcpy(&ses->auth_key.data.ntlmv2.resp, resp_buf,
+        crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
-               sizeof(struct ntlmv2_resp));
+                ses->auth_key.response + CIFS_SESS_KEY_SIZE,
-        ses->auth_key.len = 16 + sizeof(struct ntlmv2_resp);
+                CIFS_HMAC_MD5_HASH_SIZE);
-        return 0;
+        rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
+                ses->auth_key.response);
 setup_ntlmv2_rsp_ret:
-        kfree(ses->tiblob);
+        kfree(tiblob);
-        ses->tiblob = NULL;
-        ses->tilen = 0;
        return rc;
 }
+/*
+ * calc_seckey - generate and encrypt a random secondary session key
+ * @ses: session; ses->auth_key.response must hold at least
+ *       CIFS_SESS_KEY_SIZE bytes and ses->ntlmssp must be allocated
+ *
+ * A CIFS_SESS_KEY_SIZE byte nonce is encrypted with ecb(arc4), keyed by the
+ * current session key, into ses->ntlmssp->ciphertext.  The nonce then
+ * replaces the session key and auth_key.len is cut to CIFS_SESS_KEY_SIZE.
+ *
+ * Returns 0 on success or a negative errno; auth_key is left untouched on
+ * failure.
+ */
-void CalcNTLMv2_response(const struct cifsSesInfo *ses,
+int
-                         char *v2_session_response)
+calc_seckey(struct cifsSesInfo *ses)
 {
-        struct HMACMD5Context context;
+        int rc;
-        /* rest of v2 struct already generated */
+        struct crypto_blkcipher *tfm_arc4;
-        memcpy(v2_session_response + 8, ses->cryptKey, 8);
+        struct scatterlist sgin, sgout;
-        hmac_md5_init_limK_to_64(ses->ntlmv2_hash, 16, &context);
+        struct blkcipher_desc desc;
+        unsigned char sec_key[CIFS_SESS_KEY_SIZE]; /* a nonce */
+        get_random_bytes(sec_key, CIFS_SESS_KEY_SIZE);
+        tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
+        if (!tfm_arc4 || IS_ERR(tfm_arc4)) {
+                /* PTR_ERR(NULL) is 0: never report success without a tfm */
+                rc = tfm_arc4 ? PTR_ERR(tfm_arc4) : -ENOMEM;
+                cERROR(1, "could not allocate crypto API arc4\n");
+                return rc;
+        }
-        hmac_md5_update(v2_session_response+8,
+        desc.tfm = tfm_arc4;
+        desc.flags = 0; /* do not hand stack garbage to the cipher */
-                        sizeof(struct ntlmv2_resp) - 8, &context);
-        if (ses->tilen)
+        rc = crypto_blkcipher_setkey(tfm_arc4, ses->auth_key.response,
-                hmac_md5_update(ses->tiblob, ses->tilen, &context);
+                                        CIFS_SESS_KEY_SIZE);
+        if (rc) {
+                cERROR(1, "%s: could not set arc4 key rc: %d", __func__, rc);
+                goto calc_seckey_ret;
+        }
-        hmac_md5_final(v2_session_response, &context);
+        sg_init_one(&sgin, sec_key, CIFS_SESS_KEY_SIZE);
-/*      cifs_dump_mem("v2_sess_rsp: ", v2_session_response, 32); */
+        sg_init_one(&sgout, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE);
+        rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, CIFS_CPHTXT_SIZE);
+        if (rc) {
+                cERROR(1, "could not encrypt session key rc: %d\n", rc);
+                goto calc_seckey_ret;
+        }
+        /* make secondary_key/nonce as session key */
+        memcpy(ses->auth_key.response, sec_key, CIFS_SESS_KEY_SIZE);
+        /* and make len as that of session key only */
+        ses->auth_key.len = CIFS_SESS_KEY_SIZE;
+calc_seckey_ret:
+        /* single exit so the transform is released on every path */
+        crypto_free_blkcipher(tfm_arc4);
+        return rc;
+}
+/*
+ * cifs_crypto_shash_release - drop the hash transforms and descriptors
+ * @server: tcp session whose secmech members are released
+ *
+ * Frees the md5 and hmac(md5) transforms when they are set, then the two
+ * descriptors.  The pointers themselves are not cleared.
+ */
+void
+cifs_crypto_shash_release(struct TCP_Server_Info *server)
+{
+        struct cifs_secmech *mech = &server->secmech;
+        if (mech->md5 != NULL)
+                crypto_free_shash(mech->md5);
+        if (mech->hmacmd5 != NULL)
+                crypto_free_shash(mech->hmacmd5);
+        kfree(mech->sdeschmacmd5);
+        kfree(mech->sdescmd5);
+}
+/*
+ * cifs_crypto_shash_allocate - set up hashing state for a tcp session
+ * @server: tcp session whose secmech members are filled in
+ *
+ * Allocates the hmac(md5) and md5 transforms plus one descriptor for each.
+ * A descriptor is a struct shash_desc followed by the per-transform context
+ * reported by crypto_shash_descsize().
+ *
+ * Returns 0 on success or a negative errno.  On failure everything
+ * allocated here is released again and the secmech pointers touched are
+ * reset to NULL, so no ERR_PTR or stale pointer is left behind.
+ */
+int
+cifs_crypto_shash_allocate(struct TCP_Server_Info *server)
+{
+        int rc;
+        unsigned int size;
+        server->secmech.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0);
+        if (!server->secmech.hmacmd5 ||
+                        IS_ERR(server->secmech.hmacmd5)) {
+                cERROR(1, "could not allocate crypto hmacmd5\n");
+                /* PTR_ERR(NULL) is 0 and would look like success */
+                rc = server->secmech.hmacmd5 ?
+                        PTR_ERR(server->secmech.hmacmd5) : -ENOMEM;
+                server->secmech.hmacmd5 = NULL;
+                return rc;
+        }
+        server->secmech.md5 = crypto_alloc_shash("md5", 0, 0);
+        if (!server->secmech.md5 || IS_ERR(server->secmech.md5)) {
+                cERROR(1, "could not allocate crypto md5\n");
+                rc = server->secmech.md5 ?
+                        PTR_ERR(server->secmech.md5) : -ENOMEM;
+                server->secmech.md5 = NULL;
+                goto crypto_allocate_md5_fail;
+        }
+        size = sizeof(struct shash_desc) +
+                        crypto_shash_descsize(server->secmech.hmacmd5);
+        server->secmech.sdeschmacmd5 = kmalloc(size, GFP_KERNEL);
+        if (!server->secmech.sdeschmacmd5) {
+                cERROR(1, "cifs_crypto_shash_allocate: can't alloc hmacmd5\n");
+                rc = -ENOMEM;
+                goto crypto_allocate_hmacmd5_sdesc_fail;
+        }
+        server->secmech.sdeschmacmd5->shash.tfm = server->secmech.hmacmd5;
+        server->secmech.sdeschmacmd5->shash.flags = 0x0;
+        size = sizeof(struct shash_desc) +
+                        crypto_shash_descsize(server->secmech.md5);
+        server->secmech.sdescmd5 = kmalloc(size, GFP_KERNEL);
+        if (!server->secmech.sdescmd5) {
+                cERROR(1, "cifs_crypto_shash_allocate: can't alloc md5\n");
+                rc = -ENOMEM;
+                goto crypto_allocate_md5_sdesc_fail;
+        }
+        server->secmech.sdescmd5->shash.tfm = server->secmech.md5;
+        server->secmech.sdescmd5->shash.flags = 0x0;
+        return 0;
+        /* unwind in reverse order of allocation, clearing as we go */
+crypto_allocate_md5_sdesc_fail:
+        kfree(server->secmech.sdeschmacmd5);
+        server->secmech.sdeschmacmd5 = NULL;
+crypto_allocate_hmacmd5_sdesc_fail:
+        if (server->secmech.md5)
+                crypto_free_shash(server->secmech.md5);
+        server->secmech.md5 = NULL;
+crypto_allocate_md5_fail:
+        crypto_free_shash(server->secmech.hmacmd5);
+        server->secmech.hmacmd5 = NULL;
+        return rc;
 }
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 34371637f210..9c3789762ab7 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -116,7 +116,7 @@ cifs_read_super(struct super_block *sb, void *data,
                return -ENOMEM;
        spin_lock_init(&cifs_sb->tlink_tree_lock);
-        INIT_RADIX_TREE(&cifs_sb->tlink_tree, GFP_KERNEL);
+        cifs_sb->tlink_tree = RB_ROOT;
        rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY);
        if (rc) {
@@ -318,12 +318,10 @@ cifs_alloc_inode(struct super_block *sb)
                return NULL;
        cifs_inode->cifsAttrs = 0x20;   /* default */
        cifs_inode->time = 0;
-        cifs_inode->write_behind_rc = 0;
        /* Until the file is open and we have gotten oplock
        info back from the server, can not assume caching of
        file data or metadata */
-        cifs_inode->clientCanCacheRead = false;
+        cifs_set_oplock_level(cifs_inode, 0);
-        cifs_inode->clientCanCacheAll = false;
        cifs_inode->delete_pending = false;
        cifs_inode->invalid_mapping = false;
        cifs_inode->vfs_inode.i_blkbits = 14;  /* 2**14 = CIFS_MAX_MSGSIZE */
@@ -545,9 +543,9 @@ static const struct super_operations cifs_super_ops = {
 #endif
 };
-static int
+static struct dentry *
-cifs_get_sb(struct file_system_type *fs_type,
+cifs_do_mount(struct file_system_type *fs_type,
-            int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+            int flags, const char *dev_name, void *data)
 {
        int rc;
        struct super_block *sb;
@@ -557,18 +555,17 @@ cifs_get_sb(struct file_system_type *fs_type,
        cFYI(1, "Devname: %s flags: %d ", dev_name, flags);
        if (IS_ERR(sb))
-                return PTR_ERR(sb);
+                return ERR_CAST(sb);
        sb->s_flags = flags;
        rc = cifs_read_super(sb, data, dev_name, flags & MS_SILENT ? 1 : 0);
        if (rc) {
                deactivate_locked_super(sb);
-                return rc;
+                return ERR_PTR(rc);
        }
        sb->s_flags |= MS_ACTIVE;
-        simple_set_mnt(mnt, sb);
+        return dget(sb->s_root);
-        return 0;
 }
 static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
@@ -634,7 +631,7 @@ static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
+/*
+ * Filesystem type descriptor for "cifs".  Mounting goes through
+ * cifs_do_mount(), which hands back a reference to the superblock's root
+ * dentry.
+ */
 struct file_system_type cifs_fs_type = {
         .owner = THIS_MODULE,
         .name = "cifs",
-        .get_sb = cifs_get_sb,
+        .mount = cifs_do_mount,
         .kill_sb = kill_anon_super,
         /*  .fs_flags */
 };
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index f35795a16b42..897b2b2b28b5 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -112,5 +112,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 extern const struct export_operations cifs_export_ops;
 #endif /* EXPERIMENTAL */
-#define CIFS_VERSION   "1.67"
+#define CIFS_VERSION   "1.68"
 #endif                          /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 3365e77f6f24..b577bf0a1bb3 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -25,6 +25,9 @@
 #include <linux/workqueue.h>
 #include "cifs_fs_sb.h"
 #include "cifsacl.h"
+#include <crypto/internal/hash.h>
+#include <linux/scatterlist.h>
 /*
 * The sizes of various internal tables and strings
 */
@@ -74,7 +77,7 @@
 * CIFS vfs client Status information (based on what we know.)
 */
- /* associated with each tcp and smb session */
+/* associated with each tcp and smb session */
 enum statusEnum {
        CifsNew = 0,
        CifsGood,
@@ -99,14 +102,29 @@ enum protocolEnum {
+/*
+ * Key material for a session or a server: a heap buffer plus the number of
+ * bytes in it that are currently in use.  The buffer is allocated by the
+ * auth setup code and released with kfree().
+ */
 struct session_key {
         unsigned int len;
-        union {
+        char *response; /* session key, followed by the auth response if any */
-                char ntlm[CIFS_SESS_KEY_SIZE + 16];
+};
-                char krb5[CIFS_SESS_KEY_SIZE + 16]; /* BB: length correct? */
-                struct {
+/*
+ * crypto security descriptor definition: a shash descriptor immediately
+ * followed by the transform's per-request context.  Allocate it with
+ * sizeof(struct shash_desc) + crypto_shash_descsize(tfm) bytes.
+ */
-                        char key[16];
+struct sdesc {
-                        struct ntlmv2_resp resp;
+        struct shash_desc shash;
-                } ntlmv2;
+        char ctx[];
-        } data;
+};
+/*
+ * crypto hashing related structure/fields, not specific to a sec mech.
+ * Set up by cifs_crypto_shash_allocate() and torn down by
+ * cifs_crypto_shash_release().
+ */
+struct cifs_secmech {
+        struct crypto_shash *hmacmd5; /* hmac-md5 hash function */
+        struct crypto_shash *md5; /* md5 hash function */
+        struct sdesc *sdeschmacmd5;  /* ctxt to generate ntlmv2 hash, CR1 */
+        struct sdesc *sdescmd5; /* ctxt to generate cifs/smb signature */
+};
+/* per smb session structure/fields used during the ntlmssp exchange */
+struct ntlmssp_auth {
+        __u32 client_flags; /* sent by client in type 1 ntlmssp exchange */
+        __u32 server_flags; /* sent by server in type 2 ntlmssp exchange */
+        /* encrypted secondary session key, filled in by calc_seckey() */
+        unsigned char ciphertext[CIFS_CPHTXT_SIZE]; /* sent to server */
+        /* server challenge; hashed into the ntlmv2 response for RawNTLMSSP */
+        char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlmssp */
 };
 struct cifs_cred {
@@ -179,12 +197,14 @@ struct TCP_Server_Info {
        int capabilities; /* allow selective disabling of caps by smb sess */
        int timeAdj;  /* Adjust for difference in server time zone in sec */
        __u16 CurrentMid;         /* multiplex id - rotating counter */
+        char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlm, ntlmv2 etc */
        /* 16th byte of RFC1001 workstation name is always null */
        char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
        __u32 sequence_number; /* needed for CIFS PDU signature */
        struct session_key session_key;
        unsigned long lstrp; /* when we got last response from this server */
        u16 dialect; /* dialect index that server chose */
+        struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */
        /* extended security flavors that server supports */
        bool    sec_kerberos;           /* supports plain Kerberos */
        bool    sec_mskerberos;         /* supports legacy MS Kerberos */
@@ -222,11 +242,8 @@ struct cifsSesInfo {
        char userName[MAX_USERNAME_SIZE + 1];
        char *domainName;
        char *password;
-        char cryptKey[CIFS_CRYPTO_KEY_SIZE];
        struct session_key auth_key;
-        char ntlmv2_hash[16];
+        struct ntlmssp_auth *ntlmssp; /* ciphertext, flags, server challenge */
-        unsigned int tilen; /* length of the target info blob */
-        unsigned char *tiblob; /* target info blob in challenge response */
        bool need_reconnect:1; /* connection reset, uid now invalid */
 };
 /* no more than one of the following three session flags may be set */
@@ -319,7 +336,8 @@ struct cifsTconInfo {
 * "get" on the container.
 */
 struct tcon_link {
-        unsigned long           tl_index;
+        struct rb_node          tl_rbnode;
+        uid_t                   tl_uid;
        unsigned long           tl_flags;
 #define TCON_LINK_MASTER        0
 #define TCON_LINK_PENDING       1
@@ -395,16 +413,19 @@ struct cifsFileInfo {
        struct list_head llist; /* list of byte range locks we have. */
        bool invalidHandle:1;   /* file closed via session abend */
        bool oplock_break_cancelled:1;
-        atomic_t count;         /* reference count */
+        int count;              /* refcount protected by cifs_file_list_lock */
        struct mutex fh_mutex; /* prevents reopen race after dead ses*/
        struct cifs_search_info srch_inf;
        struct work_struct oplock_break; /* work for oplock breaks */
 };
-/* Take a reference on the file private data */
+/*
+ * Bump the reference count on the file private data.  The caller must
+ * already hold cifs_file_list_lock, which protects the count.
+ */
 static inline void cifsFileInfo_get(struct cifsFileInfo *cifs_file)
 {
-        atomic_inc(&cifs_file->count);
+        cifs_file->count += 1;
 }
 void cifsFileInfo_put(struct cifsFileInfo *cifs_file);
@@ -417,7 +438,6 @@ struct cifsInodeInfo {
        struct list_head lockList;
        /* BB add in lists for dirty pages i.e. write caching info for oplock */
        struct list_head openFileList;
-        int write_behind_rc;
        __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */
        unsigned long time;     /* jiffies of last update/check of inode */
        bool clientCanCacheRead:1;      /* read oplock */
@@ -668,7 +688,7 @@ require use of the stronger protocol */
 *  GlobalMid_Lock protects:
 *      list operations on pending_mid_q and oplockQ
 *      updates to XID counters, multiplex id  and SMB sequence numbers
- *  GlobalSMBSesLock protects:
+ *  cifs_file_list_lock protects:
 *      list operations on tcp and SMB session lists and tCon lists
 *  f_owner.lock protects certain per file struct operations
 *  mapping->page_lock protects certain per page operations
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index b0f4b5656d4c..de36b09763a8 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -131,9 +131,20 @@
 #define CIFS_CRYPTO_KEY_SIZE (8)
 /*
+ * Size of the ntlm client response
+ */
+#define CIFS_AUTH_RESP_SIZE (24)
+/*
 * Size of the session key (crypto key encrypted with the password
 */
-#define CIFS_SESS_KEY_SIZE (24)
+#define CIFS_SESS_KEY_SIZE (16)
+#define CIFS_CLIENT_CHALLENGE_SIZE (8)
+#define CIFS_SERVER_CHALLENGE_SIZE (8)
+#define CIFS_HMAC_MD5_HASH_SIZE (16)
+#define CIFS_CPHTXT_SIZE (16)
+#define CIFS_NTHASH_SIZE (16)
 /*
 * Maximum user name length
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index e593c40ba7ba..7ed69b6b5fe6 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -104,6 +104,7 @@ extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601);
 extern u64 cifs_UnixTimeToNT(struct timespec);
 extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time,
                                      int offset);
+extern void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock);
 extern struct cifsFileInfo *cifs_new_fileinfo(__u16 fileHandle,
                                struct file *file, struct tcon_link *tlink,
@@ -362,13 +363,15 @@ extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *);
 extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *,
                          __u32 *);
 extern int cifs_verify_signature(struct smb_hdr *,
-                                 const struct session_key *session_key,
+                                 struct TCP_Server_Info *server,
                                __u32 expected_sequence_number);
-extern int cifs_calculate_session_key(struct session_key *key, const char *rn,
+extern void SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *);
-                                 const char *pass);
+extern int setup_ntlm_response(struct cifsSesInfo *);
-extern void CalcNTLMv2_response(const struct cifsSesInfo *, char *);
+extern int setup_ntlmv2_rsp(struct cifsSesInfo *, const struct nls_table *);
-extern int setup_ntlmv2_rsp(struct cifsSesInfo *, char *,
+extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *);
-                             const struct nls_table *);
+extern void cifs_crypto_shash_release(struct TCP_Server_Info *);
+extern int calc_seckey(struct cifsSesInfo *);
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
 extern void calc_lanman_hash(const char *password, const char *cryptkey,
                                bool encrypt, char *lnm_session_key);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index e98f1f317b15..2f2632b6df5a 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -503,7 +503,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
                if (rsp->EncryptionKeyLength ==
                                cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) {
-                        memcpy(ses->cryptKey, rsp->EncryptionKey,
+                        memcpy(ses->server->cryptkey, rsp->EncryptionKey,
                                CIFS_CRYPTO_KEY_SIZE);
                } else if (server->secMode & SECMODE_PW_ENCRYPT) {
                        rc = -EIO; /* need cryptkey unless plain text */
@@ -574,7 +574,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
        server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone);
        server->timeAdj *= 60;
        if (pSMBr->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) {
-                memcpy(ses->cryptKey, pSMBr->u.EncryptionKey,
+                memcpy(ses->server->cryptkey, pSMBr->u.EncryptionKey,
                       CIFS_CRYPTO_KEY_SIZE);
        } else if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC)
                        && (pSMBr->EncryptionKeyLength == 0)) {
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 7e73176acb58..251a17c03545 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -116,6 +116,7 @@ struct smb_vol {
 static int ipv4_connect(struct TCP_Server_Info *server);
 static int ipv6_connect(struct TCP_Server_Info *server);
+static void tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink);
 static void cifs_prune_tlinks(struct work_struct *work);
 /*
@@ -175,6 +176,9 @@ cifs_reconnect(struct TCP_Server_Info *server)
        }
        server->sequence_number = 0;
        server->session_estab = false;
+        kfree(server->session_key.response);
+        server->session_key.response = NULL;
+        server->session_key.len = 0;
        spin_lock(&GlobalMid_Lock);
        list_for_each(tmp, &server->pending_mid_q) {
@@ -1064,7 +1068,7 @@ cifs_parse_mount_options(char *options, const char *devname,
                        }
                        i = cifs_convert_address((struct sockaddr *)&vol->srcaddr,
                                                 value, strlen(value));
-                        if (i < 0) {
+                        if (i == 0) {
                                printk(KERN_WARNING "CIFS:  Could not parse"
                                       " srcaddr: %s\n",
                                       value);
@@ -1560,8 +1564,13 @@ cifs_put_tcp_session(struct TCP_Server_Info *server)
        server->tcpStatus = CifsExiting;
        spin_unlock(&GlobalMid_Lock);
+        cifs_crypto_shash_release(server);
        cifs_fscache_release_client_cookie(server);
+        kfree(server->session_key.response);
+        server->session_key.response = NULL;
+        server->session_key.len = 0;
        task = xchg(&server->tsk, NULL);
        if (task)
                force_sig(SIGKILL, task);
@@ -1614,10 +1623,16 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
                goto out_err;
        }
+        rc = cifs_crypto_shash_allocate(tcp_ses);
+        if (rc) {
+                cERROR(1, "could not setup hash structures rc %d", rc);
+                goto out_err;
+        }
        tcp_ses->hostname = extract_hostname(volume_info->UNC);
        if (IS_ERR(tcp_ses->hostname)) {
                rc = PTR_ERR(tcp_ses->hostname);
-                goto out_err;
+                goto out_err_crypto_release;
        }
        tcp_ses->noblocksnd = volume_info->noblocksnd;
@@ -1661,7 +1676,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
        }
        if (rc < 0) {
                cERROR(1, "Error connecting to socket. Aborting operation");
-                goto out_err;
+                goto out_err_crypto_release;
        }
        /*
@@ -1675,7 +1690,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
                rc = PTR_ERR(tcp_ses->tsk);
                cERROR(1, "error %d create cifsd thread", rc);
                module_put(THIS_MODULE);
-                goto out_err;
+                goto out_err_crypto_release;
        }
        /* thread spawned, put it on the list */
@@ -1687,6 +1702,9 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
        return tcp_ses;
+out_err_crypto_release:
+        cifs_crypto_shash_release(tcp_ses);
 out_err:
        if (tcp_ses) {
                if (!IS_ERR(tcp_ses->hostname))
@@ -1801,8 +1819,6 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
        if (ses == NULL)
                goto get_ses_fail;
-        ses->tilen = 0;
-        ses->tiblob = NULL;
        /* new SMB session uses our server ref */
        ses->server = server;
        if (server->addr.sockAddr6.sin6_family == AF_INET6)
@@ -1823,10 +1839,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
                        goto get_ses_fail;
        }
        if (volume_info->domainname) {
-                int len = strlen(volume_info->domainname);
+                ses->domainName = kstrdup(volume_info->domainname, GFP_KERNEL);
-                ses->domainName = kmalloc(len + 1, GFP_KERNEL);
+                if (!ses->domainName)
-                if (ses->domainName)
+                        goto get_ses_fail;
-                        strcpy(ses->domainName, volume_info->domainname);
        }
        ses->cred_uid = volume_info->cred_uid;
        ses->linux_uid = volume_info->linux_uid;
@@ -2886,24 +2901,16 @@ remote_path_check:
                goto mount_fail_check;
        }
-        tlink->tl_index = pSesInfo->linux_uid;
+        tlink->tl_uid = pSesInfo->linux_uid;
        tlink->tl_tcon = tcon;
        tlink->tl_time = jiffies;
        set_bit(TCON_LINK_MASTER, &tlink->tl_flags);
        set_bit(TCON_LINK_IN_TREE, &tlink->tl_flags);
-        rc = radix_tree_preload(GFP_KERNEL);
+        cifs_sb->master_tlink = tlink;
-        if (rc == -ENOMEM) {
-                kfree(tlink);
-                goto mount_fail_check;
-        }
        spin_lock(&cifs_sb->tlink_tree_lock);
-        radix_tree_insert(&cifs_sb->tlink_tree, pSesInfo->linux_uid, tlink);
+        tlink_rb_insert(&cifs_sb->tlink_tree, tlink);
-        radix_tree_tag_set(&cifs_sb->tlink_tree, pSesInfo->linux_uid,
-                           CIFS_TLINK_MASTER_TAG);
        spin_unlock(&cifs_sb->tlink_tree_lock);
-        radix_tree_preload_end();
        queue_delayed_work(system_nrt_wq, &cifs_sb->prune_tlinks,
                                TLINK_IDLE_EXPIRE);
@@ -2985,13 +2992,13 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
                if ((global_secflags & CIFSSEC_MAY_LANMAN) &&
                    (ses->server->secType == LANMAN))
-                        calc_lanman_hash(tcon->password, ses->cryptKey,
+                        calc_lanman_hash(tcon->password, ses->server->cryptkey,
                                         ses->server->secMode &
                                            SECMODE_PW_ENCRYPT ? true : false,
                                         bcc_ptr);
                else
 #endif /* CIFS_WEAK_PW_HASH */
-                SMBNTencrypt(tcon->password, ses->cryptKey, bcc_ptr);
+                SMBNTencrypt(tcon->password, ses->server->cryptkey, bcc_ptr);
                bcc_ptr += CIFS_SESS_KEY_SIZE;
                if (ses->capabilities & CAP_UNICODE) {
@@ -3093,32 +3100,25 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
 int
 cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
 {
-        int i, ret;
+        struct rb_root *root = &cifs_sb->tlink_tree;
+        struct rb_node *node;
+        struct tcon_link *tlink;
        char *tmp;
-        struct tcon_link *tlink[8];
-        unsigned long index = 0;
        cancel_delayed_work_sync(&cifs_sb->prune_tlinks);
-        do {
+        spin_lock(&cifs_sb->tlink_tree_lock);
-                spin_lock(&cifs_sb->tlink_tree_lock);
+        while ((node = rb_first(root))) {
-                ret = radix_tree_gang_lookup(&cifs_sb->tlink_tree,
+                tlink = rb_entry(node, struct tcon_link, tl_rbnode);
-                                             (void **)tlink, index,
+                cifs_get_tlink(tlink);
-                                             ARRAY_SIZE(tlink));
+                clear_bit(TCON_LINK_IN_TREE, &tlink->tl_flags);
-                /* increment index for next pass */
+                rb_erase(node, root);
-                if (ret > 0)
-                        index = tlink[ret - 1]->tl_index + 1;
-                for (i = 0; i < ret; i++) {
-                        cifs_get_tlink(tlink[i]);
-                        clear_bit(TCON_LINK_IN_TREE, &tlink[i]->tl_flags);
-                        radix_tree_delete(&cifs_sb->tlink_tree,
-                                                        tlink[i]->tl_index);
-                }
-                spin_unlock(&cifs_sb->tlink_tree_lock);
-                for (i = 0; i < ret; i++)
+                spin_unlock(&cifs_sb->tlink_tree_lock);
-                        cifs_put_tlink(tlink[i]);
+                cifs_put_tlink(tlink);
-        } while (ret != 0);
+                spin_lock(&cifs_sb->tlink_tree_lock);
+        }
+        spin_unlock(&cifs_sb->tlink_tree_lock);
        tmp = cifs_sb->prepath;
        cifs_sb->prepathlen = 0;
@@ -3178,10 +3178,11 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *ses,
        } else {
                mutex_lock(&ses->server->srv_mutex);
                if (!server->session_estab) {
-                        memcpy(&server->session_key.data,
+                        server->session_key.response = ses->auth_key.response;
-                                &ses->auth_key.data, ses->auth_key.len);
                        server->session_key.len = ses->auth_key.len;
-                        ses->server->session_estab = true;
+                        server->sequence_number = 0x2;
+                        server->session_estab = true;
+                        ses->auth_key.response = NULL;
                }
                mutex_unlock(&server->srv_mutex);
@@ -3192,6 +3193,12 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *ses,
                spin_unlock(&GlobalMid_Lock);
        }
+        kfree(ses->auth_key.response);
+        ses->auth_key.response = NULL;
+        ses->auth_key.len = 0;
+        kfree(ses->ntlmssp);
+        ses->ntlmssp = NULL;
        return rc;
 }
@@ -3250,22 +3257,10 @@ out:
        return tcon;
 }
-static struct tcon_link *
+static inline struct tcon_link *
 cifs_sb_master_tlink(struct cifs_sb_info *cifs_sb)
 {
-        struct tcon_link *tlink;
+        return cifs_sb->master_tlink; /* cached when the master tlink is set up in the mount path */
-        unsigned int ret;
-        spin_lock(&cifs_sb->tlink_tree_lock);
-        ret = radix_tree_gang_lookup_tag(&cifs_sb->tlink_tree, (void **)&tlink,
-                                        0, 1, CIFS_TLINK_MASTER_TAG);
-        spin_unlock(&cifs_sb->tlink_tree_lock);
-        /* the master tcon should always be present */
-        if (ret == 0)
-                BUG();
-        return tlink;
 }
 struct cifsTconInfo *
@@ -3281,6 +3276,47 @@ cifs_sb_tcon_pending_wait(void *unused)
        return signal_pending(current) ? -ERESTARTSYS : 0;
 }
+/*
+ * Find and return the tcon_link whose tl_uid equals @uid in the rbtree
+ * rooted at @root.
+ *
+ * The tree is ordered by tl_uid: the walk starts at the root and goes left
+ * when the current node's tl_uid is larger than @uid, right when it is
+ * smaller. Returns the matching tcon_link, or NULL when no node carries
+ * that uid.
+ *
+ * This helper takes no lock and no reference itself. cifs_sb_tlink() calls
+ * it with cifs_sb->tlink_tree_lock held and then pins the result with
+ * cifs_get_tlink() before dropping the lock.
+ */
+static struct tcon_link *
+tlink_rb_search(struct rb_root *root, uid_t uid)
+{
+        struct rb_node *node = root->rb_node;
+        struct tcon_link *tlink;
+        while (node) {
+                tlink = rb_entry(node, struct tcon_link, tl_rbnode);
+                if (tlink->tl_uid > uid)
+                        node = node->rb_left; /* wanted uid is smaller */
+                else if (tlink->tl_uid < uid)
+                        node = node->rb_right; /* wanted uid is larger */
+                else
+                        return tlink; /* exact match */
+        }
+        return NULL; /* ran off the tree: uid not present */
+}
+/*
+ * Insert @new_tlink into the rbtree rooted at @root, keyed by tl_uid.
+ *
+ * Descends from the root to the first empty child slot, remembering the
+ * parent on the way down, then links the new node into that slot and hands
+ * it to rb_insert_color() so the rbtree core can fix up the tree.
+ *
+ * Duplicate keys are not rejected: a node with an equal tl_uid is simply
+ * placed to the right of the existing one. cifs_sb_tlink() therefore runs
+ * tlink_rb_search() for the uid under cifs_sb->tlink_tree_lock before
+ * inserting. Both call sites in this file hold that lock around the call;
+ * this function does no locking of its own.
+ */
+static void
+tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink)
+{
+        struct rb_node **new = &(root->rb_node), *parent = NULL;
+        struct tcon_link *tlink;
+        while (*new) {
+                tlink = rb_entry(*new, struct tcon_link, tl_rbnode);
+                parent = *new; /* the node we are about to descend from */
+                if (tlink->tl_uid > new_tlink->tl_uid)
+                        new = &((*new)->rb_left);
+                else /* smaller or equal existing uid: go right */
+                        new = &((*new)->rb_right);
+        }
+        rb_link_node(&new_tlink->tl_rbnode, parent, new);
+        rb_insert_color(&new_tlink->tl_rbnode, root);
+}
 /*
 * Find or construct an appropriate tcon given a cifs_sb and the fsuid of the
 * current task.
@@ -3288,7 +3324,7 @@ cifs_sb_tcon_pending_wait(void *unused)
 * If the superblock doesn't refer to a multiuser mount, then just return
 * the master tcon for the mount.
 *
- * First, search the radix tree for an existing tcon for this fsuid. If one
+ * First, search the rbtree for an existing tcon for this fsuid. If one
 * exists, then check to see if it's pending construction. If it is then wait
 * for construction to complete. Once it's no longer pending, check to see if
 * it failed and either return an error or retry construction, depending on
@@ -3301,14 +3337,14 @@ struct tcon_link *
 cifs_sb_tlink(struct cifs_sb_info *cifs_sb)
 {
        int ret;
-        unsigned long fsuid = (unsigned long) current_fsuid();
+        uid_t fsuid = current_fsuid();
        struct tcon_link *tlink, *newtlink;
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
                return cifs_get_tlink(cifs_sb_master_tlink(cifs_sb));
        spin_lock(&cifs_sb->tlink_tree_lock);
-        tlink = radix_tree_lookup(&cifs_sb->tlink_tree, fsuid);
+        tlink = tlink_rb_search(&cifs_sb->tlink_tree, fsuid);
        if (tlink)
                cifs_get_tlink(tlink);
        spin_unlock(&cifs_sb->tlink_tree_lock);
@@ -3317,36 +3353,24 @@ cifs_sb_tlink(struct cifs_sb_info *cifs_sb)
                newtlink = kzalloc(sizeof(*tlink), GFP_KERNEL);
                if (newtlink == NULL)
                        return ERR_PTR(-ENOMEM);
-                newtlink->tl_index = fsuid;
+                newtlink->tl_uid = fsuid;
                newtlink->tl_tcon = ERR_PTR(-EACCES);
                set_bit(TCON_LINK_PENDING, &newtlink->tl_flags);
                set_bit(TCON_LINK_IN_TREE, &newtlink->tl_flags);
                cifs_get_tlink(newtlink);
-                ret = radix_tree_preload(GFP_KERNEL);
-                if (ret != 0) {
-                        kfree(newtlink);
-                        return ERR_PTR(ret);
-                }
                spin_lock(&cifs_sb->tlink_tree_lock);
                /* was one inserted after previous search? */
-                tlink = radix_tree_lookup(&cifs_sb->tlink_tree, fsuid);
+                tlink = tlink_rb_search(&cifs_sb->tlink_tree, fsuid);
                if (tlink) {
                        cifs_get_tlink(tlink);
                        spin_unlock(&cifs_sb->tlink_tree_lock);
-                        radix_tree_preload_end();
                        kfree(newtlink);
                        goto wait_for_construction;
                }
-                ret = radix_tree_insert(&cifs_sb->tlink_tree, fsuid, newtlink);
-                spin_unlock(&cifs_sb->tlink_tree_lock);
-                radix_tree_preload_end();
-                if (ret) {
-                        kfree(newtlink);
-                        return ERR_PTR(ret);
-                }
                tlink = newtlink;
+                tlink_rb_insert(&cifs_sb->tlink_tree, tlink);
+                spin_unlock(&cifs_sb->tlink_tree_lock);
        } else {
 wait_for_construction:
                ret = wait_on_bit(&tlink->tl_flags, TCON_LINK_PENDING,
@@ -3392,39 +3416,39 @@ cifs_prune_tlinks(struct work_struct *work)
 {
        struct cifs_sb_info *cifs_sb = container_of(work, struct cifs_sb_info,
                                                    prune_tlinks.work);
-        struct tcon_link *tlink[8];
+        struct rb_root *root = &cifs_sb->tlink_tree;
-        unsigned long now = jiffies;
+        struct rb_node *node = rb_first(root);
-        unsigned long index = 0;
+        struct rb_node *tmp;
-        int i, ret;
+        struct tcon_link *tlink;
-        do {
+        /*
-                spin_lock(&cifs_sb->tlink_tree_lock);
+         * Because we drop the spinlock in the loop in order to put the tlink
-                ret = radix_tree_gang_lookup(&cifs_sb->tlink_tree,
+         * it's not guarded against removal of links from the tree. The only
-                                             (void **)tlink, index,
+         * places that remove entries from the tree are this function and
-                                             ARRAY_SIZE(tlink));
+         * umounts. Because this function is non-reentrant and is canceled
-                /* increment index for next pass */
+         * before umount can proceed, this is safe.
-                if (ret > 0)
+         */
-                        index = tlink[ret - 1]->tl_index + 1;
+        spin_lock(&cifs_sb->tlink_tree_lock);
-                for (i = 0; i < ret; i++) {
+        node = rb_first(root);
-                        if (test_bit(TCON_LINK_MASTER, &tlink[i]->tl_flags) ||
+        while (node != NULL) {
-                            atomic_read(&tlink[i]->tl_count) != 0 ||
+                tmp = node;
-                            time_after(tlink[i]->tl_time + TLINK_IDLE_EXPIRE,
+                node = rb_next(tmp);
-                                       now)) {
+                tlink = rb_entry(tmp, struct tcon_link, tl_rbnode);
-                                tlink[i] = NULL;
-                                continue;
+                if (test_bit(TCON_LINK_MASTER, &tlink->tl_flags) ||
-                        }
+                    atomic_read(&tlink->tl_count) != 0 ||
-                        cifs_get_tlink(tlink[i]);
+                    time_after(tlink->tl_time + TLINK_IDLE_EXPIRE, jiffies))
-                        clear_bit(TCON_LINK_IN_TREE, &tlink[i]->tl_flags);
+                        continue;
-                        radix_tree_delete(&cifs_sb->tlink_tree,
-                                          tlink[i]->tl_index);
-                }
-                spin_unlock(&cifs_sb->tlink_tree_lock);
-                for (i = 0; i < ret; i++) {
+                cifs_get_tlink(tlink);
-                        if (tlink[i] != NULL)
+                clear_bit(TCON_LINK_IN_TREE, &tlink->tl_flags);
-                                cifs_put_tlink(tlink[i]);
+                rb_erase(tmp, root);
-                }
-        } while (ret != 0);
+                spin_unlock(&cifs_sb->tlink_tree_lock);
+                cifs_put_tlink(tlink);
+                spin_lock(&cifs_sb->tlink_tree_lock);
+        }
+        spin_unlock(&cifs_sb->tlink_tree_lock);
        queue_delayed_work(system_nrt_wq, &cifs_sb->prune_tlinks,
                                TLINK_IDLE_EXPIRE);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 45af003865d2..06c3e83fa387 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -131,8 +131,7 @@ static inline int cifs_open_inode_helper(struct inode *inode,
                        /* BB no need to lock inode until after invalidate
                        since namei code should already have it locked? */
                        rc = filemap_write_and_wait(inode->i_mapping);
-                        if (rc != 0)
+                        mapping_set_error(inode->i_mapping, rc);
-                                pCifsInode->write_behind_rc = rc;
                }
                cFYI(1, "invalidating remote inode since open detected it "
                         "changed");
@@ -147,12 +146,7 @@ client_can_cache:
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, NULL);
-        if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
+        cifs_set_oplock_level(pCifsInode, oplock);
-                pCifsInode->clientCanCacheAll = true;
-                pCifsInode->clientCanCacheRead = true;
-                cFYI(1, "Exclusive Oplock granted on inode %p", inode);
-        } else if ((oplock & 0xF) == OPLOCK_READ)
-                pCifsInode->clientCanCacheRead = true;
        return rc;
 }
@@ -232,6 +226,7 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
        if (pCifsFile == NULL)
                return pCifsFile;
+        pCifsFile->count = 1;
        pCifsFile->netfid = fileHandle;
        pCifsFile->pid = current->tgid;
        pCifsFile->uid = current_fsuid();
@@ -242,7 +237,6 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
        mutex_init(&pCifsFile->fh_mutex);
        mutex_init(&pCifsFile->lock_mutex);
        INIT_LIST_HEAD(&pCifsFile->llist);
-        atomic_set(&pCifsFile->count, 1);
        INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break);
        spin_lock(&cifs_file_list_lock);
@@ -254,12 +248,7 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
                list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList);
        spin_unlock(&cifs_file_list_lock);
-        if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
+        cifs_set_oplock_level(pCifsInode, oplock);
-                pCifsInode->clientCanCacheAll = true;
-                pCifsInode->clientCanCacheRead = true;
-                cFYI(1, "Exclusive Oplock inode %p", inode);
-        } else if ((oplock & 0xF) == OPLOCK_READ)
-                pCifsInode->clientCanCacheRead = true;
        file->private_data = pCifsFile;
        return pCifsFile;
@@ -267,16 +256,18 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
 /*
 * Release a reference on the file private data. This may involve closing
- * the filehandle out on the server.
+ * the filehandle out on the server. Must be called without holding
+ * cifs_file_list_lock.
 */
 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
 {
+        struct inode *inode = cifs_file->dentry->d_inode;
        struct cifsTconInfo *tcon = tlink_tcon(cifs_file->tlink);
-        struct cifsInodeInfo *cifsi = CIFS_I(cifs_file->dentry->d_inode);
+        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct cifsLockInfo *li, *tmp;
        spin_lock(&cifs_file_list_lock);
-        if (!atomic_dec_and_test(&cifs_file->count)) {
+        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file_list_lock);
                return;
        }
@@ -288,8 +279,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
        if (list_empty(&cifsi->openFileList)) {
                cFYI(1, "closing last open instance for inode %p",
                        cifs_file->dentry->d_inode);
-                cifsi->clientCanCacheRead = false;
+                cifs_set_oplock_level(cifsi, 0);
-                cifsi->clientCanCacheAll  = false;
        }
        spin_unlock(&cifs_file_list_lock);
@@ -605,11 +595,8 @@ reopen_success:
        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
-                if (rc != 0)
+                mapping_set_error(inode->i_mapping, rc);
-                        CIFS_I(inode)->write_behind_rc = rc;
-                pCifsInode->clientCanCacheAll = false;
-                pCifsInode->clientCanCacheRead = false;
                if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode,
                                full_path, inode->i_sb, xid);
@@ -623,18 +610,9 @@ reopen_success:
             invalidate the current end of file on the server
             we can not go to the server to get the new inod
             info */
-        if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
-                pCifsInode->clientCanCacheAll = true;
+        cifs_set_oplock_level(pCifsInode, oplock);
-                pCifsInode->clientCanCacheRead = true;
-                cFYI(1, "Exclusive Oplock granted on inode %p",
-                         pCifsFile->dentry->d_inode);
-        } else if ((oplock & 0xF) == OPLOCK_READ) {
-                pCifsInode->clientCanCacheRead = true;
-                pCifsInode->clientCanCacheAll = false;
-        } else {
-                pCifsInode->clientCanCacheRead = false;
-                pCifsInode->clientCanCacheAll = false;
-        }
        cifs_relock_file(pCifsFile);
 reopen_error_exit:
@@ -776,12 +754,6 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
        cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
        tcon = tlink_tcon(((struct cifsFileInfo *)file->private_data)->tlink);
-        if (file->private_data == NULL) {
-                rc = -EBADF;
-                FreeXid(xid);
-                return rc;
-        }
        netfid = ((struct cifsFileInfo *)file->private_data)->netfid;
        if ((tcon->ses->capabilities & CAP_UNIX) &&
@@ -957,6 +929,7 @@ cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
 ssize_t cifs_user_write(struct file *file, const char __user *write_data,
        size_t write_size, loff_t *poffset)
 {
+        struct inode *inode = file->f_path.dentry->d_inode;
        int rc = 0;
        unsigned int bytes_written = 0;
        unsigned int total_written;
@@ -964,7 +937,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
        struct cifsTconInfo *pTcon;
        int xid, long_op;
        struct cifsFileInfo *open_file;
-        struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode);
+        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
@@ -1030,21 +1003,17 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
        cifs_stats_bytes_written(pTcon, total_written);
-        /* since the write may have blocked check these pointers again */
-        if ((file->f_path.dentry) && (file->f_path.dentry->d_inode)) {
-                struct inode *inode = file->f_path.dentry->d_inode;
 /* Do not update local mtime - server will set its actual value on write
- *              inode->i_ctime = inode->i_mtime =
+ *      inode->i_ctime = inode->i_mtime =
- *                      current_fs_time(inode->i_sb);*/
+ *              current_fs_time(inode->i_sb);*/
-                if (total_written > 0) {
+        if (total_written > 0) {
-                        spin_lock(&inode->i_lock);
+                spin_lock(&inode->i_lock);
-                        if (*poffset > file->f_path.dentry->d_inode->i_size)
+                if (*poffset > inode->i_size)
-                                i_size_write(file->f_path.dentry->d_inode,
+                        i_size_write(inode, *poffset);
-                                        *poffset);
+                spin_unlock(&inode->i_lock);
-                        spin_unlock(&inode->i_lock);
-                }
-                mark_inode_dirty_sync(file->f_path.dentry->d_inode);
        }
+        mark_inode_dirty_sync(inode);
        FreeXid(xid);
        return total_written;
 }
@@ -1179,7 +1148,7 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
                                        bool fsuid_only)
 {
        struct cifsFileInfo *open_file;
-        struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
+        struct cifs_sb_info *cifs_sb;
        bool any_available = false;
        int rc;
@@ -1193,6 +1162,8 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
                return NULL;
        }
+        cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
        /* only filter by fsuid on multiuser mounts */
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
                fsuid_only = false;
@@ -1353,6 +1324,7 @@ static int cifs_writepages(struct address_space *mapping,
        if (!experimEnabled && tcon->ses->server->secMode &
                        (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
                cifsFileInfo_put(open_file);
+                kfree(iov);
                return generic_writepages(mapping, wbc);
        }
        cifsFileInfo_put(open_file);
@@ -1478,12 +1450,7 @@ retry:
                        if (rc || bytes_written < bytes_to_write) {
                                cERROR(1, "Write2 ret %d, wrote %d",
                                          rc, bytes_written);
-                                /* BB what if continued retry is
+                                mapping_set_error(mapping, rc);
-                                   requested via mount flags? */
-                                if (rc == -ENOSPC)
-                                        set_bit(AS_ENOSPC, &mapping->flags);
-                                else
-                                        set_bit(AS_EIO, &mapping->flags);
                        } else {
                                cifs_stats_bytes_written(tcon, bytes_written);
                        }
@@ -1628,11 +1595,10 @@ int cifs_fsync(struct file *file, int datasync)
        rc = filemap_write_and_wait(inode->i_mapping);
        if (rc == 0) {
-                rc = CIFS_I(inode)->write_behind_rc;
+                struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
-                CIFS_I(inode)->write_behind_rc = 0;
                tcon = tlink_tcon(smbfile->tlink);
-                if (!rc && tcon && smbfile &&
+                if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
-                   !(CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
                        rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
        }
@@ -1677,21 +1643,8 @@ int cifs_flush(struct file *file, fl_owner_t id)
        struct inode *inode = file->f_path.dentry->d_inode;
        int rc = 0;
-        /* Rather than do the steps manually:
+        if (file->f_mode & FMODE_WRITE)
-           lock the inode for writing
+                rc = filemap_write_and_wait(inode->i_mapping);
-           loop through pages looking for write behind data (dirty pages)
-           coalesce into contiguous 16K (or smaller) chunks to write to server
-           send to server (prefer in parallel)
-           deal with writebehind errors
-           unlock inode for writing
-           filemapfdatawrite appears easier for the time being */
-        rc = filemap_fdatawrite(inode->i_mapping);
-        /* reset wb rc if we were able to write out dirty pages */
-        if (!rc) {
-                rc = CIFS_I(inode)->write_behind_rc;
-                CIFS_I(inode)->write_behind_rc = 0;
-        }
        cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
@@ -2270,7 +2223,7 @@ void cifs_oplock_break(struct work_struct *work)
                                                  oplock_break);
        struct inode *inode = cfile->dentry->d_inode;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
-        int rc, waitrc = 0;
+        int rc = 0;
        if (inode && S_ISREG(inode->i_mode)) {
                if (cinode->clientCanCacheRead)
@@ -2279,13 +2232,10 @@ void cifs_oplock_break(struct work_struct *work)
                        break_lease(inode, O_WRONLY);
                rc = filemap_fdatawrite(inode->i_mapping);
                if (cinode->clientCanCacheRead == 0) {
-                        waitrc = filemap_fdatawait(inode->i_mapping);
+                        rc = filemap_fdatawait(inode->i_mapping);
+                        mapping_set_error(inode->i_mapping, rc);
                        invalidate_remote_inode(inode);
                }
-                if (!rc)
-                        rc = waitrc;
-                if (rc)
-                        cinode->write_behind_rc = rc;
                cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
        }
@@ -2304,7 +2254,7 @@ void cifs_oplock_break(struct work_struct *work)
        /*
         * We might have kicked in before is_valid_oplock_break()
         * finished grabbing reference for us.  Make sure it's done by
-         * waiting for GlobalSMSSeslock.
+         * waiting for cifs_file_list_lock.
         */
        spin_lock(&cifs_file_list_lock);
        spin_unlock(&cifs_file_list_lock);
@@ -2312,6 +2262,7 @@ void cifs_oplock_break(struct work_struct *work)
        cifs_oplock_break_put(cfile);
 }
+/* must be called while holding cifs_file_list_lock */
 void cifs_oplock_break_get(struct cifsFileInfo *cfile)
 {
        cifs_sb_active(cfile->dentry->d_sb);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 94979309698a..ef3a55bf86b6 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1682,8 +1682,7 @@ cifs_invalidate_mapping(struct inode *inode)
        /* write back any cached data */
        if (inode->i_mapping && inode->i_mapping->nrpages != 0) {
                rc = filemap_write_and_wait(inode->i_mapping);
-                if (rc)
+                mapping_set_error(inode->i_mapping, rc);
-                        cifs_i->write_behind_rc = rc;
        }
        invalidate_remote_inode(inode);
        cifs_fscache_reset_inode_cookie(inode);
@@ -1943,10 +1942,8 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
         * the flush returns error?
         */
        rc = filemap_write_and_wait(inode->i_mapping);
-        if (rc != 0) {
+        mapping_set_error(inode->i_mapping, rc);
-                cifsInode->write_behind_rc = rc;
+        rc = 0;
-                rc = 0;
-        }
        if (attrs->ia_valid & ATTR_SIZE) {
                rc = cifs_set_file_size(inode, attrs, xid, full_path);
@@ -2087,10 +2084,8 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
         * the flush returns error?
         */
        rc = filemap_write_and_wait(inode->i_mapping);
-        if (rc != 0) {
+        mapping_set_error(inode->i_mapping, rc);
-                cifsInode->write_behind_rc = rc;
+        rc = 0;
-                rc = 0;
-        }
        if (attrs->ia_valid & ATTR_SIZE) {
                rc = cifs_set_file_size(inode, attrs, xid, full_path);
@@ -2182,7 +2177,6 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
        setattr_copy(inode, attrs);
        mark_inode_dirty(inode);
-        return 0;
 cifs_setattr_exit:
        kfree(full_path);
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 077bf756f342..0c98672d0122 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -38,10 +38,10 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
        struct cifs_sb_info *cifs_sb;
 #ifdef CONFIG_CIFS_POSIX
        struct cifsFileInfo *pSMBFile = filep->private_data;
-        struct cifsTconInfo *tcon = tlink_tcon(pSMBFile->tlink);
+        struct cifsTconInfo *tcon;
        __u64   ExtAttrBits = 0;
        __u64   ExtAttrMask = 0;
-        __u64   caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
+        __u64   caps;
 #endif /* CONFIG_CIFS_POSIX */
        xid = GetXid();
@@ -62,9 +62,11 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
                        break;
 #ifdef CONFIG_CIFS_POSIX
                case FS_IOC_GETFLAGS:
+                        if (pSMBFile == NULL)
+                                break;
+                        tcon = tlink_tcon(pSMBFile->tlink);
+                        caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
                        if (CIFS_UNIX_EXTATTR_CAP & caps) {
-                                if (pSMBFile == NULL)
-                                        break;
                                rc = CIFSGetExtAttr(xid, tcon, pSMBFile->netfid,
                                        &ExtAttrBits, &ExtAttrMask);
                                if (rc == 0)
@@ -75,13 +77,15 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
                        break;
                case FS_IOC_SETFLAGS:
+                        if (pSMBFile == NULL)
+                                break;
+                        tcon = tlink_tcon(pSMBFile->tlink);
+                        caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
                        if (CIFS_UNIX_EXTATTR_CAP & caps) {
                                if (get_user(ExtAttrBits, (int __user *)arg)) {
                                        rc = -EFAULT;
                                        break;
                                }
-                                if (pSMBFile == NULL)
-                                        break;
                                /* rc= CIFSGetExtAttr(xid,tcon,pSMBFile->netfid,
                                        extAttrBits, &ExtAttrMask);*/
                        }
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 1c681f6a6803..43f10281bc19 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -569,15 +569,14 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
                                cFYI(1, "file id match, oplock break");
                                pCifsInode = CIFS_I(netfile->dentry->d_inode);
-                                pCifsInode->clientCanCacheAll = false;
-                                if (pSMB->OplockLevel == 0)
-                                        pCifsInode->clientCanCacheRead = false;
+                                cifs_set_oplock_level(pCifsInode,
+                                                      pSMB->OplockLevel);
                                /*
                                 * cifs_oplock_break_put() can't be called
                                 * from here.  Get reference after queueing
                                 * succeeded.  cifs_oplock_break() will
-                                 * synchronize using GlobalSMSSeslock.
+                                 * synchronize using cifs_file_list_lock.
                                 */
                                if (queue_work(system_nrt_wq,
                                               &netfile->oplock_break))
@@ -722,3 +721,23 @@ cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb)
                           cifs_sb_master_tcon(cifs_sb)->treeName);
        }
 }
+void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock)
+{
+        oplock &= 0xF;
+        if (oplock == OPLOCK_EXCLUSIVE) {
+                cinode->clientCanCacheAll = true;
+                cinode->clientCanCacheRead = true;
+                cFYI(1, "Exclusive Oplock granted on inode %p",
+                     &cinode->vfs_inode);
+        } else if (oplock == OPLOCK_READ) {
+                cinode->clientCanCacheAll = false;
+                cinode->clientCanCacheRead = true;
+                cFYI(1, "Level II Oplock granted on inode %p",
+                    &cinode->vfs_inode);
+        } else {
+                cinode->clientCanCacheAll = false;
+                cinode->clientCanCacheRead = false;
+        }
+}
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 2a11efd96592..7b01d3f6eed6 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -32,9 +32,6 @@
 #include <linux/slab.h>
 #include "cifs_spnego.h"
-extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8,
-                         unsigned char *p24);
 /*
 * Checks if this is the first smb session to be reconnected after
 * the socket has been reestablished (so we know whether to use vc 0).
@@ -402,23 +399,22 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
                return -EINVAL;
        }
-        memcpy(ses->cryptKey, pblob->Challenge, CIFS_CRYPTO_KEY_SIZE);
+        memcpy(ses->ntlmssp->cryptkey, pblob->Challenge, CIFS_CRYPTO_KEY_SIZE);
        /* BB we could decode pblob->NegotiateFlags; some may be useful */
        /* In particular we can examine sign flags */
        /* BB spec says that if AvId field of MsvAvTimestamp is populated then
                we must set the MIC field of the AUTHENTICATE_MESSAGE */
+        ses->ntlmssp->server_flags = le32_to_cpu(pblob->NegotiateFlags);
        tioffset = cpu_to_le16(pblob->TargetInfoArray.BufferOffset);
        tilen = cpu_to_le16(pblob->TargetInfoArray.Length);
-        ses->tilen = tilen;
+        if (tilen) {
-        if (ses->tilen) {
+                ses->auth_key.response = kmalloc(tilen, GFP_KERNEL);
-                ses->tiblob = kmalloc(tilen, GFP_KERNEL);
+                if (!ses->auth_key.response) {
-                if (!ses->tiblob) {
                        cERROR(1, "Challenge target info allocation failure");
-                        ses->tilen = 0;
                        return -ENOMEM;
                }
-                memcpy(ses->tiblob,  bcc_ptr + tioffset, ses->tilen);
+                memcpy(ses->auth_key.response, bcc_ptr + tioffset, tilen);
+                ses->auth_key.len = tilen;
        }
        return 0;
@@ -443,10 +439,12 @@ static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
                NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
                NTLMSSP_NEGOTIATE_NTLM;
        if (ses->server->secMode &
-           (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
+                        (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
                flags |= NTLMSSP_NEGOTIATE_SIGN;
-        if (ses->server->secMode & SECMODE_SIGN_REQUIRED)
+                if (!ses->server->session_estab)
-                flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN;
+                        flags |= NTLMSSP_NEGOTIATE_KEY_XCH |
+                                NTLMSSP_NEGOTIATE_EXTENDED_SEC;
+        }
        sec_blob->NegotiateFlags |= cpu_to_le32(flags);
@@ -469,11 +467,9 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
                                   const struct nls_table *nls_cp)
 {
        int rc;
-        unsigned int size;
        AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer;
        __u32 flags;
        unsigned char *tmp;
-        struct ntlmv2_resp ntlmv2_response = {};
        memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);
        sec_blob->MessageType = NtLmAuthenticate;
@@ -497,25 +493,19 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
        sec_blob->LmChallengeResponse.MaximumLength = 0;
        sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer);
-        rc = setup_ntlmv2_rsp(ses, (char *)&ntlmv2_response, nls_cp);
+        rc = setup_ntlmv2_rsp(ses, nls_cp);
        if (rc) {
                cERROR(1, "Error %d during NTLMSSP authentication", rc);
                goto setup_ntlmv2_ret;
        }
-        size =  sizeof(struct ntlmv2_resp);
+        memcpy(tmp, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
-        memcpy(tmp, (char *)&ntlmv2_response, size);
+                        ses->auth_key.len - CIFS_SESS_KEY_SIZE);
-        tmp += size;
+        tmp += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
-        if (ses->tilen > 0) {
-                memcpy(tmp, ses->tiblob, ses->tilen);
-                tmp += ses->tilen;
-        }
-        sec_blob->NtChallengeResponse.Length = cpu_to_le16(size + ses->tilen);
+        sec_blob->NtChallengeResponse.Length =
+                        cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
        sec_blob->NtChallengeResponse.MaximumLength =
-                                cpu_to_le16(size + ses->tilen);
+                        cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
-        kfree(ses->tiblob);
-        ses->tiblob = NULL;
-        ses->tilen = 0;
        if (ses->domainName == NULL) {
                sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
@@ -554,9 +544,19 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
        sec_blob->WorkstationName.MaximumLength = 0;
        tmp += 2;
-        sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
+        if ((ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_KEY_XCH) &&
-        sec_blob->SessionKey.Length = 0;
+                        !calc_seckey(ses)) {
-        sec_blob->SessionKey.MaximumLength = 0;
+                memcpy(tmp, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE);
+                sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
+                sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE);
+                sec_blob->SessionKey.MaximumLength =
+                                cpu_to_le16(CIFS_CPHTXT_SIZE);
+                tmp += CIFS_CPHTXT_SIZE;
+        } else {
+                sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
+                sec_blob->SessionKey.Length = 0;
+                sec_blob->SessionKey.MaximumLength = 0;
+        }
 setup_ntlmv2_ret:
        *buflen = tmp - pbuffer;
@@ -600,8 +600,16 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses,
                return -EINVAL;
        type = ses->server->secType;
        cFYI(1, "sess setup type %d", type);
+        if (type == RawNTLMSSP) {
+                /* if memory allocation is successful, caller of this function
+                 * frees it.
+                 */
+                ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL);
+                if (!ses->ntlmssp)
+                        return -ENOMEM;
+        }
 ssetup_ntlmssp_authenticate:
        if (phase == NtLmChallenge)
                phase = NtLmAuthenticate; /* if ntlmssp, now final phase */
@@ -666,10 +674,14 @@ ssetup_ntlmssp_authenticate:
                /* no capabilities flags in old lanman negotiation */
                pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE);
-                /* BB calculate hash with password */
-                /* and copy into bcc */
-                calc_lanman_hash(ses->password, ses->cryptKey,
+                /* Calculate hash with password and copy into bcc_ptr.
+                 * Encryption Key (stored in cryptkey) gets used if the
+                 * security mode bit in Negotiate Protocol response states
+                 * to use challenge/response method (i.e. Password bit is 1).
+                 */
+                calc_lanman_hash(ses->password, ses->server->cryptkey,
                                 ses->server->secMode & SECMODE_PW_ENCRYPT ?
                                        true : false, lnm_session_key);
@@ -687,24 +699,27 @@ ssetup_ntlmssp_authenticate:
                ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
 #endif
        } else if (type == NTLM) {
-                char ntlm_session_key[CIFS_SESS_KEY_SIZE];
                pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
                pSMB->req_no_secext.CaseInsensitivePasswordLength =
-                        cpu_to_le16(CIFS_SESS_KEY_SIZE);
+                        cpu_to_le16(CIFS_AUTH_RESP_SIZE);
                pSMB->req_no_secext.CaseSensitivePasswordLength =
-                        cpu_to_le16(CIFS_SESS_KEY_SIZE);
+                        cpu_to_le16(CIFS_AUTH_RESP_SIZE);
+                /* calculate ntlm response and session key */
+                rc = setup_ntlm_response(ses);
+                if (rc) {
+                        cERROR(1, "Error %d during NTLM authentication", rc);
+                        goto ssetup_exit;
+                }
-                /* calculate session key */
+                /* copy ntlm response */
-                SMBNTencrypt(ses->password, ses->cryptKey, ntlm_session_key);
+                memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+                                CIFS_AUTH_RESP_SIZE);
+                bcc_ptr += CIFS_AUTH_RESP_SIZE;
+                memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+                                CIFS_AUTH_RESP_SIZE);
+                bcc_ptr += CIFS_AUTH_RESP_SIZE;
-                cifs_calculate_session_key(&ses->auth_key,
-                                        ntlm_session_key, ses->password);
-                /* copy session key */
-                memcpy(bcc_ptr, (char *)ntlm_session_key, CIFS_SESS_KEY_SIZE);
-                bcc_ptr += CIFS_SESS_KEY_SIZE;
-                memcpy(bcc_ptr, (char *)ntlm_session_key, CIFS_SESS_KEY_SIZE);
-                bcc_ptr += CIFS_SESS_KEY_SIZE;
                if (ses->capabilities & CAP_UNICODE) {
                        /* unicode strings must be word aligned */
                        if (iov[0].iov_len % 2) {
@@ -715,47 +730,26 @@ ssetup_ntlmssp_authenticate:
                } else
                        ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
        } else if (type == NTLMv2) {
-                char *v2_sess_key =
-                        kmalloc(sizeof(struct ntlmv2_resp), GFP_KERNEL);
-                /* BB FIXME change all users of v2_sess_key to
-                   struct ntlmv2_resp */
-                if (v2_sess_key == NULL) {
-                        rc = -ENOMEM;
-                        goto ssetup_exit;
-                }
                pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
                /* LM2 password would be here if we supported it */
                pSMB->req_no_secext.CaseInsensitivePasswordLength = 0;
-                /*      cpu_to_le16(LM2_SESS_KEY_SIZE); */
-                /* calculate session key */
+                /* calculate ntlmv2 response and session key */
-                rc = setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp);
+                rc = setup_ntlmv2_rsp(ses, nls_cp);
                if (rc) {
                        cERROR(1, "Error %d during NTLMv2 authentication", rc);
-                        kfree(v2_sess_key);
                        goto ssetup_exit;
                }
-                memcpy(bcc_ptr, (char *)v2_sess_key,
+                memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
-                                sizeof(struct ntlmv2_resp));
+                                ses->auth_key.len - CIFS_SESS_KEY_SIZE);
-                bcc_ptr += sizeof(struct ntlmv2_resp);
+                bcc_ptr += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
-                kfree(v2_sess_key);
                /* set case sensitive password length after tilen may get
                 * assigned, tilen is 0 otherwise.
                 */
                pSMB->req_no_secext.CaseSensitivePasswordLength =
-                        cpu_to_le16(sizeof(struct ntlmv2_resp) + ses->tilen);
+                        cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
-                if (ses->tilen > 0) {
-                        memcpy(bcc_ptr, ses->tiblob, ses->tilen);
-                        bcc_ptr += ses->tilen;
-                        /* we never did allocate ses->domainName to free */
-                        kfree(ses->tiblob);
-                        ses->tiblob = NULL;
-                        ses->tilen = 0;
-                }
                if (ses->capabilities & CAP_UNICODE) {
                        if (iov[0].iov_len % 2) {
@@ -768,6 +762,7 @@ ssetup_ntlmssp_authenticate:
        } else if (type == Kerberos) {
 #ifdef CONFIG_CIFS_UPCALL
                struct cifs_spnego_msg *msg;
                spnego_key = cifs_get_spnego_key(ses);
                if (IS_ERR(spnego_key)) {
                        rc = PTR_ERR(spnego_key);
@@ -785,16 +780,17 @@ ssetup_ntlmssp_authenticate:
                        rc = -EKEYREJECTED;
                        goto ssetup_exit;
                }
-                /* bail out if key is too long */
-                if (msg->sesskey_len >
+                ses->auth_key.response = kmalloc(msg->sesskey_len, GFP_KERNEL);
-                    sizeof(ses->auth_key.data.krb5)) {
+                if (!ses->auth_key.response) {
-                        cERROR(1, "Kerberos signing key too long (%u bytes)",
+                        cERROR(1, "Kerberos can't allocate (%u bytes) memory",
-                                msg->sesskey_len);
+                                        msg->sesskey_len);
-                        rc = -EOVERFLOW;
+                        rc = -ENOMEM;
                        goto ssetup_exit;
                }
+                memcpy(ses->auth_key.response, msg->data, msg->sesskey_len);
                ses->auth_key.len = msg->sesskey_len;
-                memcpy(ses->auth_key.data.krb5, msg->data, msg->sesskey_len);
                pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
                capabilities |= CAP_EXTENDED_SECURITY;
                pSMB->req.Capabilities = cpu_to_le32(capabilities);
@@ -897,8 +893,6 @@ ssetup_ntlmssp_authenticate:
                          CIFS_STD_OP /* not long */ | CIFS_LOG_ERROR);
        /* SMB request buf freed in SendReceive2 */
-        cFYI(1, "ssetup rc from sendrecv2 is %d", rc);
        pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base;
        smb_buf = (struct smb_hdr *)iov[0].iov_base;
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index a66c91eb6eb4..e0588cdf4cc5 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -543,7 +543,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
                    (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
                                             SECMODE_SIGN_ENABLED))) {
                        rc = cifs_verify_signature(midQ->resp_buf,
-                                                &ses->server->session_key,
+                                                ses->server,
                                                midQ->sequence_number+1);
                        if (rc) {
                                cERROR(1, "Unexpected SMB signature");
@@ -731,7 +731,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
                    (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
                                             SECMODE_SIGN_ENABLED))) {
                        rc = cifs_verify_signature(out_buf,
-                                                &ses->server->session_key,
+                                                ses->server,
                                                midQ->sequence_number+1);
                        if (rc) {
                                cERROR(1, "Unexpected SMB signature");
@@ -981,7 +981,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
            (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
                                     SECMODE_SIGN_ENABLED))) {
                rc = cifs_verify_signature(out_buf,
-                                           &ses->server->session_key,
+                                           ses->server,
                                           midQ->sequence_number+1);
                if (rc) {
                        cERROR(1, "Unexpected SMB signature");
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 7993b96ca348..5ea57c8c7f97 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -306,16 +306,16 @@ static int coda_statfs(struct dentry *dentry, struct kstatfs *buf)
 /* init_coda: used by filesystems.c to register coda */
-static int coda_get_sb(struct file_system_type *fs_type,
+static struct dentry *coda_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+        int flags, const char *dev_name, void *data)
 {
-        return get_sb_nodev(fs_type, flags, data, coda_fill_super, mnt);
+        return mount_nodev(fs_type, flags, data, coda_fill_super);
 }
 struct file_system_type coda_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "coda",
-        .get_sb         = coda_get_sb,
+        .mount          = coda_mount,
        .kill_sb        = kill_anon_super,
        .fs_flags       = FS_BINARY_MOUNTDATA,
 };
diff --git a/fs/compat.c b/fs/compat.c
index 52cfeb61da77..c580c322fa6b 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -49,6 +49,7 @@
 #include <linux/eventpoll.h>
 #include <linux/fs_struct.h>
 #include <linux/slab.h>
+#include <linux/pagemap.h>
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -606,14 +607,14 @@ ssize_t compat_rw_copy_check_uvector(int type,
        /*
         * Single unix specification:
         * We should -EINVAL if an element length is not >= 0 and fitting an
-         * ssize_t.  The total length is fitting an ssize_t
+         * ssize_t.
         *
-         * Be careful here because iov_len is a size_t not an ssize_t
+         * In Linux, the total length is limited to MAX_RW_COUNT, so there
+         * is no possibility of overflow.
         */
        tot_len = 0;
        ret = -EINVAL;
        for (seg = 0; seg < nr_segs; seg++) {
-                compat_ssize_t tmp = tot_len;
                compat_uptr_t buf;
                compat_ssize_t len;
@@ -624,13 +625,13 @@ ssize_t compat_rw_copy_check_uvector(int type,
                }
                if (len < 0)    /* size_t not fitting in compat_ssize_t .. */
                        goto out;
-                tot_len += len;
-                if (tot_len < tmp) /* maths overflow on the compat_ssize_t */
-                        goto out;
                if (!access_ok(vrfy_dir(type), compat_ptr(buf), len)) {
                        ret = -EFAULT;
                        goto out;
                }
+                if (len > MAX_RW_COUNT - tot_len)
+                        len = MAX_RW_COUNT - tot_len;
+                tot_len += len;
                iov->iov_base = compat_ptr(buf);
                iov->iov_len = (compat_size_t) len;
                uvector++;
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index 8c8d64230c2d..7d3607febe1c 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -104,16 +104,16 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent)
        return 0;
 }
-static int configfs_get_sb(struct file_system_type *fs_type,
+static struct dentry *configfs_do_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+        int flags, const char *dev_name, void *data)
 {
-        return get_sb_single(fs_type, flags, data, configfs_fill_super, mnt);
+        return mount_single(fs_type, flags, data, configfs_fill_super);
 }
 static struct file_system_type configfs_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "configfs",
-        .get_sb         = configfs_get_sb,
+        .mount          = configfs_do_mount,
        .kill_sb        = kill_litter_super,
 };
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 1e7a33028d33..32fd5fe9ca0e 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -533,17 +533,16 @@ static const struct super_operations cramfs_ops = {
        .statfs         = cramfs_statfs,
 };
-static int cramfs_get_sb(struct file_system_type *fs_type,
+static struct dentry *cramfs_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+        int flags, const char *dev_name, void *data)
 {
-        return get_sb_bdev(fs_type, flags, dev_name, data, cramfs_fill_super,
+        return mount_bdev(fs_type, flags, dev_name, data, cramfs_fill_super);
-                           mnt);
 }
 static struct file_system_type cramfs_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "cramfs",
-        .get_sb         = cramfs_get_sb,
+        .mount          = cramfs_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index a4ed8380e98a..37a8ca7c1222 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -135,17 +135,17 @@ static int debug_fill_super(struct super_block *sb, void *data, int silent)
        return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files);
 }
-static int debug_get_sb(struct file_system_type *fs_type,
+static struct dentry *debug_mount(struct file_system_type *fs_type,
                        int flags, const char *dev_name,
-                        void *data, struct vfsmount *mnt)
+                        void *data)
 {
-        return get_sb_single(fs_type, flags, data, debug_fill_super, mnt);
+        return mount_single(fs_type, flags, data, debug_fill_super);
 }
 static struct file_system_type debug_fs_type = {
        .owner =        THIS_MODULE,
        .name =         "debugfs",
-        .get_sb =       debug_get_sb,
+        .mount =        debug_mount,
        .kill_sb =      kill_litter_super,
 };
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 8b3ffd5b5235..1bb547c9cad6 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -331,7 +331,7 @@ static int compare_init_pts_sb(struct super_block *s, void *p)
 }
 /*
- * devpts_get_sb()
+ * devpts_mount()
 *
 *     If the '-o newinstance' mount option was specified, mount a new
 *     (private) instance of devpts.  PTYs created in this instance are
@@ -345,20 +345,20 @@ static int compare_init_pts_sb(struct super_block *s, void *p)
 *     semantics in devpts while preserving backward compatibility of the
 *     current 'single-namespace' semantics. i.e all mounts of devpts
 *     without the 'newinstance' mount option should bind to the initial
- *     kernel mount, like get_sb_single().
+ *     kernel mount, like mount_single().
 *
 *     Mounts with 'newinstance' option create a new, private namespace.
 *
 *     NOTE:
 *
- *     For single-mount semantics, devpts cannot use get_sb_single(),
+ *     For single-mount semantics, devpts cannot use mount_single(),
- *     because get_sb_single()/sget() find and use the super-block from
+ *     because mount_single()/sget() find and use the super-block from
 *     the most recent mount of devpts. But that recent mount may be a
- *     'newinstance' mount and get_sb_single() would pick the newinstance
+ *     'newinstance' mount and mount_single() would pick the newinstance
 *     super-block instead of the initial super-block.
 */
-static int devpts_get_sb(struct file_system_type *fs_type,
+static struct dentry *devpts_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+        int flags, const char *dev_name, void *data)
 {
        int error;
        struct pts_mount_opts opts;
@@ -366,7 +366,7 @@ static int devpts_get_sb(struct file_system_type *fs_type,
        error = parse_mount_options(data, PARSE_MOUNT, &opts);
        if (error)
-                return error;
+                return ERR_PTR(error);
        if (opts.newinstance)
                s = sget(fs_type, NULL, set_anon_super, NULL);
@@ -374,7 +374,7 @@ static int devpts_get_sb(struct file_system_type *fs_type,
                s = sget(fs_type, compare_init_pts_sb, set_anon_super, NULL);
        if (IS_ERR(s))
-                return PTR_ERR(s);
+                return ERR_CAST(s);
        if (!s->s_root) {
                s->s_flags = flags;
@@ -390,13 +390,11 @@ static int devpts_get_sb(struct file_system_type *fs_type,
        if (error)
                goto out_undo_sget;
-        simple_set_mnt(mnt, s);
+        return dget(s->s_root);
-        return 0;
 out_undo_sget:
        deactivate_locked_super(s);
-        return error;
+        return ERR_PTR(error);
 }
 #else
@@ -404,10 +402,10 @@ out_undo_sget:
 * This supports only the legacy single-instance semantics (no
 * multiple-instance semantics)
 */
-static int devpts_get_sb(struct file_system_type *fs_type, int flags,
+static struct dentry *devpts_mount(struct file_system_type *fs_type, int flags,
-                const char *dev_name, void *data, struct vfsmount *mnt)
+                const char *dev_name, void *data)
 {
-        return get_sb_single(fs_type, flags, data, devpts_fill_super, mnt);
+        return mount_single(fs_type, flags, data, devpts_fill_super);
 }
 #endif
@@ -421,7 +419,7 @@ static void devpts_kill_sb(struct super_block *sb)
 static struct file_system_type devpts_fs_type = {
        .name           = "devpts",
-        .get_sb         = devpts_get_sb,
+        .mount          = devpts_mount,
        .kill_sb        = devpts_kill_sb,
 };
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 40186b959429..413a3c48f0bb 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -377,6 +377,7 @@ struct ecryptfs_mount_crypt_stat {
 #define ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES      0x00000010
 #define ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK   0x00000020
 #define ECRYPTFS_GLOBAL_ENCFN_USE_FEK          0x00000040
+#define ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY    0x00000080
        u32 flags;
        struct list_head global_auth_tok_list;
        struct mutex global_auth_tok_list_mutex;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 3fbc94203380..9d1a22d62765 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -32,6 +32,7 @@
 #include <linux/crypto.h>
 #include <linux/fs_stack.h>
 #include <linux/slab.h>
+#include <linux/xattr.h>
 #include <asm/unaligned.h>
 #include "ecryptfs_kernel.h"
@@ -70,15 +71,19 @@ ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
        struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
        struct dentry *dentry_save;
        struct vfsmount *vfsmount_save;
+        unsigned int flags_save;
        int rc;
        dentry_save = nd->path.dentry;
        vfsmount_save = nd->path.mnt;
+        flags_save = nd->flags;
        nd->path.dentry = lower_dentry;
        nd->path.mnt = lower_mnt;
+        nd->flags &= ~LOOKUP_OPEN;
        rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd);
        nd->path.dentry = dentry_save;
        nd->path.mnt = vfsmount_save;
+        nd->flags = flags_save;
        return rc;
 }
@@ -1108,10 +1113,8 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
                rc = -EOPNOTSUPP;
                goto out;
        }
-        mutex_lock(&lower_dentry->d_inode->i_mutex);
-        rc = lower_dentry->d_inode->i_op->setxattr(lower_dentry, name, value,
+        rc = vfs_setxattr(lower_dentry, name, value, size, flags);
-                                                   size, flags);
-        mutex_unlock(&lower_dentry->d_inode->i_mutex);
 out:
        return rc;
 }
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 73811cfa2ea4..b1f6858a5223 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -446,6 +446,7 @@ out:
 */
 static int
 ecryptfs_find_auth_tok_for_sig(
+        struct key **auth_tok_key,
        struct ecryptfs_auth_tok **auth_tok,
        struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
        char *sig)
@@ -453,12 +454,21 @@ ecryptfs_find_auth_tok_for_sig(
        struct ecryptfs_global_auth_tok *global_auth_tok;
        int rc = 0;
+        (*auth_tok_key) = NULL;
        (*auth_tok) = NULL;
        if (ecryptfs_find_global_auth_tok_for_sig(&global_auth_tok,
                                                  mount_crypt_stat, sig)) {
-                struct key *auth_tok_key;
-                rc = ecryptfs_keyring_auth_tok_for_sig(&auth_tok_key, auth_tok,
+                /* If the ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY flag is set in
+                 * the mount_crypt_stat structure, refuse to use auth toks
+                 * that were not inserted through the
+                 * ecryptfs_add_global_auth_tok function.
+                 */
+                if (mount_crypt_stat->flags
+                                & ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY)
+                        return -EINVAL;
+                rc = ecryptfs_keyring_auth_tok_for_sig(auth_tok_key, auth_tok,
                                                       sig);
        } else
                (*auth_tok) = global_auth_tok->global_auth_tok;
@@ -509,6 +519,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
                             char *filename, size_t filename_size)
 {
        struct ecryptfs_write_tag_70_packet_silly_stack *s;
+        struct key *auth_tok_key = NULL;
        int rc = 0;
        s = kmalloc(sizeof(*s), GFP_KERNEL);
@@ -606,6 +617,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
        }
        dest[s->i++] = s->cipher_code;
        rc = ecryptfs_find_auth_tok_for_sig(
+                &auth_tok_key,
                &s->auth_tok, mount_crypt_stat,
                mount_crypt_stat->global_default_fnek_sig);
        if (rc) {
@@ -753,6 +765,8 @@ out_free_unlock:
 out_unlock:
        mutex_unlock(s->tfm_mutex);
 out:
+        if (auth_tok_key)
+                key_put(auth_tok_key);
        kfree(s);
        return rc;
 }
@@ -798,6 +812,7 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
                             char *data, size_t max_packet_size)
 {
        struct ecryptfs_parse_tag_70_packet_silly_stack *s;
+        struct key *auth_tok_key = NULL;
        int rc = 0;
        (*packet_size) = 0;
@@ -910,7 +925,8 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
         * >= ECRYPTFS_MAX_IV_BYTES. */
        memset(s->iv, 0, ECRYPTFS_MAX_IV_BYTES);
        s->desc.info = s->iv;
-        rc = ecryptfs_find_auth_tok_for_sig(&s->auth_tok, mount_crypt_stat,
+        rc = ecryptfs_find_auth_tok_for_sig(&auth_tok_key,
+                                            &s->auth_tok, mount_crypt_stat,
                                            s->fnek_sig_hex);
        if (rc) {
                printk(KERN_ERR "%s: Error attempting to find auth tok for "
@@ -986,6 +1002,8 @@ out:
                (*filename_size) = 0;
                (*filename) = NULL;
        }
+        if (auth_tok_key)
+                key_put(auth_tok_key);
        kfree(s);
        return rc;
 }
@@ -1557,14 +1575,19 @@ int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key,
                       ECRYPTFS_VERSION_MAJOR,
                       ECRYPTFS_VERSION_MINOR);
                rc = -EINVAL;
-                goto out;
+                goto out_release_key;
        }
        if ((*auth_tok)->token_type != ECRYPTFS_PASSWORD
            && (*auth_tok)->token_type != ECRYPTFS_PRIVATE_KEY) {
                printk(KERN_ERR "Invalid auth_tok structure "
                       "returned from key query\n");
                rc = -EINVAL;
-                goto out;
+                goto out_release_key;
+        }
+out_release_key:
+        if (rc) {
+                key_put(*auth_tok_key);
+                (*auth_tok_key) = NULL;
        }
 out:
        return rc;
@@ -1688,6 +1711,7 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
        struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
        size_t tag_11_contents_size;
        size_t tag_11_packet_size;
+        struct key *auth_tok_key = NULL;
        int rc = 0;
        INIT_LIST_HEAD(&auth_tok_list);
@@ -1784,6 +1808,10 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
         * just one will be sufficient to decrypt to get the FEK. */
 find_next_matching_auth_tok:
        found_auth_tok = 0;
+        if (auth_tok_key) {
+                key_put(auth_tok_key);
+                auth_tok_key = NULL;
+        }
        list_for_each_entry(auth_tok_list_item, &auth_tok_list, list) {
                candidate_auth_tok = &auth_tok_list_item->auth_tok;
                if (unlikely(ecryptfs_verbosity > 0)) {
@@ -1800,10 +1828,11 @@ find_next_matching_auth_tok:
                        rc = -EINVAL;
                        goto out_wipe_list;
                }
-                ecryptfs_find_auth_tok_for_sig(&matching_auth_tok,
+                rc = ecryptfs_find_auth_tok_for_sig(&auth_tok_key,
+                                               &matching_auth_tok,
                                               crypt_stat->mount_crypt_stat,
                                               candidate_auth_tok_sig);
-                if (matching_auth_tok) {
+                if (!rc) {
                        found_auth_tok = 1;
                        goto found_matching_auth_tok;
                }
@@ -1866,6 +1895,8 @@ found_matching_auth_tok:
 out_wipe_list:
        wipe_auth_tok_list(&auth_tok_list);
 out:
+        if (auth_tok_key)
+                key_put(auth_tok_key);
        return rc;
 }
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index cbd4e18adb20..a9dbd62518e6 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -208,7 +208,8 @@ enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig,
       ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata,
       ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig,
       ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes,
-       ecryptfs_opt_unlink_sigs, ecryptfs_opt_err };
+       ecryptfs_opt_unlink_sigs, ecryptfs_opt_mount_auth_tok_only,
+       ecryptfs_opt_err };
 static const match_table_t tokens = {
        {ecryptfs_opt_sig, "sig=%s"},
@@ -223,6 +224,7 @@ static const match_table_t tokens = {
        {ecryptfs_opt_fn_cipher, "ecryptfs_fn_cipher=%s"},
        {ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"},
        {ecryptfs_opt_unlink_sigs, "ecryptfs_unlink_sigs"},
+        {ecryptfs_opt_mount_auth_tok_only, "ecryptfs_mount_auth_tok_only"},
        {ecryptfs_opt_err, NULL}
 };
@@ -406,6 +408,10 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options)
                case ecryptfs_opt_unlink_sigs:
                        mount_crypt_stat->flags |= ECRYPTFS_UNLINK_SIGS;
                        break;
+                case ecryptfs_opt_mount_auth_tok_only:
+                        mount_crypt_stat->flags |=
+                                ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY;
+                        break;
                case ecryptfs_opt_err:
                default:
                        printk(KERN_WARNING
@@ -540,9 +546,8 @@ out:
 *                        ecryptfs_interpose to perform most of the linking
 * ecryptfs_interpose(): links the lower filesystem into ecryptfs (inode.c)
 */
-static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags,
+static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags,
-                        const char *dev_name, void *raw_data,
+                        const char *dev_name, void *raw_data)
-                        struct vfsmount *mnt)
 {
        struct super_block *s;
        struct ecryptfs_sb_info *sbi;
@@ -607,8 +612,7 @@ static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags,
                err = "Reading sb failed";
                goto out;
        }
-        simple_set_mnt(mnt, s);
+        return dget(s->s_root);
-        return 0;
 out:
        if (sbi) {
@@ -616,7 +620,7 @@ out:
                kmem_cache_free(ecryptfs_sb_info_cache, sbi);
        }
        printk(KERN_ERR "%s; rc = [%d]\n", err, rc);
-        return rc;
+        return ERR_PTR(rc);
 }
 /**
@@ -639,7 +643,7 @@ static void ecryptfs_kill_block_super(struct super_block *sb)
 static struct file_system_type ecryptfs_fs_type = {
        .owner = THIS_MODULE,
        .name = "ecryptfs",
-        .get_sb = ecryptfs_get_sb,
+        .mount = ecryptfs_mount,
        .kill_sb = ecryptfs_kill_block_super,
        .fs_flags = 0
 };
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index f7fc286a3aa9..253732382d37 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -180,6 +180,8 @@ static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt)
                seq_printf(m, ",ecryptfs_encrypted_view");
        if (mount_crypt_stat->flags & ECRYPTFS_UNLINK_SIGS)
                seq_printf(m, ",ecryptfs_unlink_sigs");
+        if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY)
+                seq_printf(m, ",ecryptfs_mount_auth_tok_only");
        return 0;
 }
diff --git a/fs/efs/super.c b/fs/efs/super.c
index f04942810818..5073a07652cc 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -20,16 +20,16 @@
 static int efs_statfs(struct dentry *dentry, struct kstatfs *buf);
 static int efs_fill_super(struct super_block *s, void *d, int silent);
-static int efs_get_sb(struct file_system_type *fs_type,
+static struct dentry *efs_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+        int flags, const char *dev_name, void *data)
 {
-        return get_sb_bdev(fs_type, flags, dev_name, data, efs_fill_super, mnt);
+        return mount_bdev(fs_type, flags, dev_name, data, efs_fill_super);
 }
 static struct file_system_type efs_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "efs",
-        .get_sb         = efs_get_sb,
+        .mount          = efs_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 047e92fa3af8..79c3ae6e0456 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -659,19 +659,19 @@ free_bdi:
 /*
 * Set up the superblock (calls exofs_fill_super eventually)
 */
-static int exofs_get_sb(struct file_system_type *type,
+static struct dentry *exofs_mount(struct file_system_type *type,
                          int flags, const char *dev_name,
-                          void *data, struct vfsmount *mnt)
+                          void *data)
 {
        struct exofs_mountopt opts;
        int ret;
        ret = parse_options(data, &opts);
        if (ret)
-                return ret;
+                return ERR_PTR(ret);
        opts.dev_name = dev_name;
-        return get_sb_nodev(type, flags, &opts, exofs_fill_super, mnt);
+        return mount_nodev(type, flags, &opts, exofs_fill_super);
 }
 /*
@@ -809,7 +809,7 @@ static const struct export_operations exofs_export_ops = {
 static struct file_system_type exofs_type = {
        .owner          = THIS_MODULE,
        .name           = "exofs",
-        .get_sb         = exofs_get_sb,
+        .mount          = exofs_mount,
        .kill_sb        = generic_shutdown_super,
 };
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 0901320671da..d89e0b6a2d78 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1356,10 +1356,10 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
        return 0;
 }
-static int ext2_get_sb(struct file_system_type *fs_type,
+static struct dentry *ext2_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+        int flags, const char *dev_name, void *data)
 {
-        return get_sb_bdev(fs_type, flags, dev_name, data, ext2_fill_super, mnt);
+        return mount_bdev(fs_type, flags, dev_name, data, ext2_fill_super);
 }
 #ifdef CONFIG_QUOTA
@@ -1473,7 +1473,7 @@ out:
 static struct file_system_type ext2_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "ext2",
-        .get_sb         = ext2_get_sb,
+        .mount          = ext2_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index db87413d3479..2fedaf8b5012 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -3020,16 +3020,16 @@ out:
 #endif
-static int ext3_get_sb(struct file_system_type *fs_type,
+static struct dentry *ext3_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+        int flags, const char *dev_name, void *data)
 {
-        return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super, mnt);
+        return mount_bdev(fs_type, flags, dev_name, data, ext3_fill_super);
 }
 static struct file_system_type ext3_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "ext3",
-        .get_sb         = ext3_get_sb,
+        .mount          = ext3_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 8b5dd6369f82..6a5edea2d70b 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -177,7 +177,7 @@ struct mpage_da_data {
 struct ext4_io_page {
        struct page     *p_page;
-        int             p_count;
+        atomic_t        p_count;
 };
 #define MAX_IO_PAGES 128
@@ -858,6 +858,7 @@ struct ext4_inode_info {
        spinlock_t i_completed_io_lock;
        /* current io_end structure for async DIO write*/
        ext4_io_end_t *cur_aio_dio;
+        atomic_t i_ioend_count; /* Number of outstanding io_end structs */
        /*
         * Transactions that contain inode's metadata needed to complete
@@ -2060,6 +2061,7 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
 /* page-io.c */
 extern int __init ext4_init_pageio(void);
 extern void ext4_exit_pageio(void);
+extern void ext4_ioend_wait(struct inode *);
 extern void ext4_free_io_end(ext4_io_end_t *io);
 extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
 extern int ext4_end_io_nolock(ext4_io_end_t *io);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2d6c6c8c036d..bdbe69902207 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -53,6 +53,7 @@
 static inline int ext4_begin_ordered_truncate(struct inode *inode,
                                              loff_t new_size)
 {
+        trace_ext4_begin_ordered_truncate(inode, new_size);
        return jbd2_journal_begin_ordered_truncate(
                                        EXT4_SB(inode->i_sb)->s_journal,
                                        &EXT4_I(inode)->jinode,
@@ -178,6 +179,7 @@ void ext4_evict_inode(struct inode *inode)
        handle_t *handle;
        int err;
+        trace_ext4_evict_inode(inode);
        if (inode->i_nlink) {
                truncate_inode_pages(&inode->i_data, 0);
                goto no_delete;
@@ -2718,7 +2720,7 @@ static int ext4_writepage(struct page *page,
         * try to create them using __block_write_begin.  If this
         * fails, redirty the page and move on.
         */
-        if (!page_buffers(page)) {
+        if (!page_has_buffers(page)) {
                if (__block_write_begin(page, 0, len,
                                        noalloc_get_block_write)) {
                redirty_page:
@@ -2732,12 +2734,10 @@ static int ext4_writepage(struct page *page,
        if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
                              ext4_bh_delay_or_unwritten)) {
                /*
-                 * We don't want to do block allocation So redirty the
+                 * We don't want to do block allocation, so redirty
-                 * page and return We may reach here when we do a
+                 * the page and return.  We may reach here when we do
-                 * journal commit via
+                 * a journal commit via journal_submit_inode_data_buffers.
-                 * journal_submit_inode_data_buffers.  If we don't
+                 * We can also reach here via shrink_page_list
-                 * have mapping block we just ignore them. We can also
-                 * reach here via shrink_page_list
                 */
                goto redirty_page;
        }
@@ -5412,9 +5412,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
         * will return the blocks that include the delayed allocation
         * blocks for this file.
         */
-        spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
        delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks;
-        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
        stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9;
        return 0;
@@ -5651,6 +5649,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
        int err, ret;
        might_sleep();
+        trace_ext4_mark_inode_dirty(inode, _RET_IP_);
        err = ext4_reserve_inode_write(handle, inode, &iloc);
        if (ext4_handle_valid(handle) &&
            EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index c58eba34724a..5b4d4e3a4d58 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4640,8 +4640,6 @@ do_more:
                 * with group lock held. generate_buddy look at
                 * them with group lock_held
                 */
-                if (test_opt(sb, DISCARD))
-                        ext4_issue_discard(sb, block_group, bit, count);
                ext4_lock_group(sb, block_group);
                mb_clear_bits(bitmap_bh->b_data, bit, count);
                mb_free_blocks(inode, &e4b, bit, count);
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 46a7d6a9d976..7f5451cd1d38 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -32,8 +32,14 @@
 static struct kmem_cache *io_page_cachep, *io_end_cachep;
+#define WQ_HASH_SZ              37      /* number of ioend wait-queue buckets */
+#define to_ioend_wq(v)  (&ioend_wq[((unsigned long)(v)) % WQ_HASH_SZ])
+static wait_queue_head_t ioend_wq[WQ_HASH_SZ];
 int __init ext4_init_pageio(void)
 {
+        int i;
        io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT);
        if (io_page_cachep == NULL)
                return -ENOMEM;
@@ -42,6 +48,8 @@ int __init ext4_init_pageio(void)
                kmem_cache_destroy(io_page_cachep);
                return -ENOMEM;
        }
+        for (i = 0; i < WQ_HASH_SZ; i++)
+                init_waitqueue_head(&ioend_wq[i]);
        return 0;
 }
@@ -52,24 +60,37 @@ void ext4_exit_pageio(void)
        kmem_cache_destroy(io_page_cachep);
 }
+void ext4_ioend_wait(struct inode *inode)
+{
+        wait_queue_head_t *waitq = to_ioend_wq(inode); /* bucket hashed by inode */
+        wait_event(*waitq, !atomic_read(&EXT4_I(inode)->i_ioend_count));
+}
+static void put_io_page(struct ext4_io_page *io_page)
+{
+        if (!atomic_dec_and_test(&io_page->p_count))
+                return; /* other holders remain; nothing to release yet */
+        end_page_writeback(io_page->p_page);
+        put_page(io_page->p_page);
+        kmem_cache_free(io_page_cachep, io_page);
+}
 void ext4_free_io_end(ext4_io_end_t *io)
 {
        int i;
+        wait_queue_head_t *wq;
        BUG_ON(!io);
        if (io->page)
                put_page(io->page);
-        for (i = 0; i < io->num_io_pages; i++) {
+        for (i = 0; i < io->num_io_pages; i++)
-                if (--io->pages[i]->p_count == 0) {
+                put_io_page(io->pages[i]);
-                        struct page *page = io->pages[i]->p_page;
-                        end_page_writeback(page);
-                        put_page(page);
-                        kmem_cache_free(io_page_cachep, io->pages[i]);
-                }
-        }
        io->num_io_pages = 0;
-        iput(io->inode);
+        wq = to_ioend_wq(io->inode);
+        if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) &&
+            waitqueue_active(wq))
+                wake_up_all(wq);
        kmem_cache_free(io_end_cachep, io);
 }
@@ -142,8 +163,8 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
        io = kmem_cache_alloc(io_end_cachep, flags);
        if (io) {
                memset(io, 0, sizeof(*io));
-                io->inode = igrab(inode);
+                atomic_inc(&EXT4_I(inode)->i_ioend_count);
-                BUG_ON(!io->inode);
+                io->inode = inode;
                INIT_WORK(&io->work, ext4_end_io_work);
                INIT_LIST_HEAD(&io->list);
        }
@@ -171,35 +192,16 @@ static void ext4_end_bio(struct bio *bio, int error)
        struct workqueue_struct *wq;
        struct inode *inode;
        unsigned long flags;
-        ext4_fsblk_t err_block;
        int i;
+        sector_t bi_sector = bio->bi_sector; /* bio_put() may free bio */
        BUG_ON(!io_end);
-        inode = io_end->inode;
        bio->bi_private = NULL;
        bio->bi_end_io = NULL;
        if (test_bit(BIO_UPTODATE, &bio->bi_flags))
                error = 0;
-        err_block = bio->bi_sector >> (inode->i_blkbits - 9);
        bio_put(bio);
-        if (!(inode->i_sb->s_flags & MS_ACTIVE)) {
-                pr_err("sb umounted, discard end_io request for inode %lu\n",
-                        io_end->inode->i_ino);
-                ext4_free_io_end(io_end);
-                return;
-        }
-        if (error) {
-                io_end->flag |= EXT4_IO_END_ERROR;
-                ext4_warning(inode->i_sb, "I/O error writing to inode %lu "
-                             "(offset %llu size %ld starting block %llu)",
-                             inode->i_ino,
-                             (unsigned long long) io_end->offset,
-                             (long) io_end->size,
-                             (unsigned long long) err_block);
-        }
        for (i = 0; i < io_end->num_io_pages; i++) {
                struct page *page = io_end->pages[i]->p_page;
                struct buffer_head *bh, *head;
@@ -236,13 +238,7 @@ static void ext4_end_bio(struct bio *bio, int error)
                        } while (bh != head);
                }
-                if (--io_end->pages[i]->p_count == 0) {
+                put_io_page(io_end->pages[i]);
-                        struct page *page = io_end->pages[i]->p_page;
-                        end_page_writeback(page);
-                        put_page(page);
-                        kmem_cache_free(io_page_cachep, io_end->pages[i]);
-                }
                /*
                 * If this is a partial write which happened to make
@@ -254,8 +250,19 @@ static void ext4_end_bio(struct bio *bio, int error)
                if (!partial_write)
                        SetPageUptodate(page);
        }
        io_end->num_io_pages = 0;
+        inode = io_end->inode;
+        if (error) {
+                io_end->flag |= EXT4_IO_END_ERROR;
+                ext4_warning(inode->i_sb, "I/O error writing to inode %lu "
+                             "(offset %llu size %ld starting block %llu)",
+                             inode->i_ino,
+                             (unsigned long long) io_end->offset,
+                             (long) io_end->size,
+                             (unsigned long long)
+                             bi_sector >> (inode->i_blkbits - 9));
+        }
        /* Add the io_end to per-inode completed io list*/
        spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
@@ -305,7 +311,6 @@ static int io_submit_init(struct ext4_io_submit *io,
        bio->bi_private = io->io_end = io_end;
        bio->bi_end_io = ext4_end_bio;
-        io_end->inode = inode;
        io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);
        io->io_bio = bio;
@@ -360,7 +365,7 @@ submit_and_retry:
        if ((io_end->num_io_pages == 0) ||
            (io_end->pages[io_end->num_io_pages-1] != io_page)) {
                io_end->pages[io_end->num_io_pages++] = io_page;
-                io_page->p_count++;
+                atomic_inc(&io_page->p_count);
        }
        return 0;
 }
@@ -389,7 +394,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
                return -ENOMEM;
        }
        io_page->p_page = page;
-        io_page->p_count = 0;
+        atomic_set(&io_page->p_count, 1);
        get_page(page);
        for (bh = head = page_buffers(page), block_start = 0;
@@ -421,10 +426,6 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
         * PageWriteback bit from the page to prevent the system from
         * wedging later on.
         */
-        if (io_page->p_count == 0) {
+        put_io_page(io_page);
-                put_page(page);
-                end_page_writeback(page);
-                kmem_cache_free(io_page_cachep, io_page);
-        }
        return ret;
 }
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 0348ce066592..61182fe6254e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -73,8 +73,8 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
 static int ext4_unfreeze(struct super_block *sb);
 static void ext4_write_super(struct super_block *sb);
 static int ext4_freeze(struct super_block *sb);
-static int ext4_get_sb(struct file_system_type *fs_type, int flags,
+static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
-                       const char *dev_name, void *data, struct vfsmount *mnt);
+                       const char *dev_name, void *data);
 static void ext4_destroy_lazyinit_thread(void);
 static void ext4_unregister_li_request(struct super_block *sb);
@@ -82,7 +82,7 @@ static void ext4_unregister_li_request(struct super_block *sb);
 static struct file_system_type ext3_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "ext3",
-        .get_sb         = ext4_get_sb,
+        .mount          = ext4_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
@@ -828,12 +828,22 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
        ei->cur_aio_dio = NULL;
        ei->i_sync_tid = 0;
        ei->i_datasync_tid = 0;
+        atomic_set(&ei->i_ioend_count, 0);
        return &ei->vfs_inode;
 }
+static int ext4_drop_inode(struct inode *inode)
+{
+        const int verdict = generic_drop_inode(inode); /* generic policy decides */
+        trace_ext4_drop_inode(inode, verdict);         /* record the decision */
+        return verdict;
+}
 static void ext4_destroy_inode(struct inode *inode)
 {
+        ext4_ioend_wait(inode);
        if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
                ext4_msg(inode->i_sb, KERN_ERR,
                         "Inode %lu (%p): orphan list check failed!",
@@ -1173,6 +1183,7 @@ static const struct super_operations ext4_sops = {
        .destroy_inode  = ext4_destroy_inode,
        .write_inode    = ext4_write_inode,
        .dirty_inode    = ext4_dirty_inode,
+        .drop_inode     = ext4_drop_inode,
        .evict_inode    = ext4_evict_inode,
        .put_super      = ext4_put_super,
        .sync_fs        = ext4_sync_fs,
@@ -1194,6 +1205,7 @@ static const struct super_operations ext4_nojournal_sops = {
        .destroy_inode  = ext4_destroy_inode,
        .write_inode    = ext4_write_inode,
        .dirty_inode    = ext4_dirty_inode,
+        .drop_inode     = ext4_drop_inode,
        .evict_inode    = ext4_evict_inode,
        .write_super    = ext4_write_super,
        .put_super      = ext4_put_super,
@@ -2699,7 +2711,6 @@ static int ext4_lazyinit_thread(void *arg)
        struct ext4_li_request *elr;
        unsigned long next_wakeup;
        DEFINE_WAIT(wait);
-        int ret;
        BUG_ON(NULL == eli);
@@ -2723,13 +2734,12 @@ cont_thread:
                        elr = list_entry(pos, struct ext4_li_request,
                                         lr_request);
-                        if (time_after_eq(jiffies, elr->lr_next_sched))
+                        if (time_after_eq(jiffies, elr->lr_next_sched)) {
-                                ret = ext4_run_li_request(elr);
+                                if (ext4_run_li_request(elr) != 0) {
+                                        /* error, remove the lazy_init job */
-                        if (ret) {
+                                        ext4_remove_li_request(elr);
-                                ret = 0;
+                                        continue;
-                                ext4_remove_li_request(elr);
+                                }
-                                continue;
                        }
                        if (time_before(elr->lr_next_sched, next_wakeup))
@@ -2740,7 +2750,8 @@ cont_thread:
                if (freezing(current))
                        refrigerator();
-                if (time_after_eq(jiffies, next_wakeup)) {
+                if ((time_after_eq(jiffies, next_wakeup)) ||
+                    (MAX_JIFFY_OFFSET == next_wakeup)) {
                        cond_resched();
                        continue;
                }
@@ -3348,6 +3359,24 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        get_random_bytes(&sbi->s_next_generation, sizeof(u32));
        spin_lock_init(&sbi->s_next_gen_lock);
+        err = percpu_counter_init(&sbi->s_freeblocks_counter,
+                        ext4_count_free_blocks(sb));
+        if (!err) {
+                err = percpu_counter_init(&sbi->s_freeinodes_counter,
+                                ext4_count_free_inodes(sb));
+        }
+        if (!err) {
+                err = percpu_counter_init(&sbi->s_dirs_counter,
+                                ext4_count_dirs(sb));
+        }
+        if (!err) {
+                err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
+        }
+        if (err) {
+                ext4_msg(sb, KERN_ERR, "insufficient memory");
+                goto failed_mount3;
+        }
        sbi->s_stripe = ext4_get_stripe_size(sbi);
        sbi->s_max_writeback_mb_bump = 128;
@@ -3446,22 +3475,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        }
        set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
-no_journal:
+        /*
-        err = percpu_counter_init(&sbi->s_freeblocks_counter,
+         * The journal may have updated the bg summary counts, so we
-                                  ext4_count_free_blocks(sb));
+         * need to update the global counters.
-        if (!err)
+         */
-                err = percpu_counter_init(&sbi->s_freeinodes_counter,
+        percpu_counter_set(&sbi->s_freeblocks_counter,
-                                          ext4_count_free_inodes(sb));
+                           ext4_count_free_blocks(sb));
-        if (!err)
+        percpu_counter_set(&sbi->s_freeinodes_counter,
-                err = percpu_counter_init(&sbi->s_dirs_counter,
+                           ext4_count_free_inodes(sb));
-                                          ext4_count_dirs(sb));
+        percpu_counter_set(&sbi->s_dirs_counter,
-        if (!err)
+                           ext4_count_dirs(sb));
-                err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
+        percpu_counter_set(&sbi->s_dirtyblocks_counter, 0);
-        if (err) {
-                ext4_msg(sb, KERN_ERR, "insufficient memory");
-                goto failed_mount_wq;
-        }
+no_journal:
        EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten");
        if (!EXT4_SB(sb)->dio_unwritten_wq) {
                printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
@@ -3611,10 +3637,6 @@ failed_mount_wq:
                jbd2_journal_destroy(sbi->s_journal);
                sbi->s_journal = NULL;
        }
-        percpu_counter_destroy(&sbi->s_freeblocks_counter);
-        percpu_counter_destroy(&sbi->s_freeinodes_counter);
-        percpu_counter_destroy(&sbi->s_dirs_counter);
-        percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
 failed_mount3:
        if (sbi->s_flex_groups) {
                if (is_vmalloc_addr(sbi->s_flex_groups))
@@ -3622,6 +3644,10 @@ failed_mount3:
                else
                        kfree(sbi->s_flex_groups);
        }
+        percpu_counter_destroy(&sbi->s_freeblocks_counter);
+        percpu_counter_destroy(&sbi->s_freeinodes_counter);
+        percpu_counter_destroy(&sbi->s_dirs_counter);
+        percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
 failed_mount2:
        for (i = 0; i < db_count; i++)
                brelse(sbi->s_group_desc[i]);
@@ -3949,13 +3975,11 @@ static int ext4_commit_super(struct super_block *sb, int sync)
        else
                es->s_kbytes_written =
                        cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
-        if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeblocks_counter))
+        ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
-                ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
+                                           &EXT4_SB(sb)->s_freeblocks_counter));
-                                        &EXT4_SB(sb)->s_freeblocks_counter));
+        es->s_free_inodes_count =
-        if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
+                cpu_to_le32(percpu_counter_sum_positive(
-                es->s_free_inodes_count =
+                                &EXT4_SB(sb)->s_freeinodes_counter));
-                        cpu_to_le32(percpu_counter_sum_positive(
-                                        &EXT4_SB(sb)->s_freeinodes_counter));
        sb->s_dirt = 0;
        BUFFER_TRACE(sbh, "marking dirty");
        mark_buffer_dirty(sbh);
@@ -4556,12 +4580,10 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
 static int ext4_quota_off(struct super_block *sb, int type)
 {
-        /* Force all delayed allocation blocks to be allocated */
+        /* Force all delayed allocation blocks to be allocated.
-        if (test_opt(sb, DELALLOC)) {
+         * Caller already holds s_umount sem */
-                down_read(&sb->s_umount);
+        if (test_opt(sb, DELALLOC))
                sync_filesystem(sb);
-                up_read(&sb->s_umount);
-        }
        return dquot_quota_off(sb, type);
 }
@@ -4667,17 +4689,17 @@ out:
 #endif
-static int ext4_get_sb(struct file_system_type *fs_type, int flags,
+static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
-                       const char *dev_name, void *data, struct vfsmount *mnt)
+                       const char *dev_name, void *data)
 {
-        return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt);
+        return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super);
 }
 #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
 static struct file_system_type ext2_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "ext2",
-        .get_sb         = ext4_get_sb,
+        .mount          = ext4_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
@@ -4722,7 +4744,7 @@ static inline void unregister_as_ext3(void) { }
 static struct file_system_type ext4_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "ext4",
-        .get_sb         = ext4_get_sb,
+        .mount          = ext4_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index bbca5c186ae7..3345aabd1dd7 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -675,18 +675,17 @@ static int msdos_fill_super(struct super_block *sb, void *data, int silent)
        return 0;
 }
-static int msdos_get_sb(struct file_system_type *fs_type,
+static struct dentry *msdos_mount(struct file_system_type *fs_type,
                        int flags, const char *dev_name,
-                        void *data, struct vfsmount *mnt)
+                        void *data)
 {
-        return get_sb_bdev(fs_type, flags, dev_name, data, msdos_fill_super,
+        return mount_bdev(fs_type, flags, dev_name, data, msdos_fill_super);
-                           mnt);
 }
 static struct file_system_type msdos_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "msdos",
-        .get_sb         = msdos_get_sb,
+        .mount          = msdos_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 6f0f6c9a0152..b936703b8924 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -1071,18 +1071,17 @@ static int vfat_fill_super(struct super_block *sb, void *data, int silent)
        return 0;
 }
-static int vfat_get_sb(struct file_system_type *fs_type,
+static struct dentry *vfat_mount(struct file_system_type *fs_type,
                       int flags, const char *dev_name,
-                       void *data, struct vfsmount *mnt)
+                       void *data)
 {
-        return get_sb_bdev(fs_type, flags, dev_name, data, vfat_fill_super,
+        return mount_bdev(fs_type, flags, dev_name, data, vfat_fill_super);
-                           mnt);
 }
 static struct file_system_type vfat_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "vfat",
-        .get_sb         = vfat_get_sb,
+        .mount          = vfat_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index 71b0148b8784..9d1c99558389 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -246,17 +246,16 @@ out:
 /*
 * The usual module blurb.
 */
-static int vxfs_get_sb(struct file_system_type *fs_type,
+static struct dentry *vxfs_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+        int flags, const char *dev_name, void *data)
 {
-        return get_sb_bdev(fs_type, flags, dev_name, data, vxfs_fill_super,
+        return mount_bdev(fs_type, flags, dev_name, data, vxfs_fill_super);
-                           mnt);
 }
 static struct file_system_type vxfs_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "vxfs",
-        .get_sb         = vxfs_get_sb,
+        .mount          = vxfs_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index aed881a76b22..3d06ccc953aa 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -707,6 +707,17 @@ get_next_work_item(struct backing_dev_info *bdi)
        return work;
 }
+/*
+ * Add in the number of potentially dirty inodes, because each inode
+ * write can dirty pagecache in the underlying blockdev.
+ */
+static unsigned long get_nr_dirty_pages(void)
+{
+        return global_page_state(NR_FILE_DIRTY) +
+                global_page_state(NR_UNSTABLE_NFS) +
+                get_nr_dirty_inodes();
+}
 static long wb_check_old_data_flush(struct bdi_writeback *wb)
 {
        unsigned long expired;
@@ -724,13 +735,7 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
                return 0;
        wb->last_old_flush = jiffies;
-        /*
+        nr_pages = get_nr_dirty_pages();
-         * Add in the number of potentially dirty inodes, because each inode
-         * write can dirty pagecache in the underlying blockdev.
-         */
-        nr_pages = global_page_state(NR_FILE_DIRTY) +
-                        global_page_state(NR_UNSTABLE_NFS) +
-                        get_nr_dirty_inodes();
        if (nr_pages) {
                struct wb_writeback_work work = {
@@ -1076,32 +1081,42 @@ static void wait_sb_inodes(struct super_block *sb)
 }
 /**
- * writeback_inodes_sb  -       writeback dirty inodes from given super_block
+ * writeback_inodes_sb_nr -     writeback dirty inodes from given super_block
 * @sb: the superblock
+ * @nr: the number of pages to write
 *
 * Start writeback on some inodes on this super_block. No guarantees are made
 * on how many (if any) will be written, and this function does not wait
- * for IO completion of submitted IO. The number of pages submitted is
+ * for IO completion of submitted IO.
- * returned.
 */
-void writeback_inodes_sb(struct super_block *sb)
+void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr)
 {
-        unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
-        unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
        DECLARE_COMPLETION_ONSTACK(done);
        struct wb_writeback_work work = {
                .sb             = sb,
                .sync_mode      = WB_SYNC_NONE,
                .done           = &done,
+                .nr_pages       = nr,
        };
        WARN_ON(!rwsem_is_locked(&sb->s_umount));
-        work.nr_pages = nr_dirty + nr_unstable + get_nr_dirty_inodes();
        bdi_queue_work(sb->s_bdi, &work);
        wait_for_completion(&done);
 }
+EXPORT_SYMBOL(writeback_inodes_sb_nr);
+/**
+ * writeback_inodes_sb  -       writeback dirty inodes from given super_block
+ * @sb: the superblock
+ *
+ * Start writeback on some inodes on this super_block. No guarantees are made
+ * on how many (if any) will be written, and this function does not wait
+ * for IO completion of submitted IO.
+ */
+void writeback_inodes_sb(struct super_block *sb)
+{
+        return writeback_inodes_sb_nr(sb, get_nr_dirty_pages());
+}
 EXPORT_SYMBOL(writeback_inodes_sb);
 /**
@@ -1124,6 +1139,27 @@ int writeback_inodes_sb_if_idle(struct super_block *sb)
 EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
 /**
+ * writeback_inodes_sb_nr_if_idle       -       start writeback if none underway
+ * @sb: the superblock
+ * @nr: the number of pages to write
+ *
+ * Invoke writeback_inodes_sb_nr if no writeback is currently underway.
+ * Returns 1 if writeback was started, 0 if not.
+ */
+int writeback_inodes_sb_nr_if_idle(struct super_block *sb,
+                                   unsigned long nr)
+{
+        if (!writeback_in_progress(sb->s_bdi)) {
+                down_read(&sb->s_umount);
+                writeback_inodes_sb_nr(sb, nr);
+                up_read(&sb->s_umount);
+                return 1;
+        } else
+                return 0;
+}
+EXPORT_SYMBOL(writeback_inodes_sb_nr_if_idle);
+/**
 * sync_inodes_sb       -       sync sb inode pages
 * @sb: the superblock
 *
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 4eba07661e5c..85542a7daf40 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -322,12 +322,10 @@ static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent)
        return 0;
 }
-static int fuse_ctl_get_sb(struct file_system_type *fs_type, int flags,
+static struct dentry *fuse_ctl_mount(struct file_system_type *fs_type,
-                        const char *dev_name, void *raw_data,
+                        int flags, const char *dev_name, void *raw_data)
-                        struct vfsmount *mnt)
 {
-        return get_sb_single(fs_type, flags, raw_data,
+        return mount_single(fs_type, flags, raw_data, fuse_ctl_fill_super);
-                                fuse_ctl_fill_super, mnt);
 }
 static void fuse_ctl_kill_sb(struct super_block *sb)
@@ -346,7 +344,7 @@ static void fuse_ctl_kill_sb(struct super_block *sb)
 static struct file_system_type fuse_ctl_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "fusectl",
-        .get_sb         = fuse_ctl_get_sb,
+        .mount          = fuse_ctl_mount,
        .kill_sb        = fuse_ctl_kill_sb,
 };
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index da9e6e11374c..cfce3ad86a92 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1041,11 +1041,11 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
        return err;
 }
-static int fuse_get_sb(struct file_system_type *fs_type,
+static struct dentry *fuse_mount(struct file_system_type *fs_type,
                       int flags, const char *dev_name,
-                       void *raw_data, struct vfsmount *mnt)
+                       void *raw_data)
 {
-        return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt);
+        return mount_nodev(fs_type, flags, raw_data, fuse_fill_super);
 }
 static void fuse_kill_sb_anon(struct super_block *sb)
@@ -1065,17 +1065,16 @@ static struct file_system_type fuse_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "fuse",
        .fs_flags       = FS_HAS_SUBTYPE,
-        .get_sb         = fuse_get_sb,
+        .mount          = fuse_mount,
        .kill_sb        = fuse_kill_sb_anon,
 };
 #ifdef CONFIG_BLOCK
-static int fuse_get_sb_blk(struct file_system_type *fs_type,
+static struct dentry *fuse_mount_blk(struct file_system_type *fs_type,
                           int flags, const char *dev_name,
-                           void *raw_data, struct vfsmount *mnt)
+                           void *raw_data)
 {
-        return get_sb_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super,
+        return mount_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super);
-                           mnt);
 }
 static void fuse_kill_sb_blk(struct super_block *sb)
@@ -1094,7 +1093,7 @@ static void fuse_kill_sb_blk(struct super_block *sb)
 static struct file_system_type fuseblk_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "fuseblk",
-        .get_sb         = fuse_get_sb_blk,
+        .mount          = fuse_mount_blk,
        .kill_sb        = fuse_kill_sb_blk,
        .fs_flags       = FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
 };
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index cade1acbcea9..3eb1393f7b81 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1250,12 +1250,11 @@ static int test_gfs2_super(struct super_block *s, void *ptr)
 }
 /**
- * gfs2_get_sb - Get the GFS2 superblock
+ * gfs2_mount - Get the GFS2 superblock
 * @fs_type: The GFS2 filesystem type
 * @flags: Mount flags
 * @dev_name: The name of the device
 * @data: The mount arguments
- * @mnt: The vfsmnt for this mount
 *
 * Q. Why not use get_sb_bdev() ?
 * A. We need to select one of two root directories to mount, independent
@@ -1264,8 +1263,8 @@ static int test_gfs2_super(struct super_block *s, void *ptr)
 * Returns: 0 or -ve on error
 */
-static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
+static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
-                       const char *dev_name, void *data, struct vfsmount *mnt)
+                       const char *dev_name, void *data)
 {
        struct block_device *bdev;
        struct super_block *s;
@@ -1279,7 +1278,7 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
        bdev = open_bdev_exclusive(dev_name, mode, fs_type);
        if (IS_ERR(bdev))
-                return PTR_ERR(bdev);
+                return ERR_CAST(bdev);
        /*
         * once the super is inserted into the list by sget, s_umount
@@ -1298,6 +1297,9 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
        if (IS_ERR(s))
                goto error_bdev;
+        if (s->s_root)
+                close_bdev_exclusive(bdev, mode);
        memset(&args, 0, sizeof(args));
        args.ar_quota = GFS2_QUOTA_DEFAULT;
        args.ar_data = GFS2_DATA_DEFAULT;
@@ -1309,17 +1311,13 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
        error = gfs2_mount_args(&args, data);
        if (error) {
                printk(KERN_WARNING "GFS2: can't parse mount arguments\n");
-                if (s->s_root)
+                goto error_super;
-                        goto error_super;
-                deactivate_locked_super(s);
-                return error;
        }
        if (s->s_root) {
                error = -EBUSY;
                if ((flags ^ s->s_flags) & MS_RDONLY)
                        goto error_super;
-                close_bdev_exclusive(bdev, mode);
        } else {
                char b[BDEVNAME_SIZE];
@@ -1328,27 +1326,24 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
                strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
                sb_set_blocksize(s, block_size(bdev));
                error = fill_super(s, &args, flags & MS_SILENT ? 1 : 0);
-                if (error) {
+                if (error)
-                        deactivate_locked_super(s);
+                        goto error_super;
-                        return error;
-                }
                s->s_flags |= MS_ACTIVE;
                bdev->bd_super = s;
        }
        sdp = s->s_fs_info;
-        mnt->mnt_sb = s;
        if (args.ar_meta)
-                mnt->mnt_root = dget(sdp->sd_master_dir);
+                return dget(sdp->sd_master_dir);
        else
-                mnt->mnt_root = dget(sdp->sd_root_dir);
+                return dget(sdp->sd_root_dir);
-        return 0;
 error_super:
        deactivate_locked_super(s);
+        return ERR_PTR(error);
 error_bdev:
        close_bdev_exclusive(bdev, mode);
-        return error;
+        return ERR_PTR(error);
 }
 static int set_meta_super(struct super_block *s, void *ptr)
@@ -1356,8 +1351,8 @@ static int set_meta_super(struct super_block *s, void *ptr)
        return -EINVAL;
 }
-static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
+static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type,
-                            const char *dev_name, void *data, struct vfsmount *mnt)
+                        int flags, const char *dev_name, void *data)
 {
        struct super_block *s;
        struct gfs2_sbd *sdp;
@@ -1368,23 +1363,21 @@ static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
        if (error) {
                printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n",
                       dev_name, error);
-                return error;
+                return ERR_PTR(error);
        }
        s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super,
                 path.dentry->d_inode->i_sb->s_bdev);
        path_put(&path);
        if (IS_ERR(s)) {
                printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n");
-                return PTR_ERR(s);
+                return ERR_CAST(s);
        }
        if ((flags ^ s->s_flags) & MS_RDONLY) {
                deactivate_locked_super(s);
-                return -EBUSY;
+                return ERR_PTR(-EBUSY);
        }
        sdp = s->s_fs_info;
-        mnt->mnt_sb = s;
+        return dget(sdp->sd_master_dir);
-        mnt->mnt_root = dget(sdp->sd_master_dir);
-        return 0;
 }
 static void gfs2_kill_sb(struct super_block *sb)
@@ -1410,7 +1403,7 @@ static void gfs2_kill_sb(struct super_block *sb)
 struct file_system_type gfs2_fs_type = {
        .name = "gfs2",
        .fs_flags = FS_REQUIRES_DEV,
-        .get_sb = gfs2_get_sb,
+        .mount = gfs2_mount,
        .kill_sb = gfs2_kill_sb,
        .owner = THIS_MODULE,
 };
@@ -1418,7 +1411,7 @@ struct file_system_type gfs2_fs_type = {
 struct file_system_type gfs2meta_fs_type = {
        .name = "gfs2meta",
        .fs_flags = FS_REQUIRES_DEV,
-        .get_sb = gfs2_get_sb_meta,
+        .mount = gfs2_mount_meta,
        .owner = THIS_MODULE,
 };
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 6ee1586f2334..4824c27cebb8 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -441,17 +441,16 @@ bail:
        return res;
 }
-static int hfs_get_sb(struct file_system_type *fs_type,
+static struct dentry *hfs_mount(struct file_system_type *fs_type,
-                      int flags, const char *dev_name, void *data,
+                      int flags, const char *dev_name, void *data)
-                      struct vfsmount *mnt)
 {
-        return get_sb_bdev(fs_type, flags, dev_name, data, hfs_fill_super, mnt);
+        return mount_bdev(fs_type, flags, dev_name, data, hfs_fill_super);
 }
 static struct file_system_type hfs_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "hfs",
-        .get_sb         = hfs_get_sb,
+        .mount          = hfs_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 9a88d7536103..52cc746d3ba3 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -495,18 +495,16 @@ static void hfsplus_destroy_inode(struct inode *inode)
 #define HFSPLUS_INODE_SIZE      sizeof(struct hfsplus_inode_info)
-static int hfsplus_get_sb(struct file_system_type *fs_type,
+static struct dentry *hfsplus_mount(struct file_system_type *fs_type,
-                          int flags, const char *dev_name, void *data,
+                          int flags, const char *dev_name, void *data)
-                          struct vfsmount *mnt)
 {
-        return get_sb_bdev(fs_type, flags, dev_name, data, hfsplus_fill_super,
+        return mount_bdev(fs_type, flags, dev_name, data, hfsplus_fill_super);
-                           mnt);
 }
 static struct file_system_type hfsplus_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "hfsplus",
-        .get_sb         = hfsplus_get_sb,
+        .mount          = hfsplus_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index cd7c93917cc7..2c0f148a49e6 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -962,11 +962,11 @@ out:
        return err;
 }
-static int hostfs_read_sb(struct file_system_type *type,
+static struct dentry *hostfs_read_sb(struct file_system_type *type,
                          int flags, const char *dev_name,
-                          void *data, struct vfsmount *mnt)
+                          void *data)
 {
-        return get_sb_nodev(type, flags, data, hostfs_fill_sb_common, mnt);
+        return mount_nodev(type, flags, data, hostfs_fill_sb_common);
 }
 static void hostfs_kill_sb(struct super_block *s)
@@ -978,7 +978,7 @@ static void hostfs_kill_sb(struct super_block *s)
 static struct file_system_type hostfs_type = {
        .owner          = THIS_MODULE,
        .name           = "hostfs",
-        .get_sb         = hostfs_read_sb,
+        .mount          = hostfs_read_sb,
        .kill_sb        = hostfs_kill_sb,
        .fs_flags       = 0,
 };
diff --git a/fs/hpfs/buffer.c b/fs/hpfs/buffer.c
index eac5f96323e3..793cb9d943d2 100644
--- a/fs/hpfs/buffer.c
+++ b/fs/hpfs/buffer.c
@@ -14,7 +14,7 @@ void hpfs_lock_creation(struct super_block *s)
 #ifdef DEBUG_LOCKS
        printk("lock creation\n");
 #endif
-        down(&hpfs_sb(s)->hpfs_creation_de);
+        mutex_lock(&hpfs_sb(s)->hpfs_creation_de);
 }
 void hpfs_unlock_creation(struct super_block *s)
@@ -22,7 +22,7 @@ void hpfs_unlock_creation(struct super_block *s)
 #ifdef DEBUG_LOCKS
        printk("unlock creation\n");
 #endif
-        up(&hpfs_sb(s)->hpfs_creation_de);
+        mutex_unlock(&hpfs_sb(s)->hpfs_creation_de);
 }
 /* Map a sector into a buffer and return pointers to it and to the buffer. */
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index b59eac0232a0..2fee17d0d9ab 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -87,7 +87,7 @@ struct hpfs_sb_info {
        unsigned *sb_bmp_dir;           /* main bitmap directory */
        unsigned sb_c_bitmap;           /* current bitmap */
        unsigned sb_max_fwd_alloc;      /* max forwad allocation */
-        struct semaphore hpfs_creation_de; /* when creating dirents, nobody else
+        struct mutex hpfs_creation_de;  /* when creating dirents, nobody else
                                           can alloc blocks */
        /*unsigned sb_mounting : 1;*/
        int sb_timeshift;
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index c969a1aa163a..6c5f01597c3a 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -491,7 +491,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
        sbi->sb_bmp_dir = NULL;
        sbi->sb_cp_table = NULL;
-        init_MUTEX(&sbi->hpfs_creation_de);
+        mutex_init(&sbi->hpfs_creation_de);
        uid = current_uid();
        gid = current_gid();
@@ -686,17 +686,16 @@ bail0:
        return -EINVAL;
 }
-static int hpfs_get_sb(struct file_system_type *fs_type,
+static struct dentry *hpfs_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+        int flags, const char *dev_name, void *data)
 {
-        return get_sb_bdev(fs_type, flags, dev_name, data, hpfs_fill_super,
+        return mount_bdev(fs_type, flags, dev_name, data, hpfs_fill_super);
-                           mnt);
 }
 static struct file_system_type hpfs_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "hpfs",
-        .get_sb         = hpfs_get_sb,
+        .mount          = hpfs_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 4e2a45ea6140..f702b5f713fc 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -748,17 +748,17 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent)
        return(err);
 }
-static int hppfs_read_super(struct file_system_type *type,
+static struct dentry *hppfs_read_super(struct file_system_type *type,
                            int flags, const char *dev_name,
-                            void *data, struct vfsmount *mnt)
+                            void *data)
 {
-        return get_sb_nodev(type, flags, data, hppfs_fill_super, mnt);
+        return mount_nodev(type, flags, data, hppfs_fill_super);
 }
 static struct file_system_type hppfs_type = {
        .owner          = THIS_MODULE,
        .name           = "hppfs",
-        .get_sb         = hppfs_read_super,
+        .mount          = hppfs_read_super,
        .kill_sb        = kill_anon_super,
        .fs_flags       = 0,
 };
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index b14be3f781c7..a5fe68189eed 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -896,15 +896,15 @@ void hugetlb_put_quota(struct address_space *mapping, long delta)
        }
 }
-static int hugetlbfs_get_sb(struct file_system_type *fs_type,
+static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+        int flags, const char *dev_name, void *data)
 {
-        return get_sb_nodev(fs_type, flags, data, hugetlbfs_fill_super, mnt);
+        return mount_nodev(fs_type, flags, data, hugetlbfs_fill_super);
 }
 static struct file_system_type hugetlbfs_fs_type = {
        .name           = "hugetlbfs",
-        .get_sb         = hugetlbfs_get_sb,
+        .mount          = hugetlbfs_mount,
        .kill_sb        = kill_litter_super,
 };
@@ -932,8 +932,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
        if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) {
                *user = current_user();
                if (user_shm_lock(size, *user)) {
-                        WARN_ONCE(1,
+                        printk_once(KERN_WARNING "Using mlock ulimits for SHM_HUGETLB is deprecated\n");
-                          "Using mlock ulimits for SHM_HUGETLB deprecated\n");
                } else {
                        *user = NULL;
                        return ERR_PTR(-EPERM);
diff --git a/fs/internal.h b/fs/internal.h
index ebad3b90752d..e43b9a4dbf4e 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -106,5 +106,5 @@ extern void release_open_intent(struct nameidata *);
 * inode.c
 */
 extern int get_nr_dirty_inodes(void);
-extern int evict_inodes(struct super_block *);
+extern void evict_inodes(struct super_block *);
 extern int invalidate_inodes(struct super_block *);
diff --git a/fs/ioprio.c b/fs/ioprio.c
index 748cfb92dcc6..2f7d05c89922 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -111,12 +111,14 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
        read_lock(&tasklist_lock);
        switch (which) {
                case IOPRIO_WHO_PROCESS:
+                        rcu_read_lock();
                        if (!who)
                                p = current;
                        else
                                p = find_task_by_vpid(who);
                        if (p)
                                ret = set_task_ioprio(p, ioprio);
+                        rcu_read_unlock();
                        break;
                case IOPRIO_WHO_PGRP:
                        if (!who)
@@ -139,7 +141,12 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
                                break;
                        do_each_thread(g, p) {
-                                if (__task_cred(p)->uid != who)
+                                int match;
+                                rcu_read_lock();
+                                match = __task_cred(p)->uid == who;
+                                rcu_read_unlock();
+                                if (!match)
                                        continue;
                                ret = set_task_ioprio(p, ioprio);
                                if (ret)
@@ -200,12 +207,14 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
        read_lock(&tasklist_lock);
        switch (which) {
                case IOPRIO_WHO_PROCESS:
+                        rcu_read_lock();
                        if (!who)
                                p = current;
                        else
                                p = find_task_by_vpid(who);
                        if (p)
                                ret = get_task_ioprio(p);
+                        rcu_read_unlock();
                        break;
                case IOPRIO_WHO_PGRP:
                        if (!who)
@@ -232,7 +241,12 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
                                break;
                        do_each_thread(g, p) {
-                                if (__task_cred(p)->uid != user->uid)
+                                int match;
+                                rcu_read_lock();
+                                match = __task_cred(p)->uid == user->uid;
+                                rcu_read_unlock();
+                                if (!match)
                                        continue;
                                tmpio = get_task_ioprio(p);
                                if (tmpio < 0)
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 79cf7f616bbe..bfdeb82a53be 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -1507,17 +1507,16 @@ struct inode *isofs_iget(struct super_block *sb,
        return inode;
 }
-static int isofs_get_sb(struct file_system_type *fs_type,
+static struct dentry *isofs_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+        int flags, const char *dev_name, void *data)
 {
-        return get_sb_bdev(fs_type, flags, dev_name, data, isofs_fill_super,
+        return mount_bdev(fs_type, flags, dev_name, data, isofs_fill_super);
-                                mnt);
 }
 static struct file_system_type iso9660_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "iso9660",
-        .get_sb         = isofs_get_sb,
+        .mount          = isofs_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 538417c1fdbb..c590d155c095 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1838,7 +1838,6 @@ size_t journal_tag_bytes(journal_t *journal)
 */
 #define JBD2_MAX_SLABS 8
 static struct kmem_cache *jbd2_slab[JBD2_MAX_SLABS];
-static DECLARE_MUTEX(jbd2_slab_create_sem);
 static const char *jbd2_slab_names[JBD2_MAX_SLABS] = {
        "jbd2_1k", "jbd2_2k", "jbd2_4k", "jbd2_8k",
@@ -1859,6 +1858,7 @@ static void jbd2_journal_destroy_slabs(void)
 static int jbd2_journal_create_slab(size_t size)
 {
+        static DEFINE_MUTEX(jbd2_slab_create_mutex);
        int i = order_base_2(size) - 10;
        size_t slab_size;
@@ -1870,16 +1870,16 @@ static int jbd2_journal_create_slab(size_t size)
        if (unlikely(i < 0))
                i = 0;
-        down(&jbd2_slab_create_sem);
+        mutex_lock(&jbd2_slab_create_mutex);
        if (jbd2_slab[i]) {
-                up(&jbd2_slab_create_sem);
+                mutex_unlock(&jbd2_slab_create_mutex);
                return 0;       /* Already created */
        }
        slab_size = 1 << (i+10);
        jbd2_slab[i] = kmem_cache_create(jbd2_slab_names[i], slab_size,
                                         slab_size, 0, NULL);
-        up(&jbd2_slab_create_sem);
+        mutex_unlock(&jbd2_slab_create_mutex);
        if (!jbd2_slab[i]) {
                printk(KERN_EMERG "JBD2: no memory for jbd2_slab cache\n");
                return -ENOMEM;
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index a906f538d11c..85c6be2db02f 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -23,7 +23,7 @@ static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *,
 static inline struct jffs2_inode_cache *
 first_inode_chain(int *i, struct jffs2_sb_info *c)
 {
-        for (; *i < INOCACHE_HASHSIZE; (*i)++) {
+        for (; *i < c->inocache_hashsize; (*i)++) {
                if (c->inocache_list[*i])
                        return c->inocache_list[*i];
        }
diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c
index 617a1e5694c1..de4247021d25 100644
--- a/fs/jffs2/compr.c
+++ b/fs/jffs2/compr.c
@@ -103,7 +103,7 @@ uint16_t jffs2_compress(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
                        spin_unlock(&jffs2_compressor_list_lock);
                        *datalen  = orig_slen;
                        *cdatalen = orig_dlen;
-                        compr_ret = this->compress(data_in, output_buf, datalen, cdatalen, NULL);
+                        compr_ret = this->compress(data_in, output_buf, datalen, cdatalen);
                        spin_lock(&jffs2_compressor_list_lock);
                        this->usecount--;
                        if (!compr_ret) {
@@ -152,7 +152,7 @@ uint16_t jffs2_compress(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
                        spin_unlock(&jffs2_compressor_list_lock);
                        *datalen  = orig_slen;
                        *cdatalen = orig_dlen;
-                        compr_ret = this->compress(data_in, this->compr_buf, datalen, cdatalen, NULL);
+                        compr_ret = this->compress(data_in, this->compr_buf, datalen, cdatalen);
                        spin_lock(&jffs2_compressor_list_lock);
                        this->usecount--;
                        if (!compr_ret) {
@@ -220,7 +220,7 @@ int jffs2_decompress(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
                        if (comprtype == this->compr) {
                                this->usecount++;
                                spin_unlock(&jffs2_compressor_list_lock);
-                                ret = this->decompress(cdata_in, data_out, cdatalen, datalen, NULL);
+                                ret = this->decompress(cdata_in, data_out, cdatalen, datalen);
                                spin_lock(&jffs2_compressor_list_lock);
                                if (ret) {
                                        printk(KERN_WARNING "Decompressor \"%s\" returned %d\n", this->name, ret);
diff --git a/fs/jffs2/compr.h b/fs/jffs2/compr.h
index e471a9106fd9..13bb7597ab39 100644
--- a/fs/jffs2/compr.h
+++ b/fs/jffs2/compr.h
@@ -49,9 +49,9 @@ struct jffs2_compressor {
        char *name;
        char compr;                     /* JFFS2_COMPR_XXX */
        int (*compress)(unsigned char *data_in, unsigned char *cpage_out,
-                        uint32_t *srclen, uint32_t *destlen, void *model);
+                        uint32_t *srclen, uint32_t *destlen);
        int (*decompress)(unsigned char *cdata_in, unsigned char *data_out,
-                          uint32_t cdatalen, uint32_t datalen, void *model);
+                          uint32_t cdatalen, uint32_t datalen);
        int usecount;
        int disabled;           /* if set the compressor won't compress */
        unsigned char *compr_buf;       /* used by size compr. mode */
diff --git a/fs/jffs2/compr_lzo.c b/fs/jffs2/compr_lzo.c
index ed25ae7c98eb..af186ee674d8 100644
--- a/fs/jffs2/compr_lzo.c
+++ b/fs/jffs2/compr_lzo.c
@@ -42,7 +42,7 @@ static int __init alloc_workspace(void)
 }
 static int jffs2_lzo_compress(unsigned char *data_in, unsigned char *cpage_out,
-                              uint32_t *sourcelen, uint32_t *dstlen, void *model)
+                              uint32_t *sourcelen, uint32_t *dstlen)
 {
        size_t compress_size;
        int ret;
@@ -67,7 +67,7 @@ static int jffs2_lzo_compress(unsigned char *data_in, unsigned char *cpage_out,
 }
 static int jffs2_lzo_decompress(unsigned char *data_in, unsigned char *cpage_out,
-                                 uint32_t srclen, uint32_t destlen, void *model)
+                                 uint32_t srclen, uint32_t destlen)
 {
        size_t dl = destlen;
        int ret;
diff --git a/fs/jffs2/compr_rtime.c b/fs/jffs2/compr_rtime.c
index 9696ad9ef5f7..16a5047903a6 100644
--- a/fs/jffs2/compr_rtime.c
+++ b/fs/jffs2/compr_rtime.c
@@ -31,8 +31,7 @@
 /* _compress returns the compressed size, -1 if bigger */
 static int jffs2_rtime_compress(unsigned char *data_in,
                                unsigned char *cpage_out,
-                                uint32_t *sourcelen, uint32_t *dstlen,
+                                uint32_t *sourcelen, uint32_t *dstlen)
-                                void *model)
 {
        short positions[256];
        int outpos = 0;
@@ -73,8 +72,7 @@ static int jffs2_rtime_compress(unsigned char *data_in,
 static int jffs2_rtime_decompress(unsigned char *data_in,
                                  unsigned char *cpage_out,
-                                  uint32_t srclen, uint32_t destlen,
+                                  uint32_t srclen, uint32_t destlen)
-                                  void *model)
 {
        short positions[256];
        int outpos = 0;
diff --git a/fs/jffs2/compr_rubin.c b/fs/jffs2/compr_rubin.c
index a12b4f763373..9e7cec808c4c 100644
--- a/fs/jffs2/compr_rubin.c
+++ b/fs/jffs2/compr_rubin.c
@@ -298,7 +298,7 @@ static int rubin_do_compress(int bit_divider, int *bits, unsigned char *data_in,
 #if 0
 /* _compress returns the compressed size, -1 if bigger */
 int jffs2_rubinmips_compress(unsigned char *data_in, unsigned char *cpage_out,
-                   uint32_t *sourcelen, uint32_t *dstlen, void *model)
+                   uint32_t *sourcelen, uint32_t *dstlen)
 {
        return rubin_do_compress(BIT_DIVIDER_MIPS, bits_mips, data_in,
                                 cpage_out, sourcelen, dstlen);
@@ -306,8 +306,7 @@ int jffs2_rubinmips_compress(unsigned char *data_in, unsigned char *cpage_out,
 #endif
 static int jffs2_dynrubin_compress(unsigned char *data_in,
                                   unsigned char *cpage_out,
-                                   uint32_t *sourcelen, uint32_t *dstlen,
+                                   uint32_t *sourcelen, uint32_t *dstlen)
-                                   void *model)
 {
        int bits[8];
        unsigned char histo[256];
@@ -387,8 +386,7 @@ static void rubin_do_decompress(int bit_divider, int *bits,
 static int jffs2_rubinmips_decompress(unsigned char *data_in,
                                      unsigned char *cpage_out,
-                                      uint32_t sourcelen, uint32_t dstlen,
+                                      uint32_t sourcelen, uint32_t dstlen)
-                                      void *model)
 {
        rubin_do_decompress(BIT_DIVIDER_MIPS, bits_mips, data_in,
                            cpage_out, sourcelen, dstlen);
@@ -397,8 +395,7 @@ static int jffs2_rubinmips_decompress(unsigned char *data_in,
 static int jffs2_dynrubin_decompress(unsigned char *data_in,
                                     unsigned char *cpage_out,
-                                     uint32_t sourcelen, uint32_t dstlen,
+                                     uint32_t sourcelen, uint32_t dstlen)
-                                     void *model)
 {
        int bits[8];
        int c;
diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c
index 97fc45de6f81..fd05a0b9431d 100644
--- a/fs/jffs2/compr_zlib.c
+++ b/fs/jffs2/compr_zlib.c
@@ -68,8 +68,7 @@ static void free_workspaces(void)
 static int jffs2_zlib_compress(unsigned char *data_in,
                               unsigned char *cpage_out,
-                               uint32_t *sourcelen, uint32_t *dstlen,
+                               uint32_t *sourcelen, uint32_t *dstlen)
-                               void *model)
 {
        int ret;
@@ -136,8 +135,7 @@ static int jffs2_zlib_compress(unsigned char *data_in,
 static int jffs2_zlib_decompress(unsigned char *data_in,
                                 unsigned char *cpage_out,
-                                 uint32_t srclen, uint32_t destlen,
+                                 uint32_t srclen, uint32_t destlen)
-                                 void *model)
 {
        int ret;
        int wbits = MAX_WBITS;
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 79121aa5858b..92978658ed18 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -367,7 +367,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
        }
        /* We use f->target field to store the target path. */
-        f->target = kmalloc(targetlen + 1, GFP_KERNEL);
+        f->target = kmemdup(target, targetlen + 1, GFP_KERNEL);
        if (!f->target) {
                printk(KERN_WARNING "Can't allocate %d bytes of memory\n", targetlen + 1);
                mutex_unlock(&f->sem);
@@ -376,7 +376,6 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
                goto fail;
        }
-        memcpy(f->target, target, targetlen + 1);
        D1(printk(KERN_DEBUG "jffs2_symlink: symlink's target '%s' cached\n", (char *)f->target));
        /* No data here. Only a metadata node, which will be
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index abac961f617b..e513f1913c15 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -151,7 +151,7 @@ int jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count)
                }
                /* Be nice */
-                yield();
+                cond_resched();
                mutex_lock(&c->erase_free_sem);
                spin_lock(&c->erase_completion_lock);
        }
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index d9beb06e6fca..e896e67767eb 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -474,6 +474,25 @@ struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_i
        return inode;
 }
+static int calculate_inocache_hashsize(uint32_t flash_size)
+{
+        /*
+         * Pick an inocache hash size based on the size of the medium.
+         * Count how many megabytes we're dealing with, apply a hashsize twice
+         * that size, but rounding down to a multiple of 64. And keep
+         * to sensible bounds.
+         */
+        int size_mb = flash_size / 1024 / 1024;
+        int hashsize = (size_mb * 2) & ~0x3f;
+        if (hashsize < INOCACHE_HASHSIZE_MIN)
+                return INOCACHE_HASHSIZE_MIN;
+        if (hashsize > INOCACHE_HASHSIZE_MAX)
+                return INOCACHE_HASHSIZE_MAX;
+        return hashsize;
+}
 int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
 {
@@ -520,7 +539,8 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
        if (ret)
                return ret;
-        c->inocache_list = kcalloc(INOCACHE_HASHSIZE, sizeof(struct jffs2_inode_cache *), GFP_KERNEL);
+        c->inocache_hashsize = calculate_inocache_hashsize(c->flash_size);
+        c->inocache_list = kcalloc(c->inocache_hashsize, sizeof(struct jffs2_inode_cache *), GFP_KERNEL);
        if (!c->inocache_list) {
                ret = -ENOMEM;
                goto out_wbuf;
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index 846a79452497..31dce611337c 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -219,13 +219,14 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
        if (!list_empty(&c->erase_complete_list) ||
            !list_empty(&c->erase_pending_list)) {
                spin_unlock(&c->erase_completion_lock);
+                mutex_unlock(&c->alloc_sem);
                D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() erasing pending blocks\n"));
-                if (jffs2_erase_pending_blocks(c, 1)) {
+                if (jffs2_erase_pending_blocks(c, 1))
-                        mutex_unlock(&c->alloc_sem);
                        return 0;
-                }
                D1(printk(KERN_DEBUG "No progress from erasing blocks; doing GC anyway\n"));
                spin_lock(&c->erase_completion_lock);
+                mutex_lock(&c->alloc_sem);
        }
        /* First, work out which block we're garbage-collecting */
diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h
index 6784bc89add1..f864005de64c 100644
--- a/fs/jffs2/jffs2_fs_sb.h
+++ b/fs/jffs2/jffs2_fs_sb.h
@@ -100,6 +100,7 @@ struct jffs2_sb_info {
        wait_queue_head_t erase_wait;           /* For waiting for erases to complete */
        wait_queue_head_t inocache_wq;
+        int inocache_hashsize;
        struct jffs2_inode_cache **inocache_list;
        spinlock_t inocache_lock;
diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c
index af02bd138469..5e03233c2363 100644
--- a/fs/jffs2/nodelist.c
+++ b/fs/jffs2/nodelist.c
@@ -420,7 +420,7 @@ struct jffs2_inode_cache *jffs2_get_ino_cache(struct jffs2_sb_info *c, uint32_t
 {
        struct jffs2_inode_cache *ret;
-        ret = c->inocache_list[ino % INOCACHE_HASHSIZE];
+        ret = c->inocache_list[ino % c->inocache_hashsize];
        while (ret && ret->ino < ino) {
                ret = ret->next;
        }
@@ -441,7 +441,7 @@ void jffs2_add_ino_cache (struct jffs2_sb_info *c, struct jffs2_inode_cache *new
        dbg_inocache("add %p (ino #%u)\n", new, new->ino);
-        prev = &c->inocache_list[new->ino % INOCACHE_HASHSIZE];
+        prev = &c->inocache_list[new->ino % c->inocache_hashsize];
        while ((*prev) && (*prev)->ino < new->ino) {
                prev = &(*prev)->next;
@@ -462,7 +462,7 @@ void jffs2_del_ino_cache(struct jffs2_sb_info *c, struct jffs2_inode_cache *old)
        dbg_inocache("del %p (ino #%u)\n", old, old->ino);
        spin_lock(&c->inocache_lock);
-        prev = &c->inocache_list[old->ino % INOCACHE_HASHSIZE];
+        prev = &c->inocache_list[old->ino % c->inocache_hashsize];
        while ((*prev) && (*prev)->ino < old->ino) {
                prev = &(*prev)->next;
@@ -487,7 +487,7 @@ void jffs2_free_ino_caches(struct jffs2_sb_info *c)
        int i;
        struct jffs2_inode_cache *this, *next;
-        for (i=0; i<INOCACHE_HASHSIZE; i++) {
+        for (i=0; i < c->inocache_hashsize; i++) {
                this = c->inocache_list[i];
                while (this) {
                        next = this->next;
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index 523a91691052..5a53d9bdb2b5 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -199,7 +199,8 @@ struct jffs2_inode_cache {
 #define RAWNODE_CLASS_XATTR_DATUM       1
 #define RAWNODE_CLASS_XATTR_REF         2
-#define INOCACHE_HASHSIZE 128
+#define INOCACHE_HASHSIZE_MIN 128
+#define INOCACHE_HASHSIZE_MAX 1024
 #define write_ofs(c) ((c)->nextblock->offset + (c)->sector_size - (c)->nextblock->free_size)
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 46f870d1cc36..b632dddcb482 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -20,7 +20,7 @@
 #include "summary.h"
 #include "debug.h"
-#define DEFAULT_EMPTY_SCAN_SIZE 1024
+#define DEFAULT_EMPTY_SCAN_SIZE 256
 #define noisy_printk(noise, args...) do { \
        if (*(noise)) { \
@@ -435,7 +435,7 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
                                  unsigned char *buf, uint32_t buf_size, struct jffs2_summary *s) {
        struct jffs2_unknown_node *node;
        struct jffs2_unknown_node crcnode;
-        uint32_t ofs, prevofs;
+        uint32_t ofs, prevofs, max_ofs;
        uint32_t hdr_crc, buf_ofs, buf_len;
        int err;
        int noise = 0;
@@ -550,12 +550,12 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
        /* We temporarily use 'ofs' as a pointer into the buffer/jeb */
        ofs = 0;
+        max_ofs = EMPTY_SCAN_SIZE(c->sector_size);
-        /* Scan only 4KiB of 0xFF before declaring it's empty */
+        /* Scan only EMPTY_SCAN_SIZE of 0xFF before declaring it's empty */
-        while(ofs < EMPTY_SCAN_SIZE(c->sector_size) && *(uint32_t *)(&buf[ofs]) == 0xFFFFFFFF)
+        while(ofs < max_ofs && *(uint32_t *)(&buf[ofs]) == 0xFFFFFFFF)
                ofs += 4;
-        if (ofs == EMPTY_SCAN_SIZE(c->sector_size)) {
+        if (ofs == max_ofs) {
 #ifdef CONFIG_JFFS2_FS_WRITEBUFFER
                if (jffs2_cleanmarker_oob(c)) {
                        /* scan oob, take care of cleanmarker */
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index d1ae5dfc22b9..c86041b866a4 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -179,12 +179,11 @@ static int jffs2_fill_super(struct super_block *sb, void *data, int silent)
        return ret;
 }
-static int jffs2_get_sb(struct file_system_type *fs_type,
+static struct dentry *jffs2_mount(struct file_system_type *fs_type,
                        int flags, const char *dev_name,
-                        void *data, struct vfsmount *mnt)
+                        void *data)
 {
-        return get_sb_mtd(fs_type, flags, dev_name, data, jffs2_fill_super,
+        return mount_mtd(fs_type, flags, dev_name, data, jffs2_fill_super);
-                          mnt);
 }
 static void jffs2_put_super (struct super_block *sb)
@@ -229,7 +228,7 @@ static void jffs2_kill_sb(struct super_block *sb)
 static struct file_system_type jffs2_fs_type = {
        .owner =        THIS_MODULE,
        .name =         "jffs2",
-        .get_sb =       jffs2_get_sb,
+        .mount =        jffs2_mount,
        .kill_sb =      jffs2_kill_sb,
 };
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 68eee2bf629e..0669fc1cc3bf 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -583,11 +583,10 @@ static int jfs_unfreeze(struct super_block *sb)
        return 0;
 }
-static int jfs_get_sb(struct file_system_type *fs_type,
+static struct dentry *jfs_do_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+        int flags, const char *dev_name, void *data)
 {
-        return get_sb_bdev(fs_type, flags, dev_name, data, jfs_fill_super,
+        return mount_bdev(fs_type, flags, dev_name, data, jfs_fill_super);
-                           mnt);
 }
 static int jfs_sync_fs(struct super_block *sb, int wait)
@@ -770,7 +769,7 @@ static const struct export_operations jfs_export_operations = {
 static struct file_system_type jfs_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "jfs",
-        .get_sb         = jfs_get_sb,
+        .mount          = jfs_do_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
diff --git a/fs/libfs.c b/fs/libfs.c
index 304a5132ca27..a3accdf528ad 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -201,9 +201,8 @@ static const struct super_operations simple_super_operations = {
 * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that
 * will never be mountable)
 */
-int get_sb_pseudo(struct file_system_type *fs_type, char *name,
+struct dentry *mount_pseudo(struct file_system_type *fs_type, char *name,
-        const struct super_operations *ops, unsigned long magic,
+        const struct super_operations *ops, unsigned long magic)
-        struct vfsmount *mnt)
 {
        struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
        struct dentry *dentry;
@@ -211,7 +210,7 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name,
        struct qstr d_name = {.name = name, .len = strlen(name)};
        if (IS_ERR(s))
-                return PTR_ERR(s);
+                return ERR_CAST(s);
        s->s_flags = MS_NOUSER;
        s->s_maxbytes = MAX_LFS_FILESIZE;
@@ -241,12 +240,11 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name,
        d_instantiate(dentry, root);
        s->s_root = dentry;
        s->s_flags |= MS_ACTIVE;
-        simple_set_mnt(mnt, s);
+        return dget(s->s_root);
-        return 0;
 Enomem:
        deactivate_locked_super(s);
-        return -ENOMEM;
+        return ERR_PTR(-ENOMEM);
 }
 int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
@@ -951,7 +949,7 @@ EXPORT_SYMBOL(dcache_dir_lseek);
 EXPORT_SYMBOL(dcache_dir_open);
 EXPORT_SYMBOL(dcache_readdir);
 EXPORT_SYMBOL(generic_read_dir);
-EXPORT_SYMBOL(get_sb_pseudo);
+EXPORT_SYMBOL(mount_pseudo);
 EXPORT_SYMBOL(simple_write_begin);
 EXPORT_SYMBOL(simple_write_end);
 EXPORT_SYMBOL(simple_dir_inode_operations);
diff --git a/fs/locks.c b/fs/locks.c
index 50ec15927aab..0e62dd35d088 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -186,7 +186,7 @@ void locks_release_private(struct file_lock *fl)
 EXPORT_SYMBOL_GPL(locks_release_private);
 /* Free a lock which is not in use. */
-static void locks_free_lock(struct file_lock *fl)
+void locks_free_lock(struct file_lock *fl)
 {
        BUG_ON(waitqueue_active(&fl->fl_wait));
        BUG_ON(!list_empty(&fl->fl_block));
@@ -195,6 +195,7 @@ static void locks_free_lock(struct file_lock *fl)
        locks_release_private(fl);
        kmem_cache_free(filelock_cache, fl);
 }
+EXPORT_SYMBOL(locks_free_lock);
 void locks_init_lock(struct file_lock *fl)
 {
@@ -234,11 +235,8 @@ static void locks_copy_private(struct file_lock *new, struct file_lock *fl)
                        fl->fl_ops->fl_copy_lock(new, fl);
                new->fl_ops = fl->fl_ops;
        }
-        if (fl->fl_lmops) {
+        if (fl->fl_lmops)
-                if (fl->fl_lmops->fl_copy_lock)
-                        fl->fl_lmops->fl_copy_lock(new, fl);
                new->fl_lmops = fl->fl_lmops;
-        }
 }
 /*
@@ -1371,20 +1369,22 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
        struct inode *inode = dentry->d_inode;
        int error, rdlease_count = 0, wrlease_count = 0;
+        lease = *flp;
+        error = -EACCES;
        if ((current_fsuid() != inode->i_uid) && !capable(CAP_LEASE))
-                return -EACCES;
+                goto out;
+        error = -EINVAL;
        if (!S_ISREG(inode->i_mode))
-                return -EINVAL;
+                goto out;
        error = security_file_lock(filp, arg);
        if (error)
-                return error;
+                goto out;
        time_out_leases(inode);
        BUG_ON(!(*flp)->fl_lmops->fl_break);
-        lease = *flp;
        if (arg != F_UNLCK) {
                error = -EAGAIN;
                if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
@@ -1425,8 +1425,9 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
                goto out;
        if (my_before != NULL) {
-                *flp = *my_before;
                error = lease->fl_lmops->fl_change(my_before, arg);
+                if (!error)
+                        *flp = *my_before;
                goto out;
        }
@@ -1441,7 +1442,6 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
        return 0;
 out:
-        locks_free_lock(lease);
        return error;
 }
 EXPORT_SYMBOL(generic_setlease);
@@ -1493,21 +1493,19 @@ int vfs_setlease(struct file *filp, long arg, struct file_lock **lease)
 }
 EXPORT_SYMBOL_GPL(vfs_setlease);
-/**
+static int do_fcntl_delete_lease(struct file *filp)
- *      fcntl_setlease  -       sets a lease on an open file
- *      @fd: open file descriptor
- *      @filp: file pointer
- *      @arg: type of lease to obtain
- *
- *      Call this fcntl to establish a lease on the file.
- *      Note that you also need to call %F_SETSIG to
- *      receive a signal when the lease is broken.
- */
-int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
 {
-        struct file_lock *fl;
+        struct file_lock fl, *flp = &fl;
+        lease_init(filp, F_UNLCK, flp);
+        return vfs_setlease(filp, F_UNLCK, &flp);
+}
+static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
+{
+        struct file_lock *fl, *ret;
        struct fasync_struct *new;
-        struct inode *inode = filp->f_path.dentry->d_inode;
        int error;
        fl = lease_alloc(filp, arg);
@@ -1519,10 +1517,16 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
                locks_free_lock(fl);
                return -ENOMEM;
        }
+        ret = fl;
        lock_flocks();
-        error = __vfs_setlease(filp, arg, &fl);
+        error = __vfs_setlease(filp, arg, &ret);
-        if (error || arg == F_UNLCK)
+        if (error) {
-                goto out_unlock;
+                unlock_flocks();
+                locks_free_lock(fl);
+                goto out_free_fasync;
+        }
+        if (ret != fl)
+                locks_free_lock(fl);
        /*
         * fasync_insert_entry() returns the old entry if any.
@@ -1530,26 +1534,36 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
         * inserted it into the fasync list. Clear new so that
         * we don't release it here.
         */
-        if (!fasync_insert_entry(fd, filp, &fl->fl_fasync, new))
+        if (!fasync_insert_entry(fd, filp, &ret->fl_fasync, new))
                new = NULL;
-        if (error < 0) {
-                /* remove lease just inserted by setlease */
-                fl->fl_type = F_UNLCK | F_INPROGRESS;
-                fl->fl_break_time = jiffies - 10;
-                time_out_leases(inode);
-                goto out_unlock;
-        }
        error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
-out_unlock:
        unlock_flocks();
+out_free_fasync:
        if (new)
                fasync_free(new);
        return error;
 }
 /**
+ *      fcntl_setlease  -       sets a lease on an open file
+ *      @fd: open file descriptor
+ *      @filp: file pointer
+ *      @arg: type of lease to obtain
+ *
+ *      Call this fcntl to establish a lease on the file.
+ *      Note that you also need to call %F_SETSIG to
+ *      receive a signal when the lease is broken.
+ */
+int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
+{
+        if (arg == F_UNLCK)
+                return do_fcntl_delete_lease(filp);
+        return do_fcntl_add_lease(fd, filp, arg);
+}
+/**
 * flock_lock_file_wait - Apply a FLOCK-style lock to a file
 * @filp: The file to apply the lock to
 * @fl: The lock to be applied
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 9bd2ce2a3040..92ca6fbe09bd 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -298,9 +298,9 @@ static int bdev_write_sb(struct super_block *sb, struct page *page)
        return sync_request(page, bdev, WRITE);
 }
-static void bdev_put_device(struct super_block *sb)
+static void bdev_put_device(struct logfs_super *s)
 {
-        close_bdev_exclusive(logfs_super(sb)->s_bdev, FMODE_READ|FMODE_WRITE);
+        close_bdev_exclusive(s->s_bdev, FMODE_READ|FMODE_WRITE);
 }
 static int bdev_can_write_buf(struct super_block *sb, u64 ofs)
@@ -320,8 +320,8 @@ static const struct logfs_device_ops bd_devops = {
        .put_device     = bdev_put_device,
 };
-int logfs_get_sb_bdev(struct file_system_type *type, int flags,
+int logfs_get_sb_bdev(struct logfs_super *p, struct file_system_type *type,
-                const char *devname, struct vfsmount *mnt)
+                const char *devname)
 {
        struct block_device *bdev;
@@ -332,8 +332,11 @@ int logfs_get_sb_bdev(struct file_system_type *type, int flags,
        if (MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) {
                int mtdnr = MINOR(bdev->bd_dev);
                close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE);
-                return logfs_get_sb_mtd(type, flags, mtdnr, mnt);
+                return logfs_get_sb_mtd(p, mtdnr);
        }
-        return logfs_get_sb_device(type, flags, NULL, bdev, &bd_devops, mnt);
+        p->s_bdev = bdev;
+        p->s_mtd = NULL;
+        p->s_devops = &bd_devops;
+        return 0;
 }
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index a85d47d13e4b..7466e9dcc8c5 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -230,9 +230,9 @@ static void mtd_writeseg(struct super_block *sb, u64 ofs, size_t len)
        __mtd_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT);
 }
-static void mtd_put_device(struct super_block *sb)
+static void mtd_put_device(struct logfs_super *s)
 {
-        put_mtd_device(logfs_super(sb)->s_mtd);
+        put_mtd_device(s->s_mtd);
 }
 static int mtd_can_write_buf(struct super_block *sb, u64 ofs)
@@ -265,14 +265,14 @@ static const struct logfs_device_ops mtd_devops = {
        .put_device     = mtd_put_device,
 };
-int logfs_get_sb_mtd(struct file_system_type *type, int flags,
+int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr)
-                int mtdnr, struct vfsmount *mnt)
 {
-        struct mtd_info *mtd;
+        struct mtd_info *mtd = get_mtd_device(NULL, mtdnr);
-        const struct logfs_device_ops *devops = &mtd_devops;
-        mtd = get_mtd_device(NULL, mtdnr);
        if (IS_ERR(mtd))
                return PTR_ERR(mtd);
-        return logfs_get_sb_device(type, flags, mtd, NULL, devops, mnt);
+        s->s_bdev = NULL;
+        s->s_mtd = mtd;
+        s->s_devops = &mtd_devops;
+        return 0;
 }
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index b8786264d243..57afd4a6fabb 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -136,6 +136,7 @@ struct logfs_area_ops {
        int     (*erase_segment)(struct logfs_area *area);
 };
+struct logfs_super;     /* forward */
 /**
 * struct logfs_device_ops - device access operations
 *
@@ -156,7 +157,7 @@ struct logfs_device_ops {
                        int ensure_write);
        int (*can_write_buf)(struct super_block *sb, u64 ofs);
        void (*sync)(struct super_block *sb);
-        void (*put_device)(struct super_block *sb);
+        void (*put_device)(struct logfs_super *s);
 };
 /**
@@ -471,11 +472,13 @@ void logfs_compr_exit(void);
 /* dev_bdev.c */
 #ifdef CONFIG_BLOCK
-int logfs_get_sb_bdev(struct file_system_type *type, int flags,
+int logfs_get_sb_bdev(struct logfs_super *s,
-                const char *devname, struct vfsmount *mnt);
+                struct file_system_type *type,
+                const char *devname);
 #else
-static inline int logfs_get_sb_bdev(struct file_system_type *type, int flags,
+static inline int logfs_get_sb_bdev(struct logfs_super *s,
-                const char *devname, struct vfsmount *mnt)
+                struct file_system_type *type,
+                const char *devname)
 {
        return -ENODEV;
 }
@@ -483,11 +486,9 @@ static inline int logfs_get_sb_bdev(struct file_system_type *type, int flags,
 /* dev_mtd.c */
 #ifdef CONFIG_MTD
-int logfs_get_sb_mtd(struct file_system_type *type, int flags,
+int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr);
-                int mtdnr, struct vfsmount *mnt);
 #else
-static inline int logfs_get_sb_mtd(struct file_system_type *type, int flags,
+static inline int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr)
-                int mtdnr, struct vfsmount *mnt)
 {
        return -ENODEV;
 }
@@ -619,9 +620,6 @@ void emergency_read_end(struct page *page);
 void logfs_crash_dump(struct super_block *sb);
 void *memchr_inv(const void *s, int c, size_t n);
 int logfs_statfs(struct dentry *dentry, struct kstatfs *stats);
-int logfs_get_sb_device(struct file_system_type *type, int flags,
-                struct mtd_info *mtd, struct block_device *bdev,
-                const struct logfs_device_ops *devops, struct vfsmount *mnt);
 int logfs_check_ds(struct logfs_disk_super *ds);
 int logfs_write_sb(struct super_block *sb);
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index 5336155c5d81..33435e4b14d2 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -325,7 +325,7 @@ static int logfs_make_writeable(struct super_block *sb)
        return 0;
 }
-static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
+static int logfs_get_sb_final(struct super_block *sb)
 {
        struct logfs_super *super = logfs_super(sb);
        struct inode *rootdir;
@@ -356,7 +356,6 @@ static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
        }
        log_super("LogFS: Finished mounting\n");
-        simple_set_mnt(mnt, sb);
        return 0;
 fail:
@@ -529,43 +528,37 @@ static void logfs_kill_sb(struct super_block *sb)
        logfs_cleanup_rw(sb);
        if (super->s_erase_page)
                __free_page(super->s_erase_page);
-        super->s_devops->put_device(sb);
+        super->s_devops->put_device(super);
        logfs_mempool_destroy(super->s_btree_pool);
        logfs_mempool_destroy(super->s_alias_pool);
        kfree(super);
        log_super("LogFS: Finished unmounting\n");
 }
-int logfs_get_sb_device(struct file_system_type *type, int flags,
+static struct dentry *logfs_get_sb_device(struct logfs_super *super,
-                struct mtd_info *mtd, struct block_device *bdev,
+                struct file_system_type *type, int flags)
-                const struct logfs_device_ops *devops, struct vfsmount *mnt)
 {
-        struct logfs_super *super;
        struct super_block *sb;
        int err = -ENOMEM;
        static int mount_count;
        log_super("LogFS: Start mount %x\n", mount_count++);
-        super = kzalloc(sizeof(*super), GFP_KERNEL);
-        if (!super)
-                goto err0;
-        super->s_mtd    = mtd;
-        super->s_bdev   = bdev;
        err = -EINVAL;
        sb = sget(type, logfs_sb_test, logfs_sb_set, super);
-        if (IS_ERR(sb))
+        if (IS_ERR(sb)) {
-                goto err0;
+                super->s_devops->put_device(super);
+                kfree(super);
+                return ERR_CAST(sb);
+        }
        if (sb->s_root) {
                /* Device is already in use */
-                err = 0;
+                super->s_devops->put_device(super);
-                simple_set_mnt(mnt, sb);
+                kfree(super);
-                goto err0;
+                return dget(sb->s_root);
        }
-        super->s_devops = devops;
        /*
         * sb->s_maxbytes is limited to 8TB.  On 32bit systems, the page cache
         * only covers 16TB and the upper 8TB are used for indirect blocks.
@@ -581,10 +574,12 @@ int logfs_get_sb_device(struct file_system_type *type, int flags,
                goto err1;
        sb->s_flags |= MS_ACTIVE;
-        err = logfs_get_sb_final(sb, mnt);
+        err = logfs_get_sb_final(sb);
-        if (err)
+        if (err) {
                deactivate_locked_super(sb);
-        return err;
+                return ERR_PTR(err);
+        }
+        return dget(sb->s_root);
 err1:
        /* no ->s_root, no ->put_super() */
@@ -592,37 +587,45 @@ err1:
        iput(super->s_segfile_inode);
        iput(super->s_mapping_inode);
        deactivate_locked_super(sb);
-        return err;
+        return ERR_PTR(err);
-err0:
-        kfree(super);
-        //devops->put_device(sb);
-        return err;
 }
-static int logfs_get_sb(struct file_system_type *type, int flags,
+static struct dentry *logfs_mount(struct file_system_type *type, int flags,
-                const char *devname, void *data, struct vfsmount *mnt)
+                const char *devname, void *data)
 {
        ulong mtdnr;
+        struct logfs_super *super;
+        int err;
-        if (!devname)
+        super = kzalloc(sizeof(*super), GFP_KERNEL);
-                return logfs_get_sb_bdev(type, flags, devname, mnt);
+        if (!super)
-        if (strncmp(devname, "mtd", 3))
+                return ERR_PTR(-ENOMEM);
-                return logfs_get_sb_bdev(type, flags, devname, mnt);
-        {
+        if (!devname)
+                err = logfs_get_sb_bdev(super, type, devname);
+        else if (strncmp(devname, "mtd", 3))
+                err = logfs_get_sb_bdev(super, type, devname);
+        else {
                char *garbage;
                mtdnr = simple_strtoul(devname+3, &garbage, 0);
                if (*garbage)
-                        return -EINVAL;
+                        err = -EINVAL;
+                else
+                        err = logfs_get_sb_mtd(super, mtdnr);
+        }
+        if (err) {
+                kfree(super);
+                return ERR_PTR(err);
        }
-        return logfs_get_sb_mtd(type, flags, mtdnr, mnt);
+        return logfs_get_sb_device(super, type, flags);
 }
 static struct file_system_type logfs_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "logfs",
-        .get_sb         = logfs_get_sb,
+        .mount          = logfs_mount,
        .kill_sb        = logfs_kill_sb,
        .fs_flags       = FS_REQUIRES_DEV,
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index e39d6bf2e8fb..fb2020858a34 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -614,17 +614,16 @@ void minix_truncate(struct inode * inode)
                V2_minix_truncate(inode);
 }
-static int minix_get_sb(struct file_system_type *fs_type,
+static struct dentry *minix_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+        int flags, const char *dev_name, void *data)
 {
-        return get_sb_bdev(fs_type, flags, dev_name, data, minix_fill_super,
+        return mount_bdev(fs_type, flags, dev_name, data, minix_fill_super);
-                           mnt);
 }
 static struct file_system_type minix_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "minix",
-        .get_sb         = minix_get_sb,
+        .mount          = minix_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
diff --git a/fs/namei.c b/fs/namei.c
index f7dbc06857ab..5362af9b7372 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1574,6 +1574,7 @@ static struct file *finish_open(struct nameidata *nd,
         */
        if (will_truncate)
                mnt_drop_write(nd->path.mnt);
+        path_put(&nd->path);
        return filp;
 exit:
@@ -1675,6 +1676,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
                }
                filp = nameidata_to_filp(nd);
                mnt_drop_write(nd->path.mnt);
+                path_put(&nd->path);
                if (!IS_ERR(filp)) {
                        error = ima_file_check(filp, acc_mode);
                        if (error) {
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 985fabb26aca..d290545aa0c4 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -1020,16 +1020,16 @@ out:
        return result;
 }
-static int ncp_get_sb(struct file_system_type *fs_type,
+static struct dentry *ncp_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+        int flags, const char *dev_name, void *data)
 {
-        return get_sb_nodev(fs_type, flags, data, ncp_fill_super, mnt);
+        return mount_nodev(fs_type, flags, data, ncp_fill_super);
 }
 static struct file_system_type ncp_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "ncpfs",
-        .get_sb         = ncp_get_sb,
+        .mount          = ncp_mount,
        .kill_sb        = kill_anon_super,
        .fs_flags       = FS_BINARY_MOUNTDATA,
 };
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 064a80961677..84d3c8b90206 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -873,7 +873,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
        dreq->inode = inode;
        dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
        dreq->l_ctx = nfs_get_lock_context(dreq->ctx);
-        if (dreq->l_ctx != NULL)
+        if (dreq->l_ctx == NULL)
                goto out_release;
        if (!is_sync_kiocb(iocb))
                dreq->iocb = iocb;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index e756075637b0..60677f9f1311 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -884,6 +884,5 @@ static int nfs_setlease(struct file *file, long arg, struct file_lock **fl)
        dprintk("NFS: setlease(%s/%s, arg=%ld)\n",
                        file->f_path.dentry->d_parent->d_name.name,
                        file->f_path.dentry->d_name.name, arg);
        return -EINVAL;
 }
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index dec47ed8b6b9..4e2d9b6b1380 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -123,7 +123,7 @@ static ssize_t nfs_idmap_get_desc(const char *name, size_t namelen,
        size_t desclen = typelen + namelen + 2;
        *desc = kmalloc(desclen, GFP_KERNEL);
-        if (!desc)
+        if (!*desc)
                return -ENOMEM;
        cp = *desc;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 32c8758c99fd..0f24cdf2cb13 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -429,7 +429,7 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
                 * returned NFS4ERR_DELAY as per Section 2.10.6.2
                 * of RFC5661.
                 */
-                dprintk("%s: slot=%ld seq=%d: Operation in progress\n",
+                dprintk("%s: slot=%td seq=%d: Operation in progress\n",
                        __func__,
                        res->sr_slot - res->sr_session->fc_slot_table.slots,
                        res->sr_slot->seq_nr);
@@ -573,7 +573,7 @@ int nfs4_setup_sequence(const struct nfs_server *server,
                goto out;
        }
-        dprintk("--> %s clp %p session %p sr_slot %ld\n",
+        dprintk("--> %s clp %p session %p sr_slot %td\n",
                __func__, session->clp, session, res->sr_slot ?
                        res->sr_slot - session->fc_slot_table.slots : -1);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 919490232e17..137b549e63db 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -65,6 +65,13 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
        if (req == NULL)
                return ERR_PTR(-ENOMEM);
+        /* get lock context early so we can deal with alloc failures */
+        req->wb_lock_context = nfs_get_lock_context(ctx);
+        if (req->wb_lock_context == NULL) {
+                nfs_page_free(req);
+                return ERR_PTR(-ENOMEM);
+        }
        /* Initialize the request struct. Initially, we assume a
         * long write-back delay. This will be adjusted in
         * update_nfs_request below if the region is not locked. */
@@ -79,7 +86,6 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
        req->wb_pgbase  = offset;
        req->wb_bytes   = count;
        req->wb_context = get_nfs_open_context(ctx);
-        req->wb_lock_context = nfs_get_lock_context(ctx);
        kref_init(&req->wb_kref);
        return req;
 }
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 3600ec700d58..0a42e8f4adcb 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -260,8 +260,8 @@ static int  nfs_statfs(struct dentry *, struct kstatfs *);
 static int  nfs_show_options(struct seq_file *, struct vfsmount *);
 static int  nfs_show_stats(struct seq_file *, struct vfsmount *);
 static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *);
-static int nfs_xdev_get_sb(struct file_system_type *fs_type,
+static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type,
-                int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
+                int flags, const char *dev_name, void *raw_data);
 static void nfs_put_super(struct super_block *);
 static void nfs_kill_super(struct super_block *);
 static int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
@@ -277,7 +277,7 @@ static struct file_system_type nfs_fs_type = {
 struct file_system_type nfs_xdev_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "nfs",
-        .get_sb         = nfs_xdev_get_sb,
+        .mount          = nfs_xdev_mount,
        .kill_sb        = nfs_kill_super,
        .fs_flags       = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
@@ -302,14 +302,14 @@ static int nfs4_try_mount(int flags, const char *dev_name,
        struct nfs_parsed_mount_data *data, struct vfsmount *mnt);
 static int nfs4_get_sb(struct file_system_type *fs_type,
        int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
-static int nfs4_remote_get_sb(struct file_system_type *fs_type,
+static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
+        int flags, const char *dev_name, void *raw_data);
-static int nfs4_xdev_get_sb(struct file_system_type *fs_type,
+static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
+        int flags, const char *dev_name, void *raw_data);
 static int nfs4_referral_get_sb(struct file_system_type *fs_type,
        int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
-static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type,
+static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
+        int flags, const char *dev_name, void *raw_data);
 static void nfs4_kill_super(struct super_block *sb);
 static struct file_system_type nfs4_fs_type = {
@@ -323,7 +323,7 @@ static struct file_system_type nfs4_fs_type = {
 static struct file_system_type nfs4_remote_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "nfs4",
-        .get_sb         = nfs4_remote_get_sb,
+        .mount          = nfs4_remote_mount,
        .kill_sb        = nfs4_kill_super,
        .fs_flags       = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
@@ -331,7 +331,7 @@ static struct file_system_type nfs4_remote_fs_type = {
 struct file_system_type nfs4_xdev_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "nfs4",
-        .get_sb         = nfs4_xdev_get_sb,
+        .mount          = nfs4_xdev_mount,
        .kill_sb        = nfs4_kill_super,
        .fs_flags       = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
@@ -339,7 +339,7 @@ struct file_system_type nfs4_xdev_fs_type = {
 static struct file_system_type nfs4_remote_referral_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "nfs4",
-        .get_sb         = nfs4_remote_referral_get_sb,
+        .mount          = nfs4_remote_referral_mount,
        .kill_sb        = nfs4_kill_super,
        .fs_flags       = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
@@ -2397,9 +2397,9 @@ static void nfs_kill_super(struct super_block *s)
 /*
 * Clone an NFS2/3 server record on xdev traversal (FSID-change)
 */
-static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
+static struct dentry *
-                           const char *dev_name, void *raw_data,
+nfs_xdev_mount(struct file_system_type *fs_type, int flags,
-                           struct vfsmount *mnt)
+                const char *dev_name, void *raw_data)
 {
        struct nfs_clone_mount *data = raw_data;
        struct super_block *s;
@@ -2411,7 +2411,7 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
        };
        int error;
-        dprintk("--> nfs_xdev_get_sb()\n");
+        dprintk("--> nfs_xdev_mount()\n");
        /* create a new volume representation */
        server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr);
@@ -2458,28 +2458,26 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
        }
        s->s_flags |= MS_ACTIVE;
-        mnt->mnt_sb = s;
-        mnt->mnt_root = mntroot;
        /* clone any lsm security options from the parent to the new sb */
        security_sb_clone_mnt_opts(data->sb, s);
-        dprintk("<-- nfs_xdev_get_sb() = 0\n");
+        dprintk("<-- nfs_xdev_mount() = 0\n");
-        return 0;
+        return mntroot;
 out_err_nosb:
        nfs_free_server(server);
 out_err_noserver:
-        dprintk("<-- nfs_xdev_get_sb() = %d [error]\n", error);
+        dprintk("<-- nfs_xdev_mount() = %d [error]\n", error);
-        return error;
+        return ERR_PTR(error);
 error_splat_super:
        if (server && !s->s_root)
                bdi_unregister(&server->backing_dev_info);
 error_splat_bdi:
        deactivate_locked_super(s);
-        dprintk("<-- nfs_xdev_get_sb() = %d [splat]\n", error);
+        dprintk("<-- nfs_xdev_mount() = %d [splat]\n", error);
-        return error;
+        return ERR_PTR(error);
 }
 #ifdef CONFIG_NFS_V4
@@ -2649,8 +2647,9 @@ out_no_address:
 /*
 * Get the superblock for the NFS4 root partition
 */
-static int nfs4_remote_get_sb(struct file_system_type *fs_type,
+static struct dentry *
-        int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
+nfs4_remote_mount(struct file_system_type *fs_type, int flags,
+                  const char *dev_name, void *raw_data)
 {
        struct nfs_parsed_mount_data *data = raw_data;
        struct super_block *s;
@@ -2714,15 +2713,16 @@ static int nfs4_remote_get_sb(struct file_system_type *fs_type,
                goto error_splat_root;
        s->s_flags |= MS_ACTIVE;
-        mnt->mnt_sb = s;
-        mnt->mnt_root = mntroot;
+        security_free_mnt_opts(&data->lsm_opts);
-        error = 0;
+        nfs_free_fhandle(mntfh);
+        return mntroot;
 out:
        security_free_mnt_opts(&data->lsm_opts);
 out_free_fh:
        nfs_free_fhandle(mntfh);
-        return error;
+        return ERR_PTR(error);
 out_free:
        nfs_free_server(server);
@@ -2968,9 +2968,9 @@ static void nfs4_kill_super(struct super_block *sb)
 /*
 * Clone an NFS4 server record on xdev traversal (FSID-change)
 */
-static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
+static struct dentry *
-                            const char *dev_name, void *raw_data,
+nfs4_xdev_mount(struct file_system_type *fs_type, int flags,
-                            struct vfsmount *mnt)
+                 const char *dev_name, void *raw_data)
 {
        struct nfs_clone_mount *data = raw_data;
        struct super_block *s;
@@ -2982,7 +2982,7 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
        };
        int error;
-        dprintk("--> nfs4_xdev_get_sb()\n");
+        dprintk("--> nfs4_xdev_mount()\n");
        /* create a new volume representation */
        server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr);
@@ -3029,32 +3029,30 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
        }
        s->s_flags |= MS_ACTIVE;
-        mnt->mnt_sb = s;
-        mnt->mnt_root = mntroot;
        security_sb_clone_mnt_opts(data->sb, s);
-        dprintk("<-- nfs4_xdev_get_sb() = 0\n");
+        dprintk("<-- nfs4_xdev_mount() = 0\n");
-        return 0;
+        return mntroot;
 out_err_nosb:
        nfs_free_server(server);
 out_err_noserver:
-        dprintk("<-- nfs4_xdev_get_sb() = %d [error]\n", error);
+        dprintk("<-- nfs4_xdev_mount() = %d [error]\n", error);
-        return error;
+        return ERR_PTR(error);
 error_splat_super:
        if (server && !s->s_root)
                bdi_unregister(&server->backing_dev_info);
 error_splat_bdi:
        deactivate_locked_super(s);
-        dprintk("<-- nfs4_xdev_get_sb() = %d [splat]\n", error);
+        dprintk("<-- nfs4_xdev_mount() = %d [splat]\n", error);
-        return error;
+        return ERR_PTR(error);
 }
-static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type,
+static struct dentry *
-                int flags, const char *dev_name, void *raw_data,
+nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags,
-                struct vfsmount *mnt)
+                           const char *dev_name, void *raw_data)
 {
        struct nfs_clone_mount *data = raw_data;
        struct super_block *s;
@@ -3118,14 +3116,12 @@ static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type,
        }
        s->s_flags |= MS_ACTIVE;
-        mnt->mnt_sb = s;
-        mnt->mnt_root = mntroot;
        security_sb_clone_mnt_opts(data->sb, s);
        nfs_free_fhandle(mntfh);
        dprintk("<-- nfs4_referral_get_sb() = 0\n");
-        return 0;
+        return mntroot;
 out_err_nosb:
        nfs_free_server(server);
@@ -3133,7 +3129,7 @@ out_err_noserver:
        nfs_free_fhandle(mntfh);
 out_err_nofh:
        dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error);
-        return error;
+        return ERR_PTR(error);
 error_splat_super:
        if (server && !s->s_root)
@@ -3142,7 +3138,7 @@ error_splat_bdi:
        deactivate_locked_super(s);
        nfs_free_fhandle(mntfh);
        dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error);
-        return error;
+        return ERR_PTR(error);
 }
 /*
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 9a16bad5d2ea..7bdec8531400 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -444,9 +444,9 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
        /* set up nfs_renamedata */
        data->old_dir = old_dir;
-        atomic_inc(&old_dir->i_count);
+        ihold(old_dir);
        data->new_dir = new_dir;
-        atomic_inc(&new_dir->i_count);
+        ihold(new_dir);
        data->old_dentry = dget(old_dentry);
        data->new_dentry = dget(new_dentry);
        nfs_fattr_init(&data->old_fattr);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 56347e0ac88d..ad2bfa68d534 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -673,16 +673,17 @@ static void nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses)
        spin_unlock(&clp->cl_lock);
 }
-static void nfsd4_register_conn(struct nfsd4_conn *conn)
+static int nfsd4_register_conn(struct nfsd4_conn *conn)
 {
        conn->cn_xpt_user.callback = nfsd4_conn_lost;
-        register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user);
+        return register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user);
 }
 static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses)
 {
        struct nfsd4_conn *conn;
        u32 flags = NFS4_CDFC4_FORE;
+        int ret;
        if (ses->se_flags & SESSION4_BACK_CHAN)
                flags |= NFS4_CDFC4_BACK;
@@ -690,7 +691,10 @@ static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses)
        if (!conn)
                return nfserr_jukebox;
        nfsd4_hash_conn(conn, ses);
-        nfsd4_register_conn(conn);
+        ret = nfsd4_register_conn(conn);
+        if (ret)
+                /* oops; xprt is already down: */
+                nfsd4_conn_lost(&conn->cn_xpt_user);
        return nfs_ok;
 }
@@ -1644,6 +1648,7 @@ static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_sessi
 {
        struct nfs4_client *clp = ses->se_client;
        struct nfsd4_conn *c;
+        int ret;
        spin_lock(&clp->cl_lock);
        c = __nfsd4_find_conn(new->cn_xprt, ses);
@@ -1654,7 +1659,10 @@ static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_sessi
        }
        __nfsd4_hash_conn(new, ses);
        spin_unlock(&clp->cl_lock);
-        nfsd4_register_conn(new);
+        ret = nfsd4_register_conn(new);
+        if (ret)
+                /* oops; xprt is already down: */
+                nfsd4_conn_lost(&new->cn_xpt_user);
        return;
 }
@@ -2310,22 +2318,6 @@ void nfsd_release_deleg_cb(struct file_lock *fl)
 }
 /*
- * Set the delegation file_lock back pointer.
- *
- * Called from setlease() with lock_kernel() held.
- */
-static
-void nfsd_copy_lock_deleg_cb(struct file_lock *new, struct file_lock *fl)
-{
-        struct nfs4_delegation *dp = (struct nfs4_delegation *)new->fl_owner;
-        dprintk("NFSD: nfsd_copy_lock_deleg_cb: new fl %p dp %p\n", new, dp);
-        if (!dp)
-                return;
-        dp->dl_flock = new;
-}
-/*
 * Called from setlease() with lock_kernel() held
 */
 static
@@ -2355,7 +2347,6 @@ int nfsd_change_deleg_cb(struct file_lock **onlist, int arg)
 static const struct lock_manager_operations nfsd_lease_mng_ops = {
        .fl_break = nfsd_break_deleg_cb,
        .fl_release_private = nfsd_release_deleg_cb,
-        .fl_copy_lock = nfsd_copy_lock_deleg_cb,
        .fl_mylease = nfsd_same_client_deleg_cb,
        .fl_change = nfsd_change_deleg_cb,
 };
@@ -2661,12 +2652,15 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
        fl->fl_file = find_readable_file(stp->st_file);
        BUG_ON(!fl->fl_file);
        fl->fl_pid = current->tgid;
+        dp->dl_flock = fl;
        /* vfs_setlease checks to see if delegation should be handed out.
         * the lock_manager callbacks fl_mylease and fl_change are used
         */
        if ((status = vfs_setlease(fl->fl_file, fl->fl_type, &fl))) {
                dprintk("NFSD: setlease failed [%d], no delegation\n", status);
+                dp->dl_flock = NULL;
+                locks_free_lock(fl);
                unhash_delegation(dp);
                flag = NFS4_OPEN_DELEGATE_NONE;
                goto out;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index d6dc3f61f8ba..4514ebbee4d6 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1405,16 +1405,16 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
        return simple_fill_super(sb, 0x6e667364, nfsd_files);
 }
-static int nfsd_get_sb(struct file_system_type *fs_type,
+static struct dentry *nfsd_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+        int flags, const char *dev_name, void *data)
 {
-        return get_sb_single(fs_type, flags, data, nfsd_fill_super, mnt);
+        return mount_single(fs_type, flags, data, nfsd_fill_super);
 }
 static struct file_system_type nfsd_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "nfsd",
-        .get_sb         = nfsd_get_sb,
+        .mount          = nfsd_mount,
        .kill_sb        = kill_litter_super,
 };
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 35ae03c0db86..f804d41ec9d3 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1141,9 +1141,9 @@ static int nilfs_test_bdev_super(struct super_block *s, void *data)
        return (void *)s->s_bdev == data;
 }
-static int
+static struct dentry *
-nilfs_get_sb(struct file_system_type *fs_type, int flags,
+nilfs_mount(struct file_system_type *fs_type, int flags,
-             const char *dev_name, void *data, struct vfsmount *mnt)
+             const char *dev_name, void *data)
 {
        struct nilfs_super_data sd;
        struct super_block *s;
@@ -1156,7 +1156,7 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
        sd.bdev = open_bdev_exclusive(dev_name, mode, fs_type);
        if (IS_ERR(sd.bdev))
-                return PTR_ERR(sd.bdev);
+                return ERR_CAST(sd.bdev);
        sd.cno = 0;
        sd.flags = flags;
@@ -1235,9 +1235,7 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
        if (!s_new)
                close_bdev_exclusive(sd.bdev, mode);
-        mnt->mnt_sb = s;
+        return root_dentry;
-        mnt->mnt_root = root_dentry;
-        return 0;
 failed_super:
        deactivate_locked_super(s);
@@ -1245,13 +1243,13 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
 failed:
        if (!s_new)
                close_bdev_exclusive(sd.bdev, mode);
-        return err;
+        return ERR_PTR(err);
 }
 struct file_system_type nilfs_fs_type = {
        .owner    = THIS_MODULE,
        .name     = "nilfs2",
-        .get_sb   = nilfs_get_sb,
+        .mount    = nilfs_mount,
        .kill_sb  = kill_block_super,
        .fs_flags = FS_REQUIRES_DEV,
 };
diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig
index b388443c3a09..22c629eedd82 100644
--- a/fs/notify/Kconfig
+++ b/fs/notify/Kconfig
@@ -3,4 +3,4 @@ config FSNOTIFY
 source "fs/notify/dnotify/Kconfig"
 source "fs/notify/inotify/Kconfig"
-#source "fs/notify/fanotify/Kconfig"
+source "fs/notify/fanotify/Kconfig"
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 85366c78cc37..b04f88eed09e 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -131,6 +131,7 @@ static int fanotify_handle_event(struct fsnotify_group *group,
        BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW);
        BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM);
        BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM);
+        BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR);
        pr_debug("%s: group=%p event=%p\n", __func__, group, event);
@@ -160,20 +161,21 @@ static bool fanotify_should_send_event(struct fsnotify_group *group,
                                       __u32 event_mask, void *data, int data_type)
 {
        __u32 marks_mask, marks_ignored_mask;
+        struct path *path = data;
        pr_debug("%s: group=%p to_tell=%p inode_mark=%p vfsmnt_mark=%p "
                 "mask=%x data=%p data_type=%d\n", __func__, group, to_tell,
                 inode_mark, vfsmnt_mark, event_mask, data, data_type);
-        /* sorry, fanotify only gives a damn about files and dirs */
-        if (!S_ISREG(to_tell->i_mode) &&
-            !S_ISDIR(to_tell->i_mode))
-                return false;
        /* if we don't have enough info to send an event to userspace say no */
        if (data_type != FSNOTIFY_EVENT_PATH)
                return false;
+        /* sorry, fanotify only gives a damn about files and dirs */
+        if (!S_ISREG(path->dentry->d_inode->i_mode) &&
+            !S_ISDIR(path->dentry->d_inode->i_mode))
+                return false;
        if (inode_mark && vfsmnt_mark) {
                marks_mask = (vfsmnt_mark->mask | inode_mark->mask);
                marks_ignored_mask = (vfsmnt_mark->ignored_mask | inode_mark->ignored_mask);
@@ -194,16 +196,29 @@ static bool fanotify_should_send_event(struct fsnotify_group *group,
                BUG();
        }
+        if (S_ISDIR(path->dentry->d_inode->i_mode) &&
+            (marks_ignored_mask & FS_ISDIR))
+                return false;
        if (event_mask & marks_mask & ~marks_ignored_mask)
                return true;
        return false;
 }
+static void fanotify_free_group_priv(struct fsnotify_group *group)
+{
+        struct user_struct *user;
+        user = group->fanotify_data.user;
+        atomic_dec(&user->fanotify_listeners);
+        free_uid(user);
+}
 const struct fsnotify_ops fanotify_fsnotify_ops = {
        .handle_event = fanotify_handle_event,
        .should_send_event = fanotify_should_send_event,
-        .free_group_priv = NULL,
+        .free_group_priv = fanotify_free_group_priv,
        .free_event_priv = NULL,
        .freeing_mark = NULL,
 };
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index bbcb98e7fcc6..063224812b7e 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -16,6 +16,10 @@
 #include <asm/ioctls.h>
+#define FANOTIFY_DEFAULT_MAX_EVENTS     16384
+#define FANOTIFY_DEFAULT_MAX_MARKS      8192
+#define FANOTIFY_DEFAULT_MAX_LISTENERS  128
 extern const struct fsnotify_ops fanotify_fsnotify_ops;
 static struct kmem_cache *fanotify_mark_cache __read_mostly;
@@ -326,7 +330,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
                ret = -EAGAIN;
                if (file->f_flags & O_NONBLOCK)
                        break;
-                ret = -EINTR;
+                ret = -ERESTARTSYS;
                if (signal_pending(current))
                        break;
@@ -372,11 +376,10 @@ static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t
 static int fanotify_release(struct inode *ignored, struct file *file)
 {
        struct fsnotify_group *group = file->private_data;
-        struct fanotify_response_event *re, *lre;
-        pr_debug("%s: file=%p group=%p\n", __func__, file, group);
 #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+        struct fanotify_response_event *re, *lre;
        mutex_lock(&group->fanotify_data.access_mutex);
        group->fanotify_data.bypass_perm = true;
@@ -554,18 +557,24 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
                                       __u32 mask,
                                       unsigned int flags)
 {
-        __u32 oldmask;
+        __u32 oldmask = -1;
        spin_lock(&fsn_mark->lock);
        if (!(flags & FAN_MARK_IGNORED_MASK)) {
                oldmask = fsn_mark->mask;
                fsnotify_set_mark_mask_locked(fsn_mark, (oldmask | mask));
        } else {
-                oldmask = fsn_mark->ignored_mask;
+                __u32 tmask = fsn_mark->ignored_mask | mask;
-                fsnotify_set_mark_ignored_mask_locked(fsn_mark, (oldmask | mask));
+                fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask);
                if (flags & FAN_MARK_IGNORED_SURV_MODIFY)
                        fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
        }
+        if (!(flags & FAN_MARK_ONDIR)) {
+                __u32 tmask = fsn_mark->ignored_mask | FAN_ONDIR;
+                fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask);
+        }
        spin_unlock(&fsn_mark->lock);
        return mask & ~oldmask;
@@ -582,6 +591,9 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
        if (!fsn_mark) {
                int ret;
+                if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks)
+                        return -ENOSPC;
                fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
                if (!fsn_mark)
                        return -ENOMEM;
@@ -610,10 +622,23 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
        pr_debug("%s: group=%p inode=%p\n", __func__, group, inode);
+        /*
+         * If some other task has this inode open for write we should not add
+         * an ignored mark, unless that ignored mark is supposed to survive
+         * modification changes anyway.
+         */
+        if ((flags & FAN_MARK_IGNORED_MASK) &&
+            !(flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
+            (atomic_read(&inode->i_writecount) > 0))
+                return 0;
        fsn_mark = fsnotify_find_inode_mark(group, inode);
        if (!fsn_mark) {
                int ret;
+                if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks)
+                        return -ENOSPC;
                fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
                if (!fsn_mark)
                        return -ENOMEM;
@@ -637,6 +662,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 {
        struct fsnotify_group *group;
        int f_flags, fd;
+        struct user_struct *user;
        pr_debug("%s: flags=%d event_f_flags=%d\n",
                __func__, flags, event_f_flags);
@@ -647,6 +673,12 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
        if (flags & ~FAN_ALL_INIT_FLAGS)
                return -EINVAL;
+        user = get_current_user();
+        if (atomic_read(&user->fanotify_listeners) > FANOTIFY_DEFAULT_MAX_LISTENERS) {
+                free_uid(user);
+                return -EMFILE;
+        }
        f_flags = O_RDWR | FMODE_NONOTIFY;
        if (flags & FAN_CLOEXEC)
                f_flags |= O_CLOEXEC;
@@ -658,12 +690,47 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
        if (IS_ERR(group))
                return PTR_ERR(group);
+        group->fanotify_data.user = user;
+        atomic_inc(&user->fanotify_listeners);
        group->fanotify_data.f_flags = event_f_flags;
 #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
        mutex_init(&group->fanotify_data.access_mutex);
        init_waitqueue_head(&group->fanotify_data.access_waitq);
        INIT_LIST_HEAD(&group->fanotify_data.access_list);
 #endif
+        switch (flags & FAN_ALL_CLASS_BITS) {
+        case FAN_CLASS_NOTIF:
+                group->priority = FS_PRIO_0;
+                break;
+        case FAN_CLASS_CONTENT:
+                group->priority = FS_PRIO_1;
+                break;
+        case FAN_CLASS_PRE_CONTENT:
+                group->priority = FS_PRIO_2;
+                break;
+        default:
+                fd = -EINVAL;
+                goto out_put_group;
+        }
+        if (flags & FAN_UNLIMITED_QUEUE) {
+                fd = -EPERM;
+                if (!capable(CAP_SYS_ADMIN))
+                        goto out_put_group;
+                group->max_events = UINT_MAX;
+        } else {
+                group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS;
+        }
+        if (flags & FAN_UNLIMITED_MARKS) {
+                fd = -EPERM;
+                if (!capable(CAP_SYS_ADMIN))
+                        goto out_put_group;
+                group->fanotify_data.max_marks = UINT_MAX;
+        } else {
+                group->fanotify_data.max_marks = FANOTIFY_DEFAULT_MAX_MARKS;
+        }
        fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags);
        if (fd < 0)
@@ -704,6 +771,12 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
        default:
                return -EINVAL;
        }
+        if (mask & FAN_ONDIR) {
+                flags |= FAN_MARK_ONDIR;
+                mask &= ~FAN_ONDIR;
+        }
 #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
        if (mask & ~(FAN_ALL_EVENTS | FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD))
 #else
@@ -719,6 +792,16 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
        ret = -EINVAL;
        if (unlikely(filp->f_op != &fanotify_fops))
                goto fput_and_out;
+        group = filp->private_data;
+        /*
+         * A group with priority FS_PRIO_0 was created as FAN_CLASS_NOTIF.
+         * Such groups are not allowed to request permission events.
+         */
+        ret = -EINVAL;
+        if (mask & FAN_ALL_PERM_EVENTS &&
+            group->priority == FS_PRIO_0)
+                goto fput_and_out;
        ret = fanotify_find_path(dfd, pathname, &path, flags);
        if (ret)
@@ -729,7 +812,6 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
                inode = path.dentry->d_inode;
        else
                mnt = path.mnt;
-        group = filp->private_data;
        /* create/update an inode mark */
        switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 4498a208df94..20dc218707ca 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -84,16 +84,17 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
 }
 /* Notify this dentry's parent about a child's events. */
-void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
+int __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
 {
        struct dentry *parent;
        struct inode *p_inode;
+        int ret = 0;
        if (!dentry)
                dentry = path->dentry;
        if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED))
-                return;
+                return 0;
        parent = dget_parent(dentry);
        p_inode = parent->d_inode;
@@ -106,14 +107,16 @@ void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
                mask |= FS_EVENT_ON_CHILD;
                if (path)
-                        fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH,
+                        ret = fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH,
-                                 dentry->d_name.name, 0);
+                                       dentry->d_name.name, 0);
                else
-                        fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
+                        ret = fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
-                                 dentry->d_name.name, 0);
+                                       dentry->d_name.name, 0);
        }
        dput(parent);
+        return ret;
 }
 EXPORT_SYMBOL_GPL(__fsnotify_parent);
@@ -252,20 +255,23 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
                if (inode_group > vfsmount_group) {
                        /* handle inode */
-                        send_to_group(to_tell, NULL, inode_mark, NULL, mask, data,
+                        ret = send_to_group(to_tell, NULL, inode_mark, NULL, mask, data,
-                                      data_is, cookie, file_name, &event);
+                                            data_is, cookie, file_name, &event);
                        /* we didn't use the vfsmount_mark */
                        vfsmount_group = NULL;
                } else if (vfsmount_group > inode_group) {
-                        send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data,
+                        ret = send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data,
-                                      data_is, cookie, file_name, &event);
+                                            data_is, cookie, file_name, &event);
                        inode_group = NULL;
                } else {
-                        send_to_group(to_tell, mnt, inode_mark, vfsmount_mark,
+                        ret = send_to_group(to_tell, mnt, inode_mark, vfsmount_mark,
-                                      mask, data, data_is, cookie, file_name,
+                                            mask, data, data_is, cookie, file_name,
-                                      &event);
+                                            &event);
                }
+                if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
+                        goto out;
                if (inode_group)
                        inode_node = srcu_dereference(inode_node->next,
                                                      &fsnotify_mark_srcu);
@@ -273,7 +279,8 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
                        vfsmount_node = srcu_dereference(vfsmount_node->next,
                                                         &fsnotify_mark_srcu);
        }
+        ret = 0;
+out:
        srcu_read_unlock(&fsnotify_mark_srcu, idx);
        /*
         * fsnotify_create_event() took a reference so the event can't be cleaned
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 21ed10660b80..4c29fcf557d1 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -177,7 +177,8 @@ void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *mark,
 * Attach an initialized mark to a given inode.
 * These marks may be used for the fsnotify backend to determine which
 * event types should be delivered to which group and for which inodes.  These
- * marks are ordered according to the group's location in memory.
+ * marks are ordered according to priority, highest number first, and then by
+ * the group's location in memory.
 */
 int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
                            struct fsnotify_group *group, struct inode *inode,
@@ -211,7 +212,11 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
                        goto out;
                }
-                if (mark->group < lmark->group)
+                if (mark->group->priority < lmark->group->priority)
+                        continue;
+                if ((mark->group->priority == lmark->group->priority) &&
+                    (mark->group < lmark->group))
                        continue;
                hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 24edc1185d53..444c305a468c 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -862,7 +862,7 @@ static int __init inotify_user_setup(void)
        BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW);
        BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED);
        BUILD_BUG_ON(IN_EXCL_UNLINK != FS_EXCL_UNLINK);
-        BUILD_BUG_ON(IN_ISDIR != FS_IN_ISDIR);
+        BUILD_BUG_ON(IN_ISDIR != FS_ISDIR);
        BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
        BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21);
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index 56772b578fbd..85eebff6d0d7 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -169,7 +169,11 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
                        goto out;
                }
-                if (mark->group < lmark->group)
+                if (mark->group->priority < lmark->group->priority)
+                        continue;
+                if ((mark->group->priority == lmark->group->priority) &&
+                    (mark->group < lmark->group))
                        continue;
                hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list);
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index d3fbe5730bfc..a30ecacc01f2 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -3059,17 +3059,16 @@ struct kmem_cache *ntfs_index_ctx_cache;
 /* Driver wide mutex. */
 DEFINE_MUTEX(ntfs_lock);
-static int ntfs_get_sb(struct file_system_type *fs_type,
+static struct dentry *ntfs_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+        int flags, const char *dev_name, void *data)
 {
-        return get_sb_bdev(fs_type, flags, dev_name, data, ntfs_fill_super,
+        return mount_bdev(fs_type, flags, dev_name, data, ntfs_fill_super);
-                           mnt);
 }
 static struct file_system_type ntfs_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "ntfs",
-        .get_sb         = ntfs_get_sb,
+        .mount          = ntfs_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 75e115f1bd73..b2df490a19ed 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -643,16 +643,16 @@ static const struct inode_operations dlmfs_file_inode_operations = {
        .setattr        = dlmfs_file_setattr,
 };
-static int dlmfs_get_sb(struct file_system_type *fs_type,
+static struct dentry *dlmfs_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+        int flags, const char *dev_name, void *data)
 {
-        return get_sb_nodev(fs_type, flags, data, dlmfs_fill_super, mnt);
+        return mount_nodev(fs_type, flags, data, dlmfs_fill_super);
 }
 static struct file_system_type dlmfs_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "ocfs2_dlmfs",
-        .get_sb         = dlmfs_get_sb,
+        .mount          = dlmfs_mount,
        .kill_sb        = kill_litter_super,
 };
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 56f0cb395820..f02c0ef31578 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1236,14 +1236,12 @@ read_super_error:
        return status;
 }
-static int ocfs2_get_sb(struct file_system_type *fs_type,
+static struct dentry *ocfs2_mount(struct file_system_type *fs_type,
                        int flags,
                        const char *dev_name,
-                        void *data,
+                        void *data)
-                        struct vfsmount *mnt)
 {
-        return get_sb_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super,
+        return mount_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super);
-                           mnt);
 }
 static void ocfs2_kill_sb(struct super_block *sb)
@@ -1267,8 +1265,7 @@ out:
 static struct file_system_type ocfs2_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "ocfs2",
-        .get_sb         = ocfs2_get_sb, /* is this called when we mount
+        .mount          = ocfs2_mount,
-                                        * the fs? */
        .kill_sb        = ocfs2_kill_sb,
        .fs_flags       = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE,
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index 14a22863291a..e043c4cb9a97 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -557,17 +557,16 @@ end:
        return ret;
 }
-static int omfs_get_sb(struct file_system_type *fs_type,
+static struct dentry *omfs_mount(struct file_system_type *fs_type,
-                        int flags, const char *dev_name,
+                        int flags, const char *dev_name, void *data)
-                        void *data, struct vfsmount *m)
 {
-        return get_sb_bdev(fs_type, flags, dev_name, data, omfs_fill_super, m);
+        return mount_bdev(fs_type, flags, dev_name, data, omfs_fill_super);
 }
 static struct file_system_type omfs_fs_type = {
        .owner = THIS_MODULE,
        .name = "omfs",
-        .get_sb = omfs_get_sb,
+        .mount = omfs_mount,
        .kill_sb = kill_block_super,
        .fs_flags = FS_REQUIRES_DEV,
 };
diff --git a/fs/open.c b/fs/open.c
index d74e1983e8dc..4197b9ed023d 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -786,11 +786,11 @@ struct file *nameidata_to_filp(struct nameidata *nd)
        /* Pick up the filp from the open intent */
        filp = nd->intent.open.file;
        /* Has the filesystem initialised the file for us? */
-        if (filp->f_path.dentry == NULL)
+        if (filp->f_path.dentry == NULL) {
+                path_get(&nd->path);
                filp = __dentry_open(nd->path.dentry, nd->path.mnt, filp,
                                     NULL, cred);
-        else
+        }
-                path_put(&nd->path);
        return filp;
 }
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index ffcd04f0012c..911e61f348fc 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -415,16 +415,16 @@ out_no_root:
        return ret;
 }
-static int openprom_get_sb(struct file_system_type *fs_type,
+static struct dentry *openprom_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+        int flags, const char *dev_name, void *data)
 {
-        return get_sb_single(fs_type, flags, data, openprom_fill_super, mnt);
+        return mount_single(fs_type, flags, data, openprom_fill_super);
 }
 static struct file_system_type openprom_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "openpromfs",
-        .get_sb         = openprom_get_sb,
+        .mount          = openprom_mount,
        .kill_sb        = kill_anon_super,
 };
diff --git a/fs/pipe.c b/fs/pipe.c
index d2d7566ce68e..a8012a955720 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1247,16 +1247,15 @@ out:
 * any operations on the root directory. However, we need a non-trivial
 * d_name - pipe: will go nicely and kill the special-casing in procfs.
 */
-static int pipefs_get_sb(struct file_system_type *fs_type,
+static struct dentry *pipefs_mount(struct file_system_type *fs_type,
-                         int flags, const char *dev_name, void *data,
+                         int flags, const char *dev_name, void *data)
-                         struct vfsmount *mnt)
 {
-        return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC, mnt);
+        return mount_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
 }
 static struct file_system_type pipe_fs_type = {
        .name           = "pipefs",
-        .get_sb         = pipefs_get_sb,
+        .mount          = pipefs_mount,
        .kill_sb        = kill_anon_super,
 };
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 93d99b316325..ef9fa8e24ad6 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -35,8 +35,8 @@ static int proc_set_super(struct super_block *sb, void *data)
        return set_anon_super(sb, NULL);
 }
-static int proc_get_sb(struct file_system_type *fs_type,
+static struct dentry *proc_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+        int flags, const char *dev_name, void *data)
 {
        int err;
        struct super_block *sb;
@@ -61,14 +61,14 @@ static int proc_get_sb(struct file_system_type *fs_type,
        sb = sget(fs_type, proc_test_super, proc_set_super, ns);
        if (IS_ERR(sb))
-                return PTR_ERR(sb);
+                return ERR_CAST(sb);
        if (!sb->s_root) {
                sb->s_flags = flags;
                err = proc_fill_super(sb);
                if (err) {
                        deactivate_locked_super(sb);
-                        return err;
+                        return ERR_PTR(err);
                }
                ei = PROC_I(sb->s_root->d_inode);
@@ -79,11 +79,9 @@ static int proc_get_sb(struct file_system_type *fs_type,
                }
                sb->s_flags |= MS_ACTIVE;
-                ns->proc_mnt = mnt;
        }
-        simple_set_mnt(mnt, sb);
+        return dget(sb->s_root);
-        return 0;
 }
 static void proc_kill_sb(struct super_block *sb)
@@ -97,7 +95,7 @@ static void proc_kill_sb(struct super_block *sb)
 static struct file_system_type proc_fs_type = {
        .name           = "proc",
-        .get_sb         = proc_get_sb,
+        .mount          = proc_mount,
        .kill_sb        = proc_kill_sb,
 };
@@ -115,6 +113,7 @@ void __init proc_root_init(void)
                return;
        }
+        init_pid_ns.proc_mnt = proc_mnt;
        proc_symlink("mounts", NULL, "self/mounts");
        proc_net_init();
@@ -213,6 +212,7 @@ int pid_ns_prepare_proc(struct pid_namespace *ns)
        if (IS_ERR(mnt))
                return PTR_ERR(mnt);
+        ns->proc_mnt = mnt;
        return 0;
 }
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 01bad30026fc..fcada42f1aa3 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -454,17 +454,16 @@ static void destroy_inodecache(void)
        kmem_cache_destroy(qnx4_inode_cachep);
 }
-static int qnx4_get_sb(struct file_system_type *fs_type,
+static struct dentry *qnx4_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+        int flags, const char *dev_name, void *data)
 {
-        return get_sb_bdev(fs_type, flags, dev_name, data, qnx4_fill_super,
+        return mount_bdev(fs_type, flags, dev_name, data, qnx4_fill_super);
-                           mnt);
 }
 static struct file_system_type qnx4_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "qnx4",
-        .get_sb         = qnx4_get_sb,
+        .mount          = qnx4_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 67fadb1ad2c1..eacb166fb259 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -255,17 +255,16 @@ fail:
        return err;
 }
-int ramfs_get_sb(struct file_system_type *fs_type,
+struct dentry *ramfs_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+        int flags, const char *dev_name, void *data)
 {
-        return get_sb_nodev(fs_type, flags, data, ramfs_fill_super, mnt);
+        return mount_nodev(fs_type, flags, data, ramfs_fill_super);
 }
-static int rootfs_get_sb(struct file_system_type *fs_type,
+static struct dentry *rootfs_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+        int flags, const char *dev_name, void *data)
 {
-        return get_sb_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super,
+        return mount_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super);
-                            mnt);
 }
 static void ramfs_kill_sb(struct super_block *sb)
@@ -276,12 +275,12 @@ static void ramfs_kill_sb(struct super_block *sb)
 static struct file_system_type ramfs_fs_type = {
        .name           = "ramfs",
-        .get_sb         = ramfs_get_sb,
+        .mount          = ramfs_mount,
        .kill_sb        = ramfs_kill_sb,
 };
 static struct file_system_type rootfs_fs_type = {
        .name           = "rootfs",
-        .get_sb         = rootfs_get_sb,
+        .mount          = rootfs_mount,
        .kill_sb        = kill_litter_super,
 };
diff --git a/fs/read_write.c b/fs/read_write.c
index 9cd9d148105d..431a0ed610c8 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -243,8 +243,6 @@ bad:
 * them to something that fits in "int" so that others
 * won't have to do range checks all the time.
 */
-#define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK)
 int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count)
 {
        struct inode *inode;
@@ -584,65 +582,71 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
                              unsigned long nr_segs, unsigned long fast_segs,
                              struct iovec *fast_pointer,
                              struct iovec **ret_pointer)
-  {
+{
        unsigned long seg;
-        ssize_t ret;
+        ssize_t ret;
        struct iovec *iov = fast_pointer;
-        /*
+        /*
-         * SuS says "The readv() function *may* fail if the iovcnt argument
+         * SuS says "The readv() function *may* fail if the iovcnt argument
-         * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
+         * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
-         * traditionally returned zero for zero segments, so...
+         * traditionally returned zero for zero segments, so...
-         */
+         */
        if (nr_segs == 0) {
                ret = 0;
-                goto out;
+                goto out;
        }
-        /*
+        /*
-         * First get the "struct iovec" from user memory and
+         * First get the "struct iovec" from user memory and
-         * verify all the pointers
+         * verify all the pointers
-         */
+         */
        if (nr_segs > UIO_MAXIOV) {
                ret = -EINVAL;
-                goto out;
+                goto out;
        }
        if (nr_segs > fast_segs) {
-                iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
+                iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
                if (iov == NULL) {
                        ret = -ENOMEM;
-                        goto out;
+                        goto out;
                }
-        }
+        }
        if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
                ret = -EFAULT;
-                goto out;
+                goto out;
        }
-        /*
+        /*
         * According to the Single Unix Specification we should return EINVAL
         * if an element length is < 0 when cast to ssize_t or if the
         * total length would overflow the ssize_t return value of the
         * system call.
-         */
+         *
+         * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
+         * overflow case.
+         */
        ret = 0;
-        for (seg = 0; seg < nr_segs; seg++) {
+        for (seg = 0; seg < nr_segs; seg++) {
-                void __user *buf = iov[seg].iov_base;
+                void __user *buf = iov[seg].iov_base;
-                ssize_t len = (ssize_t)iov[seg].iov_len;
+                ssize_t len = (ssize_t)iov[seg].iov_len;
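-                /* see if we we're about to use an invalid len or if
-                 * it's about to overflow ssize_t */
+                /* reject a negative len; overflow of the total is avoided
+                 * below by capping it at MAX_RW_COUNT */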
-                if (len < 0 || (ret + len < ret)) {
+                if (len < 0) {
                        ret = -EINVAL;
-                        goto out;
+                        goto out;
                }
                if (unlikely(!access_ok(vrfy_dir(type), buf, len))) {
                        ret = -EFAULT;
-                        goto out;
+                        goto out;
+                }
+                if (len > MAX_RW_COUNT - ret) {
+                        len = MAX_RW_COUNT - ret;
+                        iov[seg].iov_len = len;
                }
                ret += len;
-        }
+        }
 out:
        *ret_pointer = iov;
        return ret;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index e15ff612002d..3bf7a6457f4d 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2213,12 +2213,11 @@ out:
 #endif
-static int get_super_block(struct file_system_type *fs_type,
+static struct dentry *get_super_block(struct file_system_type *fs_type,
                           int flags, const char *dev_name,
-                           void *data, struct vfsmount *mnt)
+                           void *data)
 {
-        return get_sb_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super,
+        return mount_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super);
-                           mnt);
 }
 static int __init init_reiserfs_fs(void)
@@ -2253,7 +2252,7 @@ static void __exit exit_reiserfs_fs(void)
 struct file_system_type reiserfs_fs_type = {
        .owner = THIS_MODULE,
        .name = "reiserfs",
-        .get_sb = get_super_block,
+        .mount = get_super_block,
        .kill_sb = reiserfs_kill_sb,
        .fs_flags = FS_REQUIRES_DEV,
 };
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 268580535c92..6647f90e55cd 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -552,20 +552,19 @@ error_rsb:
 /*
 * get a superblock for mounting
 */
-static int romfs_get_sb(struct file_system_type *fs_type,
+static struct dentry *romfs_mount(struct file_system_type *fs_type,
                        int flags, const char *dev_name,
-                        void *data, struct vfsmount *mnt)
+                        void *data)
 {
-        int ret = -EINVAL;
+        struct dentry *ret = ERR_PTR(-EINVAL);
 #ifdef CONFIG_ROMFS_ON_MTD
-        ret = get_sb_mtd(fs_type, flags, dev_name, data, romfs_fill_super,
+        ret = mount_mtd(fs_type, flags, dev_name, data, romfs_fill_super);
-                         mnt);
 #endif
 #ifdef CONFIG_ROMFS_ON_BLOCK
-        if (ret == -EINVAL)
+        if (ret == ERR_PTR(-EINVAL))
-                ret = get_sb_bdev(fs_type, flags, dev_name, data,
+                ret = mount_bdev(fs_type, flags, dev_name, data,
-                                  romfs_fill_super, mnt);
+                                  romfs_fill_super);
 #endif
        return ret;
 }
@@ -592,7 +591,7 @@ static void romfs_kill_sb(struct super_block *sb)
 static struct file_system_type romfs_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "romfs",
-        .get_sb         = romfs_get_sb,
+        .mount          = romfs_mount,
        .kill_sb        = romfs_kill_sb,
        .fs_flags       = FS_REQUIRES_DEV,
 };
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 07a4f1156048..24de30ba34c1 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -370,12 +370,10 @@ static void squashfs_put_super(struct super_block *sb)
 }
-static int squashfs_get_sb(struct file_system_type *fs_type, int flags,
+static struct dentry *squashfs_mount(struct file_system_type *fs_type, int flags,
-                                const char *dev_name, void *data,
+                                const char *dev_name, void *data)
-                                struct vfsmount *mnt)
 {
-        return get_sb_bdev(fs_type, flags, dev_name, data, squashfs_fill_super,
+        return mount_bdev(fs_type, flags, dev_name, data, squashfs_fill_super);
-                                mnt);
 }
@@ -451,7 +449,7 @@ static void squashfs_destroy_inode(struct inode *inode)
 static struct file_system_type squashfs_fs_type = {
        .owner = THIS_MODULE,
        .name = "squashfs",
-        .get_sb = squashfs_get_sb,
+        .mount = squashfs_mount,
        .kill_sb = kill_block_super,
        .fs_flags = FS_REQUIRES_DEV
 };
diff --git a/fs/squashfs/xattr.c b/fs/squashfs/xattr.c
index 652b8541f9c6..3876c36699a1 100644
--- a/fs/squashfs/xattr.c
+++ b/fs/squashfs/xattr.c
@@ -158,17 +158,18 @@ static int squashfs_xattr_get(struct inode *inode, int name_index,
                                        strncmp(target, name, name_size) == 0) {
                        /* found xattr */
                        if (type & SQUASHFS_XATTR_VALUE_OOL) {
-                                __le64 xattr;
+                                __le64 xattr_val;
+                                u64 xattr;
                                /* val is a reference to the real location */
                                err = squashfs_read_metadata(sb, &val, &start,
                                                &offset, sizeof(val));
                                if (err < 0)
                                        goto failed;
-                                err = squashfs_read_metadata(sb, &xattr, &start,
+                                err = squashfs_read_metadata(sb, &xattr_val,
-                                         &offset, sizeof(xattr));
+                                        &start, &offset, sizeof(xattr_val));
                                if (err < 0)
                                        goto failed;
-                                xattr = le64_to_cpu(xattr);
+                                xattr = le64_to_cpu(xattr_val);
                                start = SQUASHFS_XATTR_BLK(xattr) +
                                                        msblk->xattr_table;
                                offset = SQUASHFS_XATTR_OFFSET(xattr);
diff --git a/fs/squashfs/xattr.h b/fs/squashfs/xattr.h
index 49fe0d719fbf..b634efce4bde 100644
--- a/fs/squashfs/xattr.h
+++ b/fs/squashfs/xattr.h
@@ -25,7 +25,7 @@
 extern __le64 *squashfs_read_xattr_id_table(struct super_block *, u64,
                u64 *, int *);
 extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *,
-                int *, unsigned long long *);
+                unsigned int *, unsigned long long *);
 #else
 static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb,
                u64 start, u64 *xattr_table_start, int *xattr_ids)
@@ -35,7 +35,7 @@ static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb,
 }
 static inline int squashfs_xattr_lookup(struct super_block *sb,
-                unsigned int index, int *count, int *size,
+                unsigned int index, int *count, unsigned int *size,
                unsigned long long *xattr)
 {
        return 0;
diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c
index cfb41106098f..d33be5dd6c32 100644
--- a/fs/squashfs/xattr_id.c
+++ b/fs/squashfs/xattr_id.c
@@ -34,6 +34,7 @@
 #include "squashfs_fs_sb.h"
 #include "squashfs_fs_i.h"
 #include "squashfs.h"
+#include "xattr.h"
 /*
 * Map xattr id using the xattr id look up table
diff --git a/fs/super.c b/fs/super.c
index b9c9869165db..ca696155cd9a 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -715,15 +715,14 @@ static int ns_set_super(struct super_block *sb, void *data)
        return set_anon_super(sb, NULL);
 }
-int get_sb_ns(struct file_system_type *fs_type, int flags, void *data,
+struct dentry *mount_ns(struct file_system_type *fs_type, int flags,
-        int (*fill_super)(struct super_block *, void *, int),
+        void *data, int (*fill_super)(struct super_block *, void *, int))
-        struct vfsmount *mnt)
 {
        struct super_block *sb;
        sb = sget(fs_type, ns_test_super, ns_set_super, data);
        if (IS_ERR(sb))
-                return PTR_ERR(sb);
+                return ERR_CAST(sb);
        if (!sb->s_root) {
                int err;
@@ -731,17 +730,16 @@ int get_sb_ns(struct file_system_type *fs_type, int flags, void *data,
                err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
                if (err) {
                        deactivate_locked_super(sb);
-                        return err;
+                        return ERR_PTR(err);
                }
                sb->s_flags |= MS_ACTIVE;
        }
-        simple_set_mnt(mnt, sb);
+        return dget(sb->s_root);
-        return 0;
 }
-EXPORT_SYMBOL(get_sb_ns);
+EXPORT_SYMBOL(mount_ns);
 #ifdef CONFIG_BLOCK
 static int set_bdev_super(struct super_block *s, void *data)
@@ -762,10 +760,9 @@ static int test_bdev_super(struct super_block *s, void *data)
        return (void *)s->s_bdev == data;
 }
-int get_sb_bdev(struct file_system_type *fs_type,
+struct dentry *mount_bdev(struct file_system_type *fs_type,
        int flags, const char *dev_name, void *data,
-        int (*fill_super)(struct super_block *, void *, int),
+        int (*fill_super)(struct super_block *, void *, int))
-        struct vfsmount *mnt)
 {
        struct block_device *bdev;
        struct super_block *s;
@@ -777,7 +774,7 @@ int get_sb_bdev(struct file_system_type *fs_type,
        bdev = open_bdev_exclusive(dev_name, mode, fs_type);
        if (IS_ERR(bdev))
-                return PTR_ERR(bdev);
+                return ERR_CAST(bdev);
        /*
         * once the super is inserted into the list by sget, s_umount
@@ -829,15 +826,30 @@ int get_sb_bdev(struct file_system_type *fs_type,
                bdev->bd_super = s;
        }
-        simple_set_mnt(mnt, s);
+        return dget(s->s_root);
-        return 0;
 error_s:
        error = PTR_ERR(s);
 error_bdev:
        close_bdev_exclusive(bdev, mode);
 error:
-        return error;
+        return ERR_PTR(error);
+}
+EXPORT_SYMBOL(mount_bdev);
+int get_sb_bdev(struct file_system_type *fs_type,
+        int flags, const char *dev_name, void *data,
+        int (*fill_super)(struct super_block *, void *, int),
+        struct vfsmount *mnt)
+{
+        struct dentry *root;
+        root = mount_bdev(fs_type, flags, dev_name, data, fill_super);
+        if (IS_ERR(root))
+                return PTR_ERR(root);
+        mnt->mnt_root = root;
+        mnt->mnt_sb = root->d_sb;
+        return 0;
 }
 EXPORT_SYMBOL(get_sb_bdev);
@@ -856,29 +868,42 @@ void kill_block_super(struct super_block *sb)
 EXPORT_SYMBOL(kill_block_super);
 #endif
-int get_sb_nodev(struct file_system_type *fs_type,
+struct dentry *mount_nodev(struct file_system_type *fs_type,
        int flags, void *data,
-        int (*fill_super)(struct super_block *, void *, int),
+        int (*fill_super)(struct super_block *, void *, int))
-        struct vfsmount *mnt)
 {
        int error;
        struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
        if (IS_ERR(s))
-                return PTR_ERR(s);
+                return ERR_CAST(s);
        s->s_flags = flags;
        error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
        if (error) {
                deactivate_locked_super(s);
-                return error;
+                return ERR_PTR(error);
        }
        s->s_flags |= MS_ACTIVE;
-        simple_set_mnt(mnt, s);
+        return dget(s->s_root);
-        return 0;
 }
+EXPORT_SYMBOL(mount_nodev);
+int get_sb_nodev(struct file_system_type *fs_type,
+        int flags, void *data,
+        int (*fill_super)(struct super_block *, void *, int),
+        struct vfsmount *mnt)
+{
+        struct dentry *root;
+        root = mount_nodev(fs_type, flags, data, fill_super);
+        if (IS_ERR(root))
+                return PTR_ERR(root);
+        mnt->mnt_root = root;
+        mnt->mnt_sb = root->d_sb;
+        return 0;
+}
 EXPORT_SYMBOL(get_sb_nodev);
 static int compare_single(struct super_block *s, void *p)
@@ -886,29 +911,42 @@ static int compare_single(struct super_block *s, void *p)
        return 1;
 }
-int get_sb_single(struct file_system_type *fs_type,
+struct dentry *mount_single(struct file_system_type *fs_type,
        int flags, void *data,
-        int (*fill_super)(struct super_block *, void *, int),
+        int (*fill_super)(struct super_block *, void *, int))
-        struct vfsmount *mnt)
 {
        struct super_block *s;
        int error;
        s = sget(fs_type, compare_single, set_anon_super, NULL);
        if (IS_ERR(s))
-                return PTR_ERR(s);
+                return ERR_CAST(s);
        if (!s->s_root) {
                s->s_flags = flags;
                error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
                if (error) {
                        deactivate_locked_super(s);
-                        return error;
+                        return ERR_PTR(error);
                }
                s->s_flags |= MS_ACTIVE;
        } else {
                do_remount_sb(s, flags, data, 0);
        }
-        simple_set_mnt(mnt, s);
+        return dget(s->s_root);
+}
+EXPORT_SYMBOL(mount_single);
+int get_sb_single(struct file_system_type *fs_type,
+        int flags, void *data,
+        int (*fill_super)(struct super_block *, void *, int),
+        struct vfsmount *mnt)
+{
+        struct dentry *root;
+        root = mount_single(fs_type, flags, data, fill_super);
+        if (IS_ERR(root))
+                return PTR_ERR(root);
+        mnt->mnt_root = root;
+        mnt->mnt_sb = root->d_sb;
        return 0;
 }
@@ -918,6 +956,7 @@ struct vfsmount *
 vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
 {
        struct vfsmount *mnt;
+        struct dentry *root;
        char *secdata = NULL;
        int error;
@@ -942,9 +981,19 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
                        goto out_free_secdata;
        }
-        error = type->get_sb(type, flags, name, data, mnt);
+        if (type->mount) {
-        if (error < 0)
+                root = type->mount(type, flags, name, data);
-                goto out_free_secdata;
+                if (IS_ERR(root)) {
+                        error = PTR_ERR(root);
+                        goto out_free_secdata;
+                }
+                mnt->mnt_root = root;
+                mnt->mnt_sb = root->d_sb;
+        } else {
+                error = type->get_sb(type, flags, name, data, mnt);
+                if (error < 0)
+                        goto out_free_secdata;
+        }
        BUG_ON(!mnt->mnt_sb);
        WARN_ON(!mnt->mnt_sb->s_bdi);
        mnt->mnt_sb->s_flags |= MS_BORN;
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index f2af22574c50..266895783b47 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -23,7 +23,7 @@
 #include "sysfs.h"
-static struct vfsmount *sysfs_mount;
+static struct vfsmount *sysfs_mnt;
 struct kmem_cache *sysfs_dir_cachep;
 static const struct super_operations sysfs_ops = {
@@ -95,18 +95,17 @@ static int sysfs_set_super(struct super_block *sb, void *data)
        return error;
 }
-static int sysfs_get_sb(struct file_system_type *fs_type,
+static struct dentry *sysfs_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+        int flags, const char *dev_name, void *data)
 {
        struct sysfs_super_info *info;
        enum kobj_ns_type type;
        struct super_block *sb;
        int error;
-        error = -ENOMEM;
        info = kzalloc(sizeof(*info), GFP_KERNEL);
        if (!info)
-                goto out;
+                return ERR_PTR(-ENOMEM);
        for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++)
                info->ns[type] = kobj_ns_current(type);
@@ -114,24 +113,19 @@ static int sysfs_get_sb(struct file_system_type *fs_type,
        sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info);
        if (IS_ERR(sb) || sb->s_fs_info != info)
                kfree(info);
-        if (IS_ERR(sb)) {
+        if (IS_ERR(sb))
-                error = PTR_ERR(sb);
+                return ERR_CAST(sb);
-                goto out;
-        }
        if (!sb->s_root) {
                sb->s_flags = flags;
                error = sysfs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
                if (error) {
                        deactivate_locked_super(sb);
-                        goto out;
+                        return ERR_PTR(error);
                }
                sb->s_flags |= MS_ACTIVE;
        }
-        simple_set_mnt(mnt, sb);
+        return dget(sb->s_root);
-        error = 0;
-out:
-        return error;
 }
 static void sysfs_kill_sb(struct super_block *sb)
@@ -147,7 +141,7 @@ static void sysfs_kill_sb(struct super_block *sb)
 static struct file_system_type sysfs_fs_type = {
        .name           = "sysfs",
-        .get_sb         = sysfs_get_sb,
+        .mount          = sysfs_mount,
        .kill_sb        = sysfs_kill_sb,
 };
@@ -189,11 +183,11 @@ int __init sysfs_init(void)
        err = register_filesystem(&sysfs_fs_type);
        if (!err) {
-                sysfs_mount = kern_mount(&sysfs_fs_type);
+                sysfs_mnt = kern_mount(&sysfs_fs_type);
-                if (IS_ERR(sysfs_mount)) {
+                if (IS_ERR(sysfs_mnt)) {
                        printk(KERN_ERR "sysfs: could not mount!\n");
-                        err = PTR_ERR(sysfs_mount);
+                        err = PTR_ERR(sysfs_mnt);
-                        sysfs_mount = NULL;
+                        sysfs_mnt = NULL;
                        unregister_filesystem(&sysfs_fs_type);
                        goto out_err;
                }
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index a0b0cda6927e..3d9c62be0c10 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -526,23 +526,22 @@ failed:
 /* Every kernel module contains stuff like this. */
-static int sysv_get_sb(struct file_system_type *fs_type,
+static struct dentry *sysv_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+        int flags, const char *dev_name, void *data)
 {
-        return get_sb_bdev(fs_type, flags, dev_name, data, sysv_fill_super,
+        return mount_bdev(fs_type, flags, dev_name, data, sysv_fill_super);
-                           mnt);
 }
-static int v7_get_sb(struct file_system_type *fs_type,
+static struct dentry *v7_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+        int flags, const char *dev_name, void *data)
 {
-        return get_sb_bdev(fs_type, flags, dev_name, data, v7_fill_super, mnt);
+        return mount_bdev(fs_type, flags, dev_name, data, v7_fill_super);
 }
 static struct file_system_type sysv_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "sysv",
-        .get_sb         = sysv_get_sb,
+        .mount          = sysv_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
@@ -550,7 +549,7 @@ static struct file_system_type sysv_fs_type = {
 static struct file_system_type v7_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "v7",
-        .get_sb         = v7_get_sb,
+        .mount          = v7_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 9a47c9f0ad07..91fac54c70e3 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2038,8 +2038,8 @@ static int sb_test(struct super_block *sb, void *data)
        return c->vi.cdev == *dev;
 }
-static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
+static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
-                        const char *name, void *data, struct vfsmount *mnt)
+                        const char *name, void *data)
 {
        struct ubi_volume_desc *ubi;
        struct ubi_volume_info vi;
@@ -2057,7 +2057,7 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
        if (IS_ERR(ubi)) {
                dbg_err("cannot open \"%s\", error %d",
                        name, (int)PTR_ERR(ubi));
-                return PTR_ERR(ubi);
+                return ERR_CAST(ubi);
        }
        ubi_get_volume_info(ubi, &vi);
@@ -2095,20 +2095,19 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
        /* 'fill_super()' opens ubi again so we must close it here */
        ubi_close_volume(ubi);
-        simple_set_mnt(mnt, sb);
+        return dget(sb->s_root);
-        return 0;
 out_deact:
        deactivate_locked_super(sb);
 out_close:
        ubi_close_volume(ubi);
-        return err;
+        return ERR_PTR(err);
 }
 static struct file_system_type ubifs_fs_type = {
        .name    = "ubifs",
        .owner   = THIS_MODULE,
-        .get_sb  = ubifs_get_sb,
+        .mount   = ubifs_mount,
        .kill_sb = kill_anon_super,
 };
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 76f3d6d97b40..4a5c7c61836a 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -107,17 +107,16 @@ struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi)
 }
 /* UDF filesystem type */
-static int udf_get_sb(struct file_system_type *fs_type,
+static struct dentry *udf_mount(struct file_system_type *fs_type,
-                      int flags, const char *dev_name, void *data,
+                      int flags, const char *dev_name, void *data)
-                      struct vfsmount *mnt)
 {
-        return get_sb_bdev(fs_type, flags, dev_name, data, udf_fill_super, mnt);
+        return mount_bdev(fs_type, flags, dev_name, data, udf_fill_super);
 }
 static struct file_system_type udf_fstype = {
        .owner          = THIS_MODULE,
        .name           = "udf",
-        .get_sb         = udf_get_sb,
+        .mount          = udf_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 6b9be90dae7d..2c47daed56da 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1454,16 +1454,16 @@ static const struct super_operations ufs_super_ops = {
        .show_options   = ufs_show_options,
 };
-static int ufs_get_sb(struct file_system_type *fs_type,
+static struct dentry *ufs_mount(struct file_system_type *fs_type,
-        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+        int flags, const char *dev_name, void *data)
 {
-        return get_sb_bdev(fs_type, flags, dev_name, data, ufs_fill_super, mnt);
+        return mount_bdev(fs_type, flags, dev_name, data, ufs_fill_super);
 }
 static struct file_system_type ufs_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "ufs",
-        .get_sb         = ufs_get_sb,
+        .mount          = ufs_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index c9af48fffcd7..7d287afccde5 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1111,11 +1111,12 @@ xfs_vm_writepage(
                        uptodate = 0;
                /*
-                 * A hole may still be marked uptodate because discard_buffer
+                 * set_page_dirty dirties all buffers in a page, independent
-                 * leaves the flag set.
+                 * of their state.  The dirty state however is entirely
+                 * meaningless for holes (!mapped && uptodate), so skip
+                 * buffers covering holes here.
                 */
                if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
-                        ASSERT(!buffer_dirty(bh));
                        imap_valid = 0;
                        continue;
                }
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 63fd2c07cb57..aa1d353def29 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1781,7 +1781,6 @@ xfs_buf_delwri_split(
        INIT_LIST_HEAD(list);
        spin_lock(dwlk);
        list_for_each_entry_safe(bp, n, dwq, b_list) {
-                trace_xfs_buf_delwri_split(bp, _RET_IP_);
                ASSERT(bp->b_flags & XBF_DELWRI);
                if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) {
@@ -1795,6 +1794,7 @@ xfs_buf_delwri_split(
                                         _XBF_RUN_QUEUES);
                        bp->b_flags |= XBF_WRITE;
                        list_move_tail(&bp->b_list, list);
+                        trace_xfs_buf_delwri_split(bp, _RET_IP_);
                } else
                        skipped++;
        }
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 2ea238f6d38e..ad442d9e392e 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -416,7 +416,7 @@ xfs_attrlist_by_handle(
        if (IS_ERR(dentry))
                return PTR_ERR(dentry);
-        kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL);
+        kbuf = kzalloc(al_hreq.buflen, GFP_KERNEL);
        if (!kbuf)
                goto out_dput;
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 96107efc0c61..94d5fd6a2973 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -762,7 +762,8 @@ xfs_setup_inode(
        inode->i_state = I_NEW;
        inode_sb_list_add(inode);
-        insert_inode_hash(inode);
+        /* make the inode look hashed for the writeback code */
+        hlist_add_fake(&inode->i_hash);
        inode->i_mode   = ip->i_d.di_mode;
        inode->i_nlink  = ip->i_d.di_nlink;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index cf808782c065..064f964d4f3c 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -353,9 +353,6 @@ xfs_parseargs(
                        mp->m_qflags &= ~XFS_OQUOTA_ENFD;
                } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
                        mp->m_flags |= XFS_MOUNT_DELAYLOG;
-                        cmn_err(CE_WARN,
-                                "Enabling EXPERIMENTAL delayed logging feature "
-                                "- use at your own risk.\n");
                } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
                        mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
                } else if (!strcmp(this_char, "ihashsize")) {
@@ -1609,16 +1606,14 @@ xfs_fs_fill_super(
        goto out_free_sb;
 }
-STATIC int
+STATIC struct dentry *
-xfs_fs_get_sb(
+xfs_fs_mount(
        struct file_system_type *fs_type,
        int                     flags,
        const char              *dev_name,
-        void                    *data,
+        void                    *data)
-        struct vfsmount         *mnt)
 {
-        return get_sb_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super,
+        return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
-                           mnt);
 }
 static const struct super_operations xfs_super_operations = {
@@ -1639,7 +1634,7 @@ static const struct super_operations xfs_super_operations = {
 static struct file_system_type xfs_fs_type = {
        .owner                  = THIS_MODULE,
        .name                   = "xfs",
-        .get_sb                 = xfs_fs_get_sb,
+        .mount                  = xfs_fs_mount,
        .kill_sb                = kill_block_super,
        .fs_flags               = FS_REQUIRES_DEV,
 };
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 37d33254981d..afb0d7cfad1c 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -853,6 +853,7 @@ restart:
                if (trylock) {
                        if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
                                skipped++;
+                                xfs_perag_put(pag);
                                continue;
                        }
                        first_index = pag->pag_ici_reclaim_cursor;
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 9b715dce5699..9124425b7f2f 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -744,9 +744,15 @@ xfs_filestream_new_ag(
         * If the file's parent directory is known, take its iolock in exclusive
         * mode to prevent two sibling files from racing each other to migrate
         * themselves and their parent to different AGs.
+         *
+         * Note that we lock the parent directory iolock inside the child
+         * iolock here.  That's fine as we never hold both parent and child
+         * iolock in any other place.  This is different from the ilock,
+         * which requires locking of the child after the parent for namespace
+         * operations.
         */
        if (pip)
-                xfs_ilock(pip, XFS_IOLOCK_EXCL);
+                xfs_ilock(pip, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
        /*
         * A new AG needs to be found for the file.  If the file's parent
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index b1498ab5a399..19e9dfa1c254 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -275,6 +275,7 @@ xfs_free_perag(
                pag = radix_tree_delete(&mp->m_perag_tree, agno);
                spin_unlock(&mp->m_perag_lock);
                ASSERT(pag);
+                ASSERT(atomic_read(&pag->pag_ref) == 0);
                call_rcu(&pag->rcu_head, __xfs_free_perag);
        }
 }
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index e0e64b113bd6..9bb6eda4cd21 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -346,8 +346,17 @@ xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid,
 #define xfs_trans_mod_dquot_byino(tp, ip, fields, delta)
 #define xfs_trans_apply_dquot_deltas(tp)
 #define xfs_trans_unreserve_and_mod_dquots(tp)
-#define xfs_trans_reserve_quota_nblks(tp, ip, nblks, ninos, flags)      (0)
+static inline int xfs_trans_reserve_quota_nblks(struct xfs_trans *tp,
-#define xfs_trans_reserve_quota_bydquots(tp, mp, u, g, nb, ni, fl)      (0)
+                struct xfs_inode *ip, long nblks, long ninos, uint flags)
+{
+        return 0;
+}
+static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp,
+                struct xfs_mount *mp, struct xfs_dquot *udqp,
+                struct xfs_dquot *gdqp, long nblks, long nions, uint flags)
+{
+        return 0;
+}
 #define xfs_qm_vop_create_dqattach(tp, ip, u, g)
 #define xfs_qm_vop_rename_dqattach(it)                                  (0)
 #define xfs_qm_vop_chown(tp, ip, old, new)                              (NULL)
@@ -357,11 +366,14 @@ xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid,
 #define xfs_qm_dqdetach(ip)
 #define xfs_qm_dqrele(d)
 #define xfs_qm_statvfs(ip, s)
-#define xfs_qm_sync(mp, fl)                                             (0)
+static inline int xfs_qm_sync(struct xfs_mount *mp, int flags)
+{
+        return 0;
+}
 #define xfs_qm_newmount(mp, a, b)                                       (0)
 #define xfs_qm_mount_quotas(mp)
 #define xfs_qm_unmount(mp)
-#define xfs_qm_unmount_quotas(mp)                                       (0)
+#define xfs_qm_unmount_quotas(mp)
 #endif /* CONFIG_XFS_QUOTA */
 #define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \