Merge branch 'sh/stable-updates' into sh-latest

author: Paul Mundt <lethal@linux-sh.org> 2011-06-21 04:47:44 -0400
committer: Paul Mundt <lethal@linux-sh.org> 2011-06-21 04:47:44 -0400
commit: 9dd056e9eba106ef622795b566f769a9ab0a49a8 (patch)
tree: 390c07ea2c3b9631c2e8fe64c1a6feba0b503e8b /fs
parent: 08ef2e427b59393d68a65b16e97e894b662a5573 (diff)
parent: f2b9726105824fdeea32a339e5072a358f89a25b (diff)
41 files changed, 419 insertions, 514 deletions
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 20c106f24927..1b0b19550015 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -584,11 +584,11 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
 success:
        d_add(dentry, inode);
-        _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%llu }",
+        _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%u }",
               fid.vnode,
               fid.unique,
               dentry->d_inode->i_ino,
-               (unsigned long long)dentry->d_inode->i_version);
+               dentry->d_inode->i_generation);
        return NULL;
 }
@@ -671,10 +671,10 @@ static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
                 * been deleted and replaced, and the original vnode ID has
                 * been reused */
                if (fid.unique != vnode->fid.unique) {
-                        _debug("%s: file deleted (uq %u -> %u I:%llu)",
+                        _debug("%s: file deleted (uq %u -> %u I:%u)",
                               dentry->d_name.name, fid.unique,
                               vnode->fid.unique,
-                               (unsigned long long)dentry->d_inode->i_version);
+                               dentry->d_inode->i_generation);
                        spin_lock(&vnode->lock);
                        set_bit(AFS_VNODE_DELETED, &vnode->flags);
                        spin_unlock(&vnode->lock);
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 4bd0218473a9..346e3289abd7 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -89,7 +89,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
                        i_size_write(&vnode->vfs_inode, size);
                        vnode->vfs_inode.i_uid = status->owner;
                        vnode->vfs_inode.i_gid = status->group;
-                        vnode->vfs_inode.i_version = vnode->fid.unique;
+                        vnode->vfs_inode.i_generation = vnode->fid.unique;
                        vnode->vfs_inode.i_nlink = status->nlink;
                        mode = vnode->vfs_inode.i_mode;
@@ -102,6 +102,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
                vnode->vfs_inode.i_ctime.tv_sec = status->mtime_server;
                vnode->vfs_inode.i_mtime        = vnode->vfs_inode.i_ctime;
                vnode->vfs_inode.i_atime        = vnode->vfs_inode.i_ctime;
+                vnode->vfs_inode.i_version      = data_version;
        }
        expected_version = status->data_version;
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index db66c5201474..0fdab6e03d87 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -75,7 +75,8 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
        inode->i_ctime.tv_nsec  = 0;
        inode->i_atime          = inode->i_mtime = inode->i_ctime;
        inode->i_blocks         = 0;
-        inode->i_version        = vnode->fid.unique;
+        inode->i_generation     = vnode->fid.unique;
+        inode->i_version        = vnode->status.data_version;
        inode->i_mapping->a_ops = &afs_fs_aops;
        /* check to see whether a symbolic link is really a mountpoint */
@@ -100,7 +101,7 @@ static int afs_iget5_test(struct inode *inode, void *opaque)
        struct afs_iget_data *data = opaque;
        return inode->i_ino == data->fid.vnode &&
-                inode->i_version == data->fid.unique;
+                inode->i_generation == data->fid.unique;
 }
 /*
@@ -122,7 +123,7 @@ static int afs_iget5_set(struct inode *inode, void *opaque)
        struct afs_vnode *vnode = AFS_FS_I(inode);
        inode->i_ino = data->fid.vnode;
-        inode->i_version = data->fid.unique;
+        inode->i_generation = data->fid.unique;
        vnode->fid = data->fid;
        vnode->volume = data->volume;
@@ -380,8 +381,7 @@ int afs_getattr(struct vfsmount *mnt, struct dentry *dentry,
        inode = dentry->d_inode;
-        _enter("{ ino=%lu v=%llu }", inode->i_ino,
+        _enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation);
-                (unsigned long long)inode->i_version);
        generic_fillattr(inode, stat);
        return 0;
diff --git a/fs/afs/super.c b/fs/afs/super.c
index fb240e8766d6..356dcf0929e8 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -31,8 +31,8 @@
 static void afs_i_init_once(void *foo);
 static struct dentry *afs_mount(struct file_system_type *fs_type,
                      int flags, const char *dev_name, void *data);
+static void afs_kill_super(struct super_block *sb);
 static struct inode *afs_alloc_inode(struct super_block *sb);
-static void afs_put_super(struct super_block *sb);
 static void afs_destroy_inode(struct inode *inode);
 static int afs_statfs(struct dentry *dentry, struct kstatfs *buf);
@@ -40,7 +40,7 @@ struct file_system_type afs_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "afs",
        .mount          = afs_mount,
-        .kill_sb        = kill_anon_super,
+        .kill_sb        = afs_kill_super,
        .fs_flags       = 0,
 };
@@ -50,7 +50,6 @@ static const struct super_operations afs_super_ops = {
        .drop_inode     = afs_drop_inode,
        .destroy_inode  = afs_destroy_inode,
        .evict_inode    = afs_evict_inode,
-        .put_super      = afs_put_super,
        .show_options   = generic_show_options,
 };
@@ -282,19 +281,25 @@ static int afs_parse_device_name(struct afs_mount_params *params,
 */
 static int afs_test_super(struct super_block *sb, void *data)
 {
-        struct afs_mount_params *params = data;
+        struct afs_super_info *as1 = data;
        struct afs_super_info *as = sb->s_fs_info;
-        return as->volume == params->volume;
+        return as->volume == as1->volume;
+}
+static int afs_set_super(struct super_block *sb, void *data)
+{
+        sb->s_fs_info = data;
+        return set_anon_super(sb, NULL);
 }
 /*
 * fill in the superblock
 */
-static int afs_fill_super(struct super_block *sb, void *data)
+static int afs_fill_super(struct super_block *sb,
+                          struct afs_mount_params *params)
 {
-        struct afs_mount_params *params = data;
+        struct afs_super_info *as = sb->s_fs_info;
-        struct afs_super_info *as = NULL;
        struct afs_fid fid;
        struct dentry *root = NULL;
        struct inode *inode = NULL;
@@ -302,23 +307,13 @@ static int afs_fill_super(struct super_block *sb, void *data)
        _enter("");
-        /* allocate a superblock info record */
-        as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL);
-        if (!as) {
-                _leave(" = -ENOMEM");
-                return -ENOMEM;
-        }
-        afs_get_volume(params->volume);
-        as->volume = params->volume;
        /* fill in the superblock */
        sb->s_blocksize         = PAGE_CACHE_SIZE;
        sb->s_blocksize_bits    = PAGE_CACHE_SHIFT;
        sb->s_magic             = AFS_FS_MAGIC;
        sb->s_op                = &afs_super_ops;
-        sb->s_fs_info           = as;
        sb->s_bdi               = &as->volume->bdi;
+        strlcpy(sb->s_id, as->volume->vlocation->vldb.name, sizeof(sb->s_id));
        /* allocate the root inode and dentry */
        fid.vid         = as->volume->vid;
@@ -326,7 +321,7 @@ static int afs_fill_super(struct super_block *sb, void *data)
        fid.unique      = 1;
        inode = afs_iget(sb, params->key, &fid, NULL, NULL);
        if (IS_ERR(inode))
-                goto error_inode;
+                return PTR_ERR(inode);
        if (params->autocell)
                set_bit(AFS_VNODE_AUTOCELL, &AFS_FS_I(inode)->flags);
@@ -342,16 +337,8 @@ static int afs_fill_super(struct super_block *sb, void *data)
        _leave(" = 0");
        return 0;
-error_inode:
-        ret = PTR_ERR(inode);
-        inode = NULL;
 error:
        iput(inode);
-        afs_put_volume(as->volume);
-        kfree(as);
-        sb->s_fs_info = NULL;
        _leave(" = %d", ret);
        return ret;
 }
@@ -367,6 +354,7 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
        struct afs_volume *vol;
        struct key *key;
        char *new_opts = kstrdup(options, GFP_KERNEL);
+        struct afs_super_info *as;
        int ret;
        _enter(",,%s,%p", dev_name, options);
@@ -399,12 +387,22 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
                ret = PTR_ERR(vol);
                goto error;
        }
-        params.volume = vol;
+        /* allocate a superblock info record */
+        as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL);
+        if (!as) {
+                ret = -ENOMEM;
+                afs_put_volume(vol);
+                goto error;
+        }
+        as->volume = vol;
        /* allocate a deviceless superblock */
-        sb = sget(fs_type, afs_test_super, set_anon_super, &params);
+        sb = sget(fs_type, afs_test_super, afs_set_super, as);
        if (IS_ERR(sb)) {
                ret = PTR_ERR(sb);
+                afs_put_volume(vol);
+                kfree(as);
                goto error;
        }
@@ -422,16 +420,16 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
        } else {
                _debug("reuse");
                ASSERTCMP(sb->s_flags, &, MS_ACTIVE);
+                afs_put_volume(vol);
+                kfree(as);
        }
-        afs_put_volume(params.volume);
        afs_put_cell(params.cell);
        kfree(new_opts);
        _leave(" = 0 [%p]", sb);
        return dget(sb->s_root);
 error:
-        afs_put_volume(params.volume);
        afs_put_cell(params.cell);
        key_put(params.key);
        kfree(new_opts);
@@ -439,18 +437,12 @@ error:
        return ERR_PTR(ret);
 }
-/*
+static void afs_kill_super(struct super_block *sb)
- * finish the unmounting process on the superblock
- */
-static void afs_put_super(struct super_block *sb)
 {
        struct afs_super_info *as = sb->s_fs_info;
+        kill_anon_super(sb);
-        _enter("");
        afs_put_volume(as->volume);
+        kfree(as);
-        _leave("");
 }
 /*
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 789b3afb3423..b806285ff853 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -84,23 +84,21 @@ void afs_put_writeback(struct afs_writeback *wb)
 * partly or wholly fill a page that's under preparation for writing
 */
 static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
-                         loff_t pos, unsigned len, struct page *page)
+                         loff_t pos, struct page *page)
 {
        loff_t i_size;
-        unsigned eof;
        int ret;
+        int len;
-        _enter(",,%llu,%u", (unsigned long long)pos, len);
+        _enter(",,%llu", (unsigned long long)pos);
-        ASSERTCMP(len, <=, PAGE_CACHE_SIZE);
        i_size = i_size_read(&vnode->vfs_inode);
-        if (pos + len > i_size)
+        if (pos + PAGE_CACHE_SIZE > i_size)
-                eof = i_size;
+                len = i_size - pos;
        else
-                eof = PAGE_CACHE_SIZE;
+                len = PAGE_CACHE_SIZE;
-        ret = afs_vnode_fetch_data(vnode, key, 0, eof, page);
+        ret = afs_vnode_fetch_data(vnode, key, pos, len, page);
        if (ret < 0) {
                if (ret == -ENOENT) {
                        _debug("got NOENT from server"
@@ -153,9 +151,8 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
        *pagep = page;
        /* page won't leak in error case: it eventually gets cleaned off LRU */
-        if (!PageUptodate(page)) {
+        if (!PageUptodate(page) && len != PAGE_CACHE_SIZE) {
-                _debug("not up to date");
+                ret = afs_fill_page(vnode, key, index << PAGE_CACHE_SHIFT, page);
-                ret = afs_fill_page(vnode, key, pos, len, page);
                if (ret < 0) {
                        kfree(candidate);
                        _leave(" = %d [prep]", ret);
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 9ad2369d9e35..bfcb18feb1df 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -231,9 +231,6 @@ static int bad_inode_readlink(struct dentry *dentry, char __user *buffer,
 static int bad_inode_permission(struct inode *inode, int mask, unsigned int flags)
 {
-        if (flags & IPERM_FLAG_RCU)
-                return -ECHILD;
        return -EIO;
 }
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 378b5b4443f3..300628795fdb 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -967,6 +967,12 @@ struct btrfs_fs_info {
        struct srcu_struct subvol_srcu;
        spinlock_t trans_lock;
+        /*
+         * the reloc mutex goes with the trans lock; it is taken
+         * during commit to protect us from the relocation code
+         */
+        struct mutex reloc_mutex;
        struct list_head trans_list;
        struct list_head hashers;
        struct list_head dead_roots;
@@ -1172,6 +1178,14 @@ struct btrfs_root {
        u32 type;
        u64 highest_objectid;
+        /* btrfs_record_root_in_trans is a multi-step process,
+         * and it can race with the balancing code.  But the
+         * race is very small, and only occurs the first time the
+         * root is added to each transaction.  So in_trans_setup
+         * is used to tell us when more checks are required
+         */
+        unsigned long in_trans_setup;
        int ref_cows;
        int track_dirty;
        int in_radix;
@@ -1181,7 +1195,6 @@ struct btrfs_root {
        struct btrfs_key defrag_max;
        int defrag_running;
        char *name;
-        int in_sysfs;
        /* the dirty list is only used by non-reference counted roots */
        struct list_head dirty_list;
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 6462c29d2d37..f1cbd028f7b3 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -297,7 +297,6 @@ struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
                item->data_len = data_len;
                item->ins_or_del = 0;
                item->bytes_reserved = 0;
-                item->block_rsv = NULL;
                item->delayed_node = NULL;
                atomic_set(&item->refs, 1);
        }
@@ -593,10 +592,8 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
        num_bytes = btrfs_calc_trans_metadata_size(root, 1);
        ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
-        if (!ret) {
+        if (!ret)
                item->bytes_reserved = num_bytes;
-                item->block_rsv = dst_rsv;
-        }
        return ret;
 }
@@ -604,10 +601,13 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
 static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
                                                struct btrfs_delayed_item *item)
 {
+        struct btrfs_block_rsv *rsv;
        if (!item->bytes_reserved)
                return;
-        btrfs_block_rsv_release(root, item->block_rsv,
+        rsv = &root->fs_info->global_block_rsv;
+        btrfs_block_rsv_release(root, rsv,
                                item->bytes_reserved);
 }
@@ -1014,6 +1014,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
        struct btrfs_delayed_root *delayed_root;
        struct btrfs_delayed_node *curr_node, *prev_node;
        struct btrfs_path *path;
+        struct btrfs_block_rsv *block_rsv;
        int ret = 0;
        path = btrfs_alloc_path();
@@ -1021,6 +1022,9 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
                return -ENOMEM;
        path->leave_spinning = 1;
+        block_rsv = trans->block_rsv;
+        trans->block_rsv = &root->fs_info->global_block_rsv;
        delayed_root = btrfs_get_delayed_root(root);
        curr_node = btrfs_first_delayed_node(delayed_root);
@@ -1045,6 +1049,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
        }
        btrfs_free_path(path);
+        trans->block_rsv = block_rsv;
        return ret;
 }
@@ -1052,6 +1057,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
                                              struct btrfs_delayed_node *node)
 {
        struct btrfs_path *path;
+        struct btrfs_block_rsv *block_rsv;
        int ret;
        path = btrfs_alloc_path();
@@ -1059,6 +1065,9 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
                return -ENOMEM;
        path->leave_spinning = 1;
+        block_rsv = trans->block_rsv;
+        trans->block_rsv = &node->root->fs_info->global_block_rsv;
        ret = btrfs_insert_delayed_items(trans, path, node->root, node);
        if (!ret)
                ret = btrfs_delete_delayed_items(trans, path, node->root, node);
@@ -1066,6 +1075,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
                ret = btrfs_update_delayed_inode(trans, node->root, path, node);
        btrfs_free_path(path);
+        trans->block_rsv = block_rsv;
        return ret;
 }
@@ -1116,6 +1126,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
        struct btrfs_path *path;
        struct btrfs_delayed_node *delayed_node = NULL;
        struct btrfs_root *root;
+        struct btrfs_block_rsv *block_rsv;
        unsigned long nr = 0;
        int need_requeue = 0;
        int ret;
@@ -1134,6 +1145,9 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
        if (IS_ERR(trans))
                goto free_path;
+        block_rsv = trans->block_rsv;
+        trans->block_rsv = &root->fs_info->global_block_rsv;
        ret = btrfs_insert_delayed_items(trans, path, root, delayed_node);
        if (!ret)
                ret = btrfs_delete_delayed_items(trans, path, root,
@@ -1176,6 +1190,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
        nr = trans->blocks_used;
+        trans->block_rsv = block_rsv;
        btrfs_end_transaction_dmeta(trans, root);
        __btrfs_btree_balance_dirty(root, nr);
 free_path:
@@ -1222,6 +1237,13 @@ again:
        return 0;
 }
+void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
+{
+        struct btrfs_delayed_root *delayed_root;
+        delayed_root = btrfs_get_delayed_root(root);
+        WARN_ON(btrfs_first_delayed_node(delayed_root));
+}
 void btrfs_balance_delayed_items(struct btrfs_root *root)
 {
        struct btrfs_delayed_root *delayed_root;
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index eb7d240aa648..d1a6a2915c66 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -75,7 +75,6 @@ struct btrfs_delayed_item {
        struct list_head tree_list;     /* used for batch insert/delete items */
        struct list_head readdir_list;  /* used for readdir items */
        u64 bytes_reserved;
-        struct btrfs_block_rsv *block_rsv;
        struct btrfs_delayed_node *delayed_node;
        atomic_t refs;
        int ins_or_del;
@@ -138,4 +137,8 @@ int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent,
 /* for init */
 int __init btrfs_delayed_inode_init(void);
 void btrfs_delayed_inode_exit(void);
+/* for debugging */
+void btrfs_assert_delayed_root_empty(struct btrfs_root *root);
 #endif
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 9f68c6898653..1ac8db5dc0a3 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1044,7 +1044,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
        root->last_trans = 0;
        root->highest_objectid = 0;
        root->name = NULL;
-        root->in_sysfs = 0;
        root->inode_tree = RB_ROOT;
        INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
        root->block_rsv = NULL;
@@ -1300,19 +1299,21 @@ again:
                return root;
        root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS);
-        if (!root->free_ino_ctl)
-                goto fail;
        root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned),
                                        GFP_NOFS);
-        if (!root->free_ino_pinned)
+        if (!root->free_ino_pinned || !root->free_ino_ctl) {
+                ret = -ENOMEM;
                goto fail;
+        }
        btrfs_init_free_ino_ctl(root);
        mutex_init(&root->fs_commit_mutex);
        spin_lock_init(&root->cache_lock);
        init_waitqueue_head(&root->cache_wait);
-        set_anon_super(&root->anon_super, NULL);
+        ret = set_anon_super(&root->anon_super, NULL);
+        if (ret)
+                goto fail;
        if (btrfs_root_refs(&root->root_item) == 0) {
                ret = -ENOENT;
@@ -1618,6 +1619,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        spin_lock_init(&fs_info->fs_roots_radix_lock);
        spin_lock_init(&fs_info->delayed_iput_lock);
        spin_lock_init(&fs_info->defrag_inodes_lock);
+        mutex_init(&fs_info->reloc_mutex);
        init_completion(&fs_info->kobj_unregister);
        fs_info->tree_root = tree_root;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b42efc2ded51..1f61bf5b4960 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3314,10 +3314,6 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
        if (reserved == 0)
                return 0;
-        /* nothing to shrink - nothing to reclaim */
-        if (root->fs_info->delalloc_bytes == 0)
-                return 0;
        max_reclaim = min(reserved, to_reclaim);
        while (loops < 1024) {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 751ddf8fc58a..0a9b10c5b0a7 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3076,6 +3076,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
        ret = btrfs_update_inode(trans, root, dir);
        BUG_ON(ret);
+        btrfs_free_path(path);
        return 0;
 }
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index b793d112d1f6..a3c4751e07db 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -482,8 +482,10 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
        ret = btrfs_snap_reserve_metadata(trans, pending_snapshot);
        BUG_ON(ret);
+        spin_lock(&root->fs_info->trans_lock);
        list_add(&pending_snapshot->list,
                 &trans->transaction->pending_snapshots);
+        spin_unlock(&root->fs_info->trans_lock);
        if (async_transid) {
                *async_transid = trans->transid;
                ret = btrfs_commit_transaction_async(trans,
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b1ef27cc673b..5e0a3dc79a45 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1368,7 +1368,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
        int ret;
        if (!root->reloc_root)
-                return 0;
+                goto out;
        reloc_root = root->reloc_root;
        root_item = &reloc_root->root_item;
@@ -1390,6 +1390,8 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
        ret = btrfs_update_root(trans, root->fs_info->tree_root,
                                &reloc_root->root_key, root_item);
        BUG_ON(ret);
+out:
        return 0;
 }
@@ -2142,10 +2144,11 @@ int prepare_to_merge(struct reloc_control *rc, int err)
        u64 num_bytes = 0;
        int ret;
-        spin_lock(&root->fs_info->trans_lock);
+        mutex_lock(&root->fs_info->reloc_mutex);
        rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
        rc->merging_rsv_size += rc->nodes_relocated * 2;
-        spin_unlock(&root->fs_info->trans_lock);
+        mutex_unlock(&root->fs_info->reloc_mutex);
 again:
        if (!err) {
                num_bytes = rc->merging_rsv_size;
@@ -2214,9 +2217,16 @@ int merge_reloc_roots(struct reloc_control *rc)
        int ret;
 again:
        root = rc->extent_root;
-        spin_lock(&root->fs_info->trans_lock);
+        /*
+         * this serializes us with btrfs_record_root_in_trans,
+         * we have to make sure nobody is in the middle of
+         * adding their roots to the list while we are
+         * doing this splice
+         */
+        mutex_lock(&root->fs_info->reloc_mutex);
        list_splice_init(&rc->reloc_roots, &reloc_roots);
-        spin_unlock(&root->fs_info->trans_lock);
+        mutex_unlock(&root->fs_info->reloc_mutex);
        while (!list_empty(&reloc_roots)) {
                found = 1;
@@ -3590,17 +3600,19 @@ next:
 static void set_reloc_control(struct reloc_control *rc)
 {
        struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
-        spin_lock(&fs_info->trans_lock);
+        mutex_lock(&fs_info->reloc_mutex);
        fs_info->reloc_ctl = rc;
-        spin_unlock(&fs_info->trans_lock);
+        mutex_unlock(&fs_info->reloc_mutex);
 }
 static void unset_reloc_control(struct reloc_control *rc)
 {
        struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
-        spin_lock(&fs_info->trans_lock);
+        mutex_lock(&fs_info->reloc_mutex);
        fs_info->reloc_ctl = NULL;
-        spin_unlock(&fs_info->trans_lock);
+        mutex_unlock(&fs_info->reloc_mutex);
 }
 static int check_extent_flags(u64 flags)
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index c3c223ae6691..daac9ae6d731 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -28,152 +28,6 @@
 #include "disk-io.h"
 #include "transaction.h"
-static ssize_t root_blocks_used_show(struct btrfs_root *root, char *buf)
-{
-        return snprintf(buf, PAGE_SIZE, "%llu\n",
-                (unsigned long long)btrfs_root_used(&root->root_item));
-}
-static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf)
-{
-        return snprintf(buf, PAGE_SIZE, "%llu\n",
-                (unsigned long long)btrfs_root_limit(&root->root_item));
-}
-static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf)
-{
-        return snprintf(buf, PAGE_SIZE, "%llu\n",
-                (unsigned long long)btrfs_super_bytes_used(&fs->super_copy));
-}
-static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf)
-{
-        return snprintf(buf, PAGE_SIZE, "%llu\n",
-                (unsigned long long)btrfs_super_total_bytes(&fs->super_copy));
-}
-static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf)
-{
-        return snprintf(buf, PAGE_SIZE, "%llu\n",
-                (unsigned long long)btrfs_super_sectorsize(&fs->super_copy));
-}
-/* this is for root attrs (subvols/snapshots) */
-struct btrfs_root_attr {
-        struct attribute attr;
-        ssize_t (*show)(struct btrfs_root *, char *);
-        ssize_t (*store)(struct btrfs_root *, const char *, size_t);
-};
-#define ROOT_ATTR(name, mode, show, store) \
-static struct btrfs_root_attr btrfs_root_attr_##name = __ATTR(name, mode, \
-                                                              show, store)
-ROOT_ATTR(blocks_used,  0444,   root_blocks_used_show,  NULL);
-ROOT_ATTR(block_limit,  0644,   root_block_limit_show,  NULL);
-static struct attribute *btrfs_root_attrs[] = {
-        &btrfs_root_attr_blocks_used.attr,
-        &btrfs_root_attr_block_limit.attr,
-        NULL,
-};
-/* this is for super attrs (actual full fs) */
-struct btrfs_super_attr {
-        struct attribute attr;
-        ssize_t (*show)(struct btrfs_fs_info *, char *);
-        ssize_t (*store)(struct btrfs_fs_info *, const char *, size_t);
-};
-#define SUPER_ATTR(name, mode, show, store) \
-static struct btrfs_super_attr btrfs_super_attr_##name = __ATTR(name, mode, \
-                                                                show, store)
-SUPER_ATTR(blocks_used,         0444,   super_blocks_used_show,         NULL);
-SUPER_ATTR(total_blocks,        0444,   super_total_blocks_show,        NULL);
-SUPER_ATTR(blocksize,           0444,   super_blocksize_show,           NULL);
-static struct attribute *btrfs_super_attrs[] = {
-        &btrfs_super_attr_blocks_used.attr,
-        &btrfs_super_attr_total_blocks.attr,
-        &btrfs_super_attr_blocksize.attr,
-        NULL,
-};
-static ssize_t btrfs_super_attr_show(struct kobject *kobj,
-                                    struct attribute *attr, char *buf)
-{
-        struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
-                                                super_kobj);
-        struct btrfs_super_attr *a = container_of(attr,
-                                                  struct btrfs_super_attr,
-                                                  attr);
-        return a->show ? a->show(fs, buf) : 0;
-}
-static ssize_t btrfs_super_attr_store(struct kobject *kobj,
-                                     struct attribute *attr,
-                                     const char *buf, size_t len)
-{
-        struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
-                                                super_kobj);
-        struct btrfs_super_attr *a = container_of(attr,
-                                                  struct btrfs_super_attr,
-                                                  attr);
-        return a->store ? a->store(fs, buf, len) : 0;
-}
-static ssize_t btrfs_root_attr_show(struct kobject *kobj,
-                                    struct attribute *attr, char *buf)
-{
-        struct btrfs_root *root = container_of(kobj, struct btrfs_root,
-                                                root_kobj);
-        struct btrfs_root_attr *a = container_of(attr,
-                                                 struct btrfs_root_attr,
-                                                 attr);
-        return a->show ? a->show(root, buf) : 0;
-}
-static ssize_t btrfs_root_attr_store(struct kobject *kobj,
-                                     struct attribute *attr,
-                                     const char *buf, size_t len)
-{
-        struct btrfs_root *root = container_of(kobj, struct btrfs_root,
-                                                root_kobj);
-        struct btrfs_root_attr *a = container_of(attr,
-                                                 struct btrfs_root_attr,
-                                                 attr);
-        return a->store ? a->store(root, buf, len) : 0;
-}
-static void btrfs_super_release(struct kobject *kobj)
-{
-        struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
-                                                super_kobj);
-        complete(&fs->kobj_unregister);
-}
-static void btrfs_root_release(struct kobject *kobj)
-{
-        struct btrfs_root *root = container_of(kobj, struct btrfs_root,
-                                                root_kobj);
-        complete(&root->kobj_unregister);
-}
-static const struct sysfs_ops btrfs_super_attr_ops = {
-        .show   = btrfs_super_attr_show,
-        .store  = btrfs_super_attr_store,
-};
-static const struct sysfs_ops btrfs_root_attr_ops = {
-        .show   = btrfs_root_attr_show,
-        .store  = btrfs_root_attr_store,
-};
 /* /sys/fs/btrfs/ entry */
 static struct kset *btrfs_kset;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 2b3590b9fe98..51dcec86757f 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -126,28 +126,85 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail)
 * to make sure the old root from before we joined the transaction is deleted
 * when the transaction commits
 */
-int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
+static int record_root_in_trans(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root)
 {
        if (root->ref_cows && root->last_trans < trans->transid) {
                WARN_ON(root == root->fs_info->extent_root);
                WARN_ON(root->commit_root != root->node);
+                /*
+                 * See below for the in_trans_setup usage rules.
+                 * We have the reloc mutex held now, so there
+                 * is only one writer in this function.
+                 */
+                root->in_trans_setup = 1;
+                /* make sure readers find in_trans_setup before
+                 * they find our root->last_trans update
+                 */
+                smp_wmb();
                spin_lock(&root->fs_info->fs_roots_radix_lock);
                if (root->last_trans == trans->transid) {
                        spin_unlock(&root->fs_info->fs_roots_radix_lock);
                        return 0;
                }
-                root->last_trans = trans->transid;
                radix_tree_tag_set(&root->fs_info->fs_roots_radix,
                           (unsigned long)root->root_key.objectid,
                           BTRFS_ROOT_TRANS_TAG);
                spin_unlock(&root->fs_info->fs_roots_radix_lock);
+                root->last_trans = trans->transid;
+                /* this is pretty tricky.  We don't want to
+                 * take the relocation lock in btrfs_record_root_in_trans
+                 * unless we're really doing the first setup for this root in
+                 * this transaction.
+                 *
+                 * Normally we'd use root->last_trans as a flag to decide
+                 * if we want to take the expensive mutex.
+                 *
+                 * But, we have to set root->last_trans before we
+                 * init the relocation root, otherwise, we trip over warnings
+                 * in ctree.c.  The solution used here is to flag ourselves
+                 * with root->in_trans_setup.  When this is 1, we're still
+                 * fixing up the reloc trees and everyone must wait.
+                 *
+                 * When this is zero, they can trust root->last_trans and fly
+                 * through btrfs_record_root_in_trans without having to take the
+                 * lock.  smp_wmb() makes sure that all the writes above are
+                 * done before we pop in the zero below
+                 */
                btrfs_init_reloc_root(trans, root);
+                smp_wmb();
+                root->in_trans_setup = 0;
        }
        return 0;
 }
+int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root)
+{
+        if (!root->ref_cows)
+                return 0;
+        /*
+         * see record_root_in_trans for comments about in_trans_setup usage
+         * and barriers
+         */
+        smp_rmb();
+        if (root->last_trans == trans->transid &&
+            !root->in_trans_setup)
+                return 0;
+        mutex_lock(&root->fs_info->reloc_mutex);
+        record_root_in_trans(trans, root);
+        mutex_unlock(&root->fs_info->reloc_mutex);
+        return 0;
+}
 /* wait for commit against the current transaction to become unblocked
 * when this is done, it is safe to start a new transaction, but the current
 * transaction might not be fully on disk.
@@ -882,7 +939,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
        parent = dget_parent(dentry);
        parent_inode = parent->d_inode;
        parent_root = BTRFS_I(parent_inode)->root;
-        btrfs_record_root_in_trans(trans, parent_root);
+        record_root_in_trans(trans, parent_root);
        /*
         * insert the directory item
@@ -900,7 +957,16 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
        ret = btrfs_update_inode(trans, parent_root, parent_inode);
        BUG_ON(ret);
-        btrfs_record_root_in_trans(trans, root);
+        /*
+         * pull in the delayed directory update
+         * and the delayed inode item
+         * otherwise we corrupt the FS during
+         * snapshot
+         */
+        ret = btrfs_run_delayed_items(trans, root);
+        BUG_ON(ret);
+        record_root_in_trans(trans, root);
        btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
        memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
        btrfs_check_and_init_root_item(new_root_item);
@@ -961,14 +1027,6 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
        int ret;
        list_for_each_entry(pending, head, list) {
-                /*
-                 * We must deal with the delayed items before creating
-                 * snapshots, or we will create a snapthot with inconsistent
-                 * information.
-                */
-                ret = btrfs_run_delayed_items(trans, fs_info->fs_root);
-                BUG_ON(ret);
                ret = create_pending_snapshot(trans, fs_info, pending);
                BUG_ON(ret);
        }
@@ -1241,21 +1299,42 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                        schedule_timeout(1);
                finish_wait(&cur_trans->writer_wait, &wait);
-                spin_lock(&root->fs_info->trans_lock);
-                root->fs_info->trans_no_join = 1;
-                spin_unlock(&root->fs_info->trans_lock);
        } while (atomic_read(&cur_trans->num_writers) > 1 ||
                 (should_grow && cur_trans->num_joined != joined));
-        ret = create_pending_snapshots(trans, root->fs_info);
+        /*
-        BUG_ON(ret);
+         * Ok, now we need to make sure to block out any other joins while we
+         * commit the transaction.  A join could have started before we set
+         * trans_no_join, so wait for num_writers to drop back to 1 again.
+         */
+        spin_lock(&root->fs_info->trans_lock);
+        root->fs_info->trans_no_join = 1;
+        spin_unlock(&root->fs_info->trans_lock);
+        wait_event(cur_trans->writer_wait,
+                   atomic_read(&cur_trans->num_writers) == 1);
+        /*
+         * the reloc mutex makes sure that we stop
+         * the balancing code from coming in and moving
+         * extents around in the middle of the commit
+         */
+        mutex_lock(&root->fs_info->reloc_mutex);
        ret = btrfs_run_delayed_items(trans, root);
        BUG_ON(ret);
+        ret = create_pending_snapshots(trans, root->fs_info);
+        BUG_ON(ret);
        ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
        BUG_ON(ret);
+        /*
+         * make sure none of the code above managed to slip in a
+         * delayed item
+         */
+        btrfs_assert_delayed_root_empty(root);
        WARN_ON(cur_trans != trans->transaction);
        btrfs_scrub_pause(root);
@@ -1312,6 +1391,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        root->fs_info->running_transaction = NULL;
        root->fs_info->trans_no_join = 0;
        spin_unlock(&root->fs_info->trans_lock);
+        mutex_unlock(&root->fs_info->reloc_mutex);
        wake_up(&root->fs_info->transaction_wait);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 592396c6dc47..4ce8a9f41d1e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3177,7 +3177,7 @@ again:
                tmp_key.offset = (u64)-1;
                wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
-                BUG_ON(!wc.replay_dest);
+                BUG_ON(IS_ERR_OR_NULL(wc.replay_dest));
                wc.replay_dest->log_root = log;
                btrfs_record_root_in_trans(trans, wc.replay_dest);
diff --git a/fs/buffer.c b/fs/buffer.c
index 49c9aada0374..1a80b048ade8 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1902,10 +1902,8 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len,
                if (!buffer_uptodate(*wait_bh))
                        err = -EIO;
        }
-        if (unlikely(err)) {
+        if (unlikely(err))
                page_zero_new_buffers(page, from, to);
-                ClearPageUptodate(page);
-        }
        return err;
 }
 EXPORT_SYMBOL(__block_write_begin);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index e9def996e383..2f0c58646c10 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -257,9 +257,6 @@ static int cifs_permission(struct inode *inode, int mask, unsigned int flags)
 {
        struct cifs_sb_info *cifs_sb;
-        if (flags & IPERM_FLAG_RCU)
-                return -ECHILD;
        cifs_sb = CIFS_SB(inode->i_sb);
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) {
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index 6cbb3afb36dc..cb140ef293e4 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -43,8 +43,6 @@ const struct file_operations coda_ioctl_operations = {
 /* the coda pioctl inode ops */
 static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags)
 {
-        if (flags & IPERM_FLAG_RCU)
-                return -ECHILD;
        return (mask & MAY_EXEC) ? -EACCES : 0;
 }
diff --git a/fs/exec.c b/fs/exec.c
index 97e0d52d72fd..6075a1e727ae 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1996,7 +1996,7 @@ static void wait_for_dump_helpers(struct file *file)
 * is a special value that we use to trap recursive
 * core dumps
 */
-static int umh_pipe_setup(struct subprocess_info *info)
+static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
 {
        struct file *rp, *wp;
        struct fdtable *fdt;
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 3db5ba4568fc..b3cc8586984e 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -974,7 +974,7 @@ out_no_inode:
 out_no_read:
        printk(KERN_WARNING "%s: bread failed, dev=%s, iso_blknum=%d, block=%d\n",
                __func__, s->s_id, iso_blknum, block);
-        goto out_freesbi;
+        goto out_freebh;
 out_bad_zone_size:
        printk(KERN_WARNING "ISOFS: Bad logical zone size %ld\n",
                sbi->s_log_zone_size);
@@ -989,6 +989,7 @@ out_unknown_format:
 out_freebh:
        brelse(bh);
+        brelse(pri_bh);
 out_freesbi:
        kfree(opt.iocharset);
        kfree(sbi);
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 9ed89d1663f8..1afae26cf236 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -555,13 +555,6 @@ static int logfs_symlink(struct inode *dir, struct dentry *dentry,
        return __logfs_create(dir, dentry, inode, target, destlen);
 }
-static int logfs_permission(struct inode *inode, int mask, unsigned int flags)
-{
-        if (flags & IPERM_FLAG_RCU)
-                return -ECHILD;
-        return generic_permission(inode, mask, flags, NULL);
-}
 static int logfs_link(struct dentry *old_dentry, struct inode *dir,
                struct dentry *dentry)
 {
@@ -820,7 +813,6 @@ const struct inode_operations logfs_dir_iops = {
        .mknod          = logfs_mknod,
        .rename         = logfs_rename,
        .rmdir          = logfs_rmdir,
-        .permission     = logfs_permission,
        .symlink        = logfs_symlink,
        .unlink         = logfs_unlink,
 };
diff --git a/fs/namei.c b/fs/namei.c
index 9802345df5e7..0223c41fb114 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -238,7 +238,8 @@ int generic_permission(struct inode *inode, int mask, unsigned int flags,
        /*
         * Read/write DACs are always overridable.
-         * Executable DACs are overridable if at least one exec bit is set.
+         * Executable DACs are overridable for all directories and
+         * for non-directories that have at least one exec bit set.
         */
        if (!(mask & MAY_EXEC) || execute_ok(inode))
                if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE))
@@ -812,6 +813,11 @@ static int follow_automount(struct path *path, unsigned flags,
        if (!mnt) /* mount collision */
                return 0;
+        if (!*need_mntput) {
+                /* lock_mount() may release path->mnt on error */
+                mntget(path->mnt);
+                *need_mntput = true;
+        }
        err = finish_automount(mnt, path);
        switch (err) {
@@ -819,12 +825,9 @@ static int follow_automount(struct path *path, unsigned flags,
                /* Someone else made a mount here whilst we were busy */
                return 0;
        case 0:
-                dput(path->dentry);
+                path_put(path);
-                if (*need_mntput)
-                        mntput(path->mnt);
                path->mnt = mnt;
                path->dentry = dget(mnt->mnt_root);
-                *need_mntput = true;
                return 0;
        default:
                return err;
@@ -844,9 +847,10 @@ static int follow_automount(struct path *path, unsigned flags,
 */
 static int follow_managed(struct path *path, unsigned flags)
 {
+        struct vfsmount *mnt = path->mnt; /* held by caller, must be left alone */
        unsigned managed;
        bool need_mntput = false;
-        int ret;
+        int ret = 0;
        /* Given that we're not holding a lock here, we retain the value in a
         * local variable for each dentry as we look at it so that we don't see
@@ -861,7 +865,7 @@ static int follow_managed(struct path *path, unsigned flags)
                        BUG_ON(!path->dentry->d_op->d_manage);
                        ret = path->dentry->d_op->d_manage(path->dentry, false);
                        if (ret < 0)
-                                return ret == -EISDIR ? 0 : ret;
+                                break;
                }
                /* Transit to a mounted filesystem. */
@@ -887,14 +891,19 @@ static int follow_managed(struct path *path, unsigned flags)
                if (managed & DCACHE_NEED_AUTOMOUNT) {
                        ret = follow_automount(path, flags, &need_mntput);
                        if (ret < 0)
-                                return ret == -EISDIR ? 0 : ret;
+                                break;
                        continue;
                }
                /* We didn't change the current path point */
                break;
        }
-        return 0;
+        if (need_mntput && path->mnt == mnt)
+                mntput(path->mnt);
+        if (ret == -EISDIR)
+                ret = 0;
+        return ret;
 }
 int follow_down_one(struct path *path)
@@ -1003,9 +1012,6 @@ failed:
 * Follow down to the covering mount currently visible to userspace.  At each
 * point, the filesystem owning that dentry may be queried as to whether the
 * caller is permitted to proceed or not.
- *
- * Care must be taken as namespace_sem may be held (indicated by mounting_here
- * being true).
 */
 int follow_down(struct path *path)
 {
@@ -2713,8 +2719,10 @@ static long do_unlinkat(int dfd, const char __user *pathname)
        error = PTR_ERR(dentry);
        if (!IS_ERR(dentry)) {
                /* Why not before? Because we want correct error value */
+                if (nd.last.name[nd.last.len])
+                        goto slashes;
                inode = dentry->d_inode;
-                if (nd.last.name[nd.last.len] || !inode)
+                if (!inode)
                        goto slashes;
                ihold(inode);
                error = mnt_want_write(nd.path.mnt);
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 18b3e8975fe0..fbb2a5ef5817 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -82,6 +82,7 @@ config NFSD_V4
        select NFSD_V3
        select FS_POSIX_ACL
        select SUNRPC_GSS
+        select CRYPTO
        help
          This option enables support in your system's NFS server for
          version 4 of the NFS protocol (RFC 3530).
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 1f5eae40f34e..2b1449dd2f49 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -13,6 +13,7 @@
 #include <linux/lockd/lockd.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/gss_api.h>
+#include <linux/sunrpc/gss_krb5_enctypes.h>
 #include "idmap.h"
 #include "nfsd.h"
@@ -189,18 +190,10 @@ static struct file_operations export_features_operations = {
        .release        = single_release,
 };
-#ifdef CONFIG_SUNRPC_GSS
+#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE)
 static int supported_enctypes_show(struct seq_file *m, void *v)
 {
-        struct gss_api_mech *k5mech;
+        seq_printf(m, KRB5_SUPPORTED_ENCTYPES);
-        k5mech = gss_mech_get_by_name("krb5");
-        if (k5mech == NULL)
-                goto out;
-        if (k5mech->gm_upcall_enctypes != NULL)
-                seq_printf(m, k5mech->gm_upcall_enctypes);
-        gss_mech_put(k5mech);
-out:
        return 0;
 }
@@ -215,7 +208,7 @@ static struct file_operations supported_enctypes_ops = {
        .llseek         = seq_lseek,
        .release        = single_release,
 };
-#endif /* CONFIG_SUNRPC_GSS */
+#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */
 extern int nfsd_pool_stats_open(struct inode *inode, struct file *file);
 extern int nfsd_pool_stats_release(struct inode *inode, struct file *file);
@@ -1427,9 +1420,9 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
                [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},
                [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},
                [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
-#ifdef CONFIG_SUNRPC_GSS
+#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE)
                [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO},
-#endif /* CONFIG_SUNRPC_GSS */
+#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */
 #ifdef CONFIG_NFSD_V4
                [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
                [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index d5718273bb32..fd0acca5370a 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -696,7 +696,15 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor
 }
 #endif /* CONFIG_NFSD_V3 */
+static int nfsd_open_break_lease(struct inode *inode, int access)
+{
+        unsigned int mode;
+        if (access & NFSD_MAY_NOT_BREAK_LEASE)
+                return 0;
+        mode = (access & NFSD_MAY_WRITE) ? O_WRONLY : O_RDONLY;
+        return break_lease(inode, mode | O_NONBLOCK);
+}
 /*
 * Open an existing file or directory.
@@ -744,12 +752,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
        if (!inode->i_fop)
                goto out;
-        /*
+        host_err = nfsd_open_break_lease(inode, access);
-         * Check to see if there are any leases on this file.
-         * This may block while leases are broken.
-         */
-        if (!(access & NFSD_MAY_NOT_BREAK_LEASE))
-                host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? O_WRONLY : 0));
        if (host_err) /* NOMEM or WOULDBLOCK */
                goto out_nfserr;
@@ -1660,8 +1663,10 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
        if (!dold->d_inode)
                goto out_drop_write;
        host_err = nfsd_break_lease(dold->d_inode);
-        if (host_err)
+        if (host_err) {
+                err = nfserrno(host_err);
                goto out_drop_write;
+        }
        host_err = vfs_link(dold, dirp, dnew);
        if (!host_err) {
                err = nfserrno(commit_metadata(ffhp));
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index b954878ad6ce..b9b45fc2903e 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -801,12 +801,7 @@ out_err:
 int nilfs_permission(struct inode *inode, int mask, unsigned int flags)
 {
-        struct nilfs_root *root;
+        struct nilfs_root *root = NILFS_I(inode)->i_root;
-        if (flags & IPERM_FLAG_RCU)
-                return -ECHILD;
-        root = NILFS_I(inode)->i_root;
        if ((mask & MAY_WRITE) && root &&
            root->cno != NILFS_CPTREE_CURRENT_CNO)
                return -EROFS; /* snapshot is not writable */
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 14def991d9dd..8a84210ca080 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2169,11 +2169,7 @@ static const struct file_operations proc_fd_operations = {
 */
 static int proc_fd_permission(struct inode *inode, int mask, unsigned int flags)
 {
-        int rv;
+        int rv = generic_permission(inode, mask, flags, NULL);
-        if (flags & IPERM_FLAG_RCU)
-                return -ECHILD;
-        rv = generic_permission(inode, mask, flags, NULL);
        if (rv == 0)
                return 0;
        if (task_pid(current) == proc_pid(inode))
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 781dec5bd682..be177f702acb 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -38,18 +38,21 @@ static struct dentry *proc_ns_instantiate(struct inode *dir,
        struct inode *inode;
        struct proc_inode *ei;
        struct dentry *error = ERR_PTR(-ENOENT);
+        void *ns;
        inode = proc_pid_make_inode(dir->i_sb, task);
        if (!inode)
                goto out;
+        ns = ns_ops->get(task);
+        if (!ns)
+                goto out_iput;
        ei = PROC_I(inode);
        inode->i_mode = S_IFREG|S_IRUSR;
        inode->i_fop  = &ns_file_operations;
        ei->ns_ops    = ns_ops;
-        ei->ns        = ns_ops->get(task);
+        ei->ns        = ns;
-        if (!ei->ns)
-                goto out_iput;
        dentry->d_op = &pid_dentry_operations;
        d_add(dentry, inode);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index f50133c11c24..d167de365a8d 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -304,9 +304,6 @@ static int proc_sys_permission(struct inode *inode, int mask,unsigned int flags)
        struct ctl_table *table;
        int error;
-        if (flags & IPERM_FLAG_RCU)
-                return -ECHILD;
        /* Executable files are not allowed under /proc/sys/ */
        if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))
                return -EACCES;
diff --git a/fs/proc/root.c b/fs/proc/root.c
index a9000e9cfee5..d6c3b416529b 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -28,11 +28,12 @@ static int proc_test_super(struct super_block *sb, void *data)
 static int proc_set_super(struct super_block *sb, void *data)
 {
-        struct pid_namespace *ns;
+        int err = set_anon_super(sb, NULL);
+        if (!err) {
-        ns = (struct pid_namespace *)data;
+                struct pid_namespace *ns = (struct pid_namespace *)data;
-        sb->s_fs_info = get_pid_ns(ns);
+                sb->s_fs_info = get_pid_ns(ns);
-        return set_anon_super(sb, NULL);
+        }
+        return err;
 }
 static struct dentry *proc_mount(struct file_system_type *fs_type,
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index e8a62f41b458..d78089690965 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -954,8 +954,6 @@ static int xattr_mount_check(struct super_block *s)
 int reiserfs_permission(struct inode *inode, int mask, unsigned int flags)
 {
-        if (flags & IPERM_FLAG_RCU)
-                return -ECHILD;
        /*
         * We don't do permission checks on the internal objects.
         * Permissions are determined by the "owning" object.
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 266895783b47..e34f0d99ea4e 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -95,6 +95,14 @@ static int sysfs_set_super(struct super_block *sb, void *data)
        return error;
 }
+static void free_sysfs_super_info(struct sysfs_super_info *info)
+{
+        int type;
+        for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++)
+                kobj_ns_drop(type, info->ns[type]);
+        kfree(info);
+}
 static struct dentry *sysfs_mount(struct file_system_type *fs_type,
        int flags, const char *dev_name, void *data)
 {
@@ -108,11 +116,11 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
                return ERR_PTR(-ENOMEM);
        for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++)
-                info->ns[type] = kobj_ns_current(type);
+                info->ns[type] = kobj_ns_grab_current(type);
        sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info);
        if (IS_ERR(sb) || sb->s_fs_info != info)
-                kfree(info);
+                free_sysfs_super_info(info);
        if (IS_ERR(sb))
                return ERR_CAST(sb);
        if (!sb->s_root) {
@@ -131,12 +139,11 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
 static void sysfs_kill_sb(struct super_block *sb)
 {
        struct sysfs_super_info *info = sysfs_info(sb);
        /* Remove the superblock from fs_supers/s_instances
         * so we can't find it, before freeing sysfs_super_info.
         */
        kill_anon_super(sb);
-        kfree(info);
+        free_sysfs_super_info(info);
 }
 static struct file_system_type sysfs_fs_type = {
@@ -145,28 +152,6 @@ static struct file_system_type sysfs_fs_type = {
        .kill_sb        = sysfs_kill_sb,
 };
-void sysfs_exit_ns(enum kobj_ns_type type, const void *ns)
-{
-        struct super_block *sb;
-        mutex_lock(&sysfs_mutex);
-        spin_lock(&sb_lock);
-        list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) {
-                struct sysfs_super_info *info = sysfs_info(sb);
-                /*
-                 * If we see a superblock on the fs_supers/s_instances
-                 * list the unmount has not completed and sb->s_fs_info
-                 * points to a valid struct sysfs_super_info.
-                 */
-                /* Ignore superblocks with the wrong ns */
-                if (info->ns[type] != ns)
-                        continue;
-                info->ns[type] = NULL;
-        }
-        spin_unlock(&sb_lock);
-        mutex_unlock(&sysfs_mutex);
-}
 int __init sysfs_init(void)
 {
        int err = -ENOMEM;
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 3d28af31d863..2ed2404f3113 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -136,7 +136,7 @@ struct sysfs_addrm_cxt {
 * instance).
 */
 struct sysfs_super_info {
-        const void *ns[KOBJ_NS_TYPES];
+        void *ns[KOBJ_NS_TYPES];
 };
 #define sysfs_info(SB) ((struct sysfs_super_info *)(SB->s_fs_info))
 extern struct sysfs_dirent sysfs_root;
diff --git a/fs/timerfd.c b/fs/timerfd.c
index f67acbdda5e8..dffeb3795af1 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -61,7 +61,9 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
 /*
 * Called when the clock was set to cancel the timers in the cancel
- * list.
+ * list. This will wake up processes waiting on these timers. The
+ * wake-up requires ctx->ticks to be non-zero, therefore we increment
+ * it before calling wake_up_locked().
 */
 void timerfd_clock_was_set(void)
 {
@@ -76,6 +78,7 @@ void timerfd_clock_was_set(void)
                spin_lock_irqsave(&ctx->wqh.lock, flags);
                if (ctx->moffs.tv64 != moffs.tv64) {
                        ctx->moffs.tv64 = KTIME_MAX;
+                        ctx->ticks++;
                        wake_up_locked(&ctx->wqh);
                }
                spin_unlock_irqrestore(&ctx->wqh.lock, flags);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index b5aeb5a8ebed..529be0582029 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1848,7 +1848,6 @@ static void ubifs_put_super(struct super_block *sb)
        bdi_destroy(&c->bdi);
        ubi_close_volume(c->ubi);
        mutex_unlock(&c->umount_mutex);
-        kfree(c);
 }
 static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
@@ -1971,61 +1970,65 @@ static struct ubi_volume_desc *open_ubi(const char *name, int mode)
        return ERR_PTR(-EINVAL);
 }
-static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
+static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi)
 {
-        struct ubi_volume_desc *ubi = sb->s_fs_info;
        struct ubifs_info *c;
-        struct inode *root;
-        int err;
        c = kzalloc(sizeof(struct ubifs_info), GFP_KERNEL);
-        if (!c)
+        if (c) {
-                return -ENOMEM;
+                spin_lock_init(&c->cnt_lock);
+                spin_lock_init(&c->cs_lock);
+                spin_lock_init(&c->buds_lock);
+                spin_lock_init(&c->space_lock);
+                spin_lock_init(&c->orphan_lock);
+                init_rwsem(&c->commit_sem);
+                mutex_init(&c->lp_mutex);
+                mutex_init(&c->tnc_mutex);
+                mutex_init(&c->log_mutex);
+                mutex_init(&c->mst_mutex);
+                mutex_init(&c->umount_mutex);
+                mutex_init(&c->bu_mutex);
+                mutex_init(&c->write_reserve_mutex);
+                init_waitqueue_head(&c->cmt_wq);
+                c->buds = RB_ROOT;
+                c->old_idx = RB_ROOT;
+                c->size_tree = RB_ROOT;
+                c->orph_tree = RB_ROOT;
+                INIT_LIST_HEAD(&c->infos_list);
+                INIT_LIST_HEAD(&c->idx_gc);
+                INIT_LIST_HEAD(&c->replay_list);
+                INIT_LIST_HEAD(&c->replay_buds);
+                INIT_LIST_HEAD(&c->uncat_list);
+                INIT_LIST_HEAD(&c->empty_list);
+                INIT_LIST_HEAD(&c->freeable_list);
+                INIT_LIST_HEAD(&c->frdi_idx_list);
+                INIT_LIST_HEAD(&c->unclean_leb_list);
+                INIT_LIST_HEAD(&c->old_buds);
+                INIT_LIST_HEAD(&c->orph_list);
+                INIT_LIST_HEAD(&c->orph_new);
+                c->no_chk_data_crc = 1;
+                c->highest_inum = UBIFS_FIRST_INO;
+                c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM;
+                ubi_get_volume_info(ubi, &c->vi);
+                ubi_get_device_info(c->vi.ubi_num, &c->di);
+        }
+        return c;
+}
-        spin_lock_init(&c->cnt_lock);
+static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
-        spin_lock_init(&c->cs_lock);
+{
-        spin_lock_init(&c->buds_lock);
+        struct ubifs_info *c = sb->s_fs_info;
-        spin_lock_init(&c->space_lock);
+        struct inode *root;
-        spin_lock_init(&c->orphan_lock);
+        int err;
-        init_rwsem(&c->commit_sem);
-        mutex_init(&c->lp_mutex);
-        mutex_init(&c->tnc_mutex);
-        mutex_init(&c->log_mutex);
-        mutex_init(&c->mst_mutex);
-        mutex_init(&c->umount_mutex);
-        mutex_init(&c->bu_mutex);
-        mutex_init(&c->write_reserve_mutex);
-        init_waitqueue_head(&c->cmt_wq);
-        c->buds = RB_ROOT;
-        c->old_idx = RB_ROOT;
-        c->size_tree = RB_ROOT;
-        c->orph_tree = RB_ROOT;
-        INIT_LIST_HEAD(&c->infos_list);
-        INIT_LIST_HEAD(&c->idx_gc);
-        INIT_LIST_HEAD(&c->replay_list);
-        INIT_LIST_HEAD(&c->replay_buds);
-        INIT_LIST_HEAD(&c->uncat_list);
-        INIT_LIST_HEAD(&c->empty_list);
-        INIT_LIST_HEAD(&c->freeable_list);
-        INIT_LIST_HEAD(&c->frdi_idx_list);
-        INIT_LIST_HEAD(&c->unclean_leb_list);
-        INIT_LIST_HEAD(&c->old_buds);
-        INIT_LIST_HEAD(&c->orph_list);
-        INIT_LIST_HEAD(&c->orph_new);
-        c->no_chk_data_crc = 1;
        c->vfs_sb = sb;
-        c->highest_inum = UBIFS_FIRST_INO;
-        c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM;
-        ubi_get_volume_info(ubi, &c->vi);
-        ubi_get_device_info(c->vi.ubi_num, &c->di);
        /* Re-open the UBI device in read-write mode */
        c->ubi = ubi_open_volume(c->vi.ubi_num, c->vi.vol_id, UBI_READWRITE);
        if (IS_ERR(c->ubi)) {
                err = PTR_ERR(c->ubi);
-                goto out_free;
+                goto out;
        }
        /*
@@ -2091,24 +2094,29 @@ out_bdi:
        bdi_destroy(&c->bdi);
 out_close:
        ubi_close_volume(c->ubi);
-out_free:
+out:
-        kfree(c);
        return err;
 }
 static int sb_test(struct super_block *sb, void *data)
 {
-        dev_t *dev = data;
+        struct ubifs_info *c1 = data;
        struct ubifs_info *c = sb->s_fs_info;
-        return c->vi.cdev == *dev;
+        return c->vi.cdev == c1->vi.cdev;
+}
+static int sb_set(struct super_block *sb, void *data)
+{
+        sb->s_fs_info = data;
+        return set_anon_super(sb, NULL);
 }
 static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
                        const char *name, void *data)
 {
        struct ubi_volume_desc *ubi;
-        struct ubi_volume_info vi;
+        struct ubifs_info *c;
        struct super_block *sb;
        int err;
@@ -2125,19 +2133,25 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
                        name, (int)PTR_ERR(ubi));
                return ERR_CAST(ubi);
        }
-        ubi_get_volume_info(ubi, &vi);
-        dbg_gen("opened ubi%d_%d", vi.ubi_num, vi.vol_id);
+        c = alloc_ubifs_info(ubi);
+        if (!c) {
+                err = -ENOMEM;
+                goto out_close;
+        }
+        dbg_gen("opened ubi%d_%d", c->vi.ubi_num, c->vi.vol_id);
-        sb = sget(fs_type, &sb_test, &set_anon_super, &vi.cdev);
+        sb = sget(fs_type, sb_test, sb_set, c);
        if (IS_ERR(sb)) {
                err = PTR_ERR(sb);
+                kfree(c);
                goto out_close;
        }
        if (sb->s_root) {
                struct ubifs_info *c1 = sb->s_fs_info;
+                kfree(c);
                /* A new mount point for already mounted UBIFS */
                dbg_gen("this ubi volume is already mounted");
                if (!!(flags & MS_RDONLY) != c1->ro_mount) {
@@ -2146,11 +2160,6 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
                }
        } else {
                sb->s_flags = flags;
-                /*
-                 * Pass 'ubi' to 'fill_super()' in sb->s_fs_info where it is
-                 * replaced by 'c'.
-                 */
-                sb->s_fs_info = ubi;
                err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
                if (err)
                        goto out_deact;
@@ -2170,11 +2179,18 @@ out_close:
        return ERR_PTR(err);
 }
+static void kill_ubifs_super(struct super_block *s)
+{
+        struct ubifs_info *c = s->s_fs_info;
+        kill_anon_super(s);
+        kfree(c);
+}
 static struct file_system_type ubifs_fs_type = {
        .name    = "ubifs",
        .owner   = THIS_MODULE,
        .mount   = ubifs_mount,
-        .kill_sb = kill_anon_super,
+        .kill_sb = kill_ubifs_super,
 };
 /*
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index f4213ba1ff85..7f782af286bf 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -131,19 +131,34 @@ xfs_file_fsync(
 {
        struct inode            *inode = file->f_mapping->host;
        struct xfs_inode        *ip = XFS_I(inode);
+        struct xfs_mount        *mp = ip->i_mount;
        struct xfs_trans        *tp;
        int                     error = 0;
        int                     log_flushed = 0;
        trace_xfs_file_fsync(ip);
-        if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+        if (XFS_FORCED_SHUTDOWN(mp))
                return -XFS_ERROR(EIO);
        xfs_iflags_clear(ip, XFS_ITRUNCATED);
        xfs_ioend_wait(ip);
+        if (mp->m_flags & XFS_MOUNT_BARRIER) {
+                /*
+                 * If we have an RT and/or log subvolume we need to make sure
+                 * to flush the write cache of the device used for file data
+                 * first.  This is to ensure newly written file data make
+                 * it to disk before logging the new inode size in case of
+                 * an extending write.
+                 */
+                if (XFS_IS_REALTIME_INODE(ip))
+                        xfs_blkdev_issue_flush(mp->m_rtdev_targp);
+                else if (mp->m_logdev_targp != mp->m_ddev_targp)
+                        xfs_blkdev_issue_flush(mp->m_ddev_targp);
+        }
        /*
         * We always need to make sure that the required inode state is safe on
         * disk.  The inode might be clean but we still might need to force the
@@ -175,9 +190,9 @@ xfs_file_fsync(
                 * updates.  The sync transaction will also force the log.
                 */
                xfs_iunlock(ip, XFS_ILOCK_SHARED);
-                tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
+                tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
                error = xfs_trans_reserve(tp, 0,
-                                XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0);
+                                XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
                if (error) {
                        xfs_trans_cancel(tp, 0);
                        return -error;
@@ -209,28 +224,25 @@ xfs_file_fsync(
                 * force the log.
                 */
                if (xfs_ipincount(ip)) {
-                        error = _xfs_log_force_lsn(ip->i_mount,
+                        error = _xfs_log_force_lsn(mp,
                                        ip->i_itemp->ili_last_lsn,
                                        XFS_LOG_SYNC, &log_flushed);
                }
                xfs_iunlock(ip, XFS_ILOCK_SHARED);
        }
-        if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) {
+        /*
-                /*
+         * If we only have a single device, and the log force above was
-                 * If the log write didn't issue an ordered tag we need
+         * a no-op we might have to flush the data device cache here.
-                 * to flush the disk cache for the data device now.
+         * This can only happen for fdatasync/O_DSYNC if we were overwriting
-                 */
+         * an already allocated file and thus do not have any metadata to
-                if (!log_flushed)
+         * commit.
-                        xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp);
+         */
+        if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
-                /*
+            mp->m_logdev_targp == mp->m_ddev_targp &&
-                 * If this inode is on the RT dev we need to flush that
+            !XFS_IS_REALTIME_INODE(ip) &&
-                 * cache as well.
+            !log_flushed)
-                 */
+                xfs_blkdev_issue_flush(mp->m_ddev_targp);
-                if (XFS_IS_REALTIME_INODE(ip))
-                        xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
-        }
        return -error;
 }
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index dd21784525a8..d44d92cd12b1 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -182,7 +182,7 @@ xfs_vn_mknod(
        if (IS_POSIXACL(dir)) {
                default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT);
                if (IS_ERR(default_acl))
-                        return -PTR_ERR(default_acl);
+                        return PTR_ERR(default_acl);
                if (!default_acl)
                        mode &= ~current_umask();
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 1e3a7ce804dc..a1a881e68a9a 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -627,68 +627,6 @@ xfs_blkdev_put(
                blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
 }
-/*
- * Try to write out the superblock using barriers.
- */
-STATIC int
-xfs_barrier_test(
-        xfs_mount_t     *mp)
-{
-        xfs_buf_t       *sbp = xfs_getsb(mp, 0);
-        int             error;
-        XFS_BUF_UNDONE(sbp);
-        XFS_BUF_UNREAD(sbp);
-        XFS_BUF_UNDELAYWRITE(sbp);
-        XFS_BUF_WRITE(sbp);
-        XFS_BUF_UNASYNC(sbp);
-        XFS_BUF_ORDERED(sbp);
-        xfsbdstrat(mp, sbp);
-        error = xfs_buf_iowait(sbp);
-        /*
-         * Clear all the flags we set and possible error state in the
-         * buffer.  We only did the write to try out whether barriers
-         * worked and shouldn't leave any traces in the superblock
-         * buffer.
-         */
-        XFS_BUF_DONE(sbp);
-        XFS_BUF_ERROR(sbp, 0);
-        XFS_BUF_UNORDERED(sbp);
-        xfs_buf_relse(sbp);
-        return error;
-}
-STATIC void
-xfs_mountfs_check_barriers(xfs_mount_t *mp)
-{
-        int error;
-        if (mp->m_logdev_targp != mp->m_ddev_targp) {
-                xfs_notice(mp,
-                  "Disabling barriers, not supported with external log device");
-                mp->m_flags &= ~XFS_MOUNT_BARRIER;
-                return;
-        }
-        if (xfs_readonly_buftarg(mp->m_ddev_targp)) {
-                xfs_notice(mp,
-                        "Disabling barriers, underlying device is readonly");
-                mp->m_flags &= ~XFS_MOUNT_BARRIER;
-                return;
-        }
-        error = xfs_barrier_test(mp);
-        if (error) {
-                xfs_notice(mp,
-                        "Disabling barriers, trial barrier write failed");
-                mp->m_flags &= ~XFS_MOUNT_BARRIER;
-                return;
-        }
-}
 void
 xfs_blkdev_issue_flush(
        xfs_buftarg_t           *buftarg)
@@ -1240,14 +1178,6 @@ xfs_fs_remount(
                switch (token) {
                case Opt_barrier:
                        mp->m_flags |= XFS_MOUNT_BARRIER;
-                        /*
-                         * Test if barriers are actually working if we can,
-                         * else delay this check until the filesystem is
-                         * marked writeable.
-                         */
-                        if (!(mp->m_flags & XFS_MOUNT_RDONLY))
-                                xfs_mountfs_check_barriers(mp);
                        break;
                case Opt_nobarrier:
                        mp->m_flags &= ~XFS_MOUNT_BARRIER;
@@ -1282,8 +1212,6 @@ xfs_fs_remount(
        /* ro -> rw */
        if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
                mp->m_flags &= ~XFS_MOUNT_RDONLY;
-                if (mp->m_flags & XFS_MOUNT_BARRIER)
-                        xfs_mountfs_check_barriers(mp);
                /*
                 * If this is the first remount to writeable state we
@@ -1465,9 +1393,6 @@ xfs_fs_fill_super(
        if (error)
                goto out_free_sb;
-        if (mp->m_flags & XFS_MOUNT_BARRIER)
-                xfs_mountfs_check_barriers(mp);
        error = xfs_filestream_mount(mp);
        if (error)
                goto out_free_sb;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 211930246f20..41d5b8f2bf92 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1372,8 +1372,17 @@ xlog_sync(xlog_t		*log,
        XFS_BUF_ASYNC(bp);
        bp->b_flags |= XBF_LOG_BUFFER;
-        if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
+        if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) {
+                /*
+                 * If we have an external log device, flush the data device
+                 * before flushing the log to make sure all meta data
+                 * written back from the AIL actually made it to disk
+                 * before writing out the new log tail LSN in the log buffer.
+                 */
+                if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp)
+                        xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp);
                XFS_BUF_ORDERED(bp);
+        }
        ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
        ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
author	Paul Mundt <lethal@linux-sh.org>	2011-06-21 04:47:44 -0400
committer	Paul Mundt <lethal@linux-sh.org>	2011-06-21 04:47:44 -0400
commit	9dd056e9eba106ef622795b566f769a9ab0a49a8 (patch)
tree	390c07ea2c3b9631c2e8fe64c1a6feba0b503e8b /fs
parent	08ef2e427b59393d68a65b16e97e894b662a5573 (diff)
parent	f2b9726105824fdeea32a339e5072a358f89a25b (diff)