Merge commit 'c039c332f23e794deb6d6f37b9f07ff3b27fb2cf' into md

Pull in pre-requisites for adding raid10 support to dm-raid.
author: NeilBrown <neilb@suse.de> 2012-08-01 06:40:02 -0400
committer: NeilBrown <neilb@suse.de> 2012-08-01 06:40:02 -0400
commit: bb181e2e48f8c85db08c9cb015cbba9618dbf05c (patch)
tree: 191bc24dd97bcb174535cc217af082f16da3b43d /fs
parent: d57368afe63b3b7b45ce6c2b8c5276417935be2f (diff)
parent: c039c332f23e794deb6d6f37b9f07ff3b27fb2cf (diff)
216 files changed, 3715 insertions, 2621 deletions
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index e78956cbd702..34c59f14a1c9 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -144,7 +144,7 @@ extern void v9fs_session_close(struct v9fs_session_info *v9ses);
 extern void v9fs_session_cancel(struct v9fs_session_info *v9ses);
 extern void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses);
 extern struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
-                        struct nameidata *nameidata);
+                        unsigned int flags);
 extern int v9fs_vfs_unlink(struct inode *i, struct dentry *d);
 extern int v9fs_vfs_rmdir(struct inode *i, struct dentry *d);
 extern int v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index d529437ff442..64600b5d0522 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -100,13 +100,13 @@ static void v9fs_dentry_release(struct dentry *dentry)
        }
 }
-static int v9fs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
 {
        struct p9_fid *fid;
        struct inode *inode;
        struct v9fs_inode *v9inode;
-        if (nd->flags & LOOKUP_RCU)
+        if (flags & LOOKUP_RCU)
                return -ECHILD;
        inode = dentry->d_inode;
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 57ccb7537dae..cbf9dbb1b2a2 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -712,88 +712,34 @@ error:
 }
 /**
- * v9fs_vfs_create - VFS hook to create files
+ * v9fs_vfs_create - VFS hook to create a regular file
+ *
+ * open(.., O_CREAT) is handled in v9fs_vfs_atomic_open().  This is only called
+ * for mknod(2).
+ *
 * @dir: directory inode that is being created
 * @dentry:  dentry that is being deleted
 * @mode: create permissions
- * @nd: path information
 *
 */
 static int
 v9fs_vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                struct nameidata *nd)
+                bool excl)
 {
-        int err;
+        struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
-        u32 perm;
+        u32 perm = unixmode2p9mode(v9ses, mode);
-        int flags;
+        struct p9_fid *fid;
-        struct file *filp;
-        struct v9fs_inode *v9inode;
-        struct v9fs_session_info *v9ses;
-        struct p9_fid *fid, *inode_fid;
-        err = 0;
-        fid = NULL;
-        v9ses = v9fs_inode2v9ses(dir);
-        perm = unixmode2p9mode(v9ses, mode);
-        if (nd)
-                flags = nd->intent.open.flags;
-        else
-                flags = O_RDWR;
-        fid = v9fs_create(v9ses, dir, dentry, NULL, perm,
+        /* TODO: 'excl' is currently ignored -- should it map to P9_OEXCL? */
-                                v9fs_uflags2omode(flags,
+        fid = v9fs_create(v9ses, dir, dentry, NULL, perm, P9_ORDWR);
-                                                v9fs_proto_dotu(v9ses)));
+        if (IS_ERR(fid))
-        if (IS_ERR(fid)) {
+                return PTR_ERR(fid);
-                err = PTR_ERR(fid);
-                fid = NULL;
-                goto error;
-        }
        v9fs_invalidate_inode_attr(dir);
-        /* if we are opening a file, assign the open fid to the file */
+        p9_client_clunk(fid);
-        if (nd) {
-                v9inode = V9FS_I(dentry->d_inode);
-                mutex_lock(&v9inode->v_mutex);
-                if (v9ses->cache && !v9inode->writeback_fid &&
-                    ((flags & O_ACCMODE) != O_RDONLY)) {
-                        /*
-                         * clone a fid and add it to writeback_fid
-                         * we do it during open time instead of
-                         * page dirty time via write_begin/page_mkwrite
-                         * because we want write after unlink usecase
-                         * to work.
-                         */
-                        inode_fid = v9fs_writeback_fid(dentry);
-                        if (IS_ERR(inode_fid)) {
-                                err = PTR_ERR(inode_fid);
-                                mutex_unlock(&v9inode->v_mutex);
-                                goto error;
-                        }
-                        v9inode->writeback_fid = (void *) inode_fid;
-                }
-                mutex_unlock(&v9inode->v_mutex);
-                filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
-                if (IS_ERR(filp)) {
-                        err = PTR_ERR(filp);
-                        goto error;
-                }
-                filp->private_data = fid;
-#ifdef CONFIG_9P_FSCACHE
-                if (v9ses->cache)
-                        v9fs_cache_inode_set_cookie(dentry->d_inode, filp);
-#endif
-        } else
-                p9_client_clunk(fid);
        return 0;
-error:
-        if (fid)
-                p9_client_clunk(fid);
-        return err;
 }
 /**
@@ -839,7 +785,7 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
 */
 struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
-                                      struct nameidata *nameidata)
+                                      unsigned int flags)
 {
        struct dentry *res;
        struct super_block *sb;
@@ -849,8 +795,8 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
        char *name;
        int result = 0;
-        p9_debug(P9_DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n",
+        p9_debug(P9_DEBUG_VFS, "dir: %p dentry: (%s) %p flags: %x\n",
-                 dir, dentry->d_name.name, dentry, nameidata);
+                 dir, dentry->d_name.name, dentry, flags);
        if (dentry->d_name.len > NAME_MAX)
                return ERR_PTR(-ENAMETOOLONG);
@@ -910,6 +856,86 @@ error:
        return ERR_PTR(result);
 }
+static int
+v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
+                     struct file *file, unsigned flags, umode_t mode,
+                     int *opened)
+{
+        int err;
+        u32 perm;
+        struct v9fs_inode *v9inode;
+        struct v9fs_session_info *v9ses;
+        struct p9_fid *fid, *inode_fid;
+        struct dentry *res = NULL;
+        if (d_unhashed(dentry)) {
+                res = v9fs_vfs_lookup(dir, dentry, 0);
+                if (IS_ERR(res))
+                        return PTR_ERR(res);
+                if (res)
+                        dentry = res;
+        }
+        /* Only handle creation of a new file here; else finish_no_open() */
+        if (!(flags & O_CREAT) || dentry->d_inode)
+                return finish_no_open(file, res);
+        err = 0;
+        fid = NULL;
+        v9ses = v9fs_inode2v9ses(dir);
+        perm = unixmode2p9mode(v9ses, mode);
+        fid = v9fs_create(v9ses, dir, dentry, NULL, perm,
+                                v9fs_uflags2omode(flags,
+                                                v9fs_proto_dotu(v9ses)));
+        if (IS_ERR(fid)) {
+                err = PTR_ERR(fid);
+                fid = NULL;
+                goto error;
+        }
+        v9fs_invalidate_inode_attr(dir);
+        v9inode = V9FS_I(dentry->d_inode);
+        mutex_lock(&v9inode->v_mutex);
+        if (v9ses->cache && !v9inode->writeback_fid &&
+            ((flags & O_ACCMODE) != O_RDONLY)) {
+                /*
+                 * clone a fid and add it to writeback_fid
+                 * we do it during open time instead of
+                 * page dirty time via write_begin/page_mkwrite
+                 * because we want write after unlink usecase
+                 * to work.
+                 */
+                inode_fid = v9fs_writeback_fid(dentry);
+                if (IS_ERR(inode_fid)) {
+                        err = PTR_ERR(inode_fid);
+                        mutex_unlock(&v9inode->v_mutex);
+                        goto error;
+                }
+                v9inode->writeback_fid = (void *) inode_fid;
+        }
+        mutex_unlock(&v9inode->v_mutex);
+        err = finish_open(file, dentry, generic_file_open, opened);
+        if (err)
+                goto error;
+        file->private_data = fid;
+#ifdef CONFIG_9P_FSCACHE
+        if (v9ses->cache)
+                v9fs_cache_inode_set_cookie(dentry->d_inode, file);
+#endif
+        *opened |= FILE_CREATED;
+out:
+        dput(res);
+        return err;
+error:
+        if (fid)
+                p9_client_clunk(fid);
+        goto out;
+}
 /**
 * v9fs_vfs_unlink - VFS unlink hook to delete an inode
 * @i:  inode that is being unlinked
@@ -1488,6 +1514,7 @@ out:
 static const struct inode_operations v9fs_dir_inode_operations_dotu = {
        .create = v9fs_vfs_create,
        .lookup = v9fs_vfs_lookup,
+        .atomic_open = v9fs_vfs_atomic_open,
        .symlink = v9fs_vfs_symlink,
        .link = v9fs_vfs_link,
        .unlink = v9fs_vfs_unlink,
@@ -1502,6 +1529,7 @@ static const struct inode_operations v9fs_dir_inode_operations_dotu = {
 static const struct inode_operations v9fs_dir_inode_operations = {
        .create = v9fs_vfs_create,
        .lookup = v9fs_vfs_lookup,
+        .atomic_open = v9fs_vfs_atomic_open,
        .unlink = v9fs_vfs_unlink,
        .mkdir = v9fs_vfs_mkdir,
        .rmdir = v9fs_vfs_rmdir,
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index e3dd2a1e2bfc..40895546e103 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -230,20 +230,25 @@ int v9fs_open_to_dotl_flags(int flags)
 * @dir: directory inode that is being created
 * @dentry:  dentry that is being deleted
 * @mode: create permissions
- * @nd: path information
 *
 */
 static int
 v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
-                struct nameidata *nd)
+                bool excl)
+{
+        return v9fs_vfs_mknod_dotl(dir, dentry, omode, 0);
+}
+static int
+v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
+                          struct file *file, unsigned flags, umode_t omode,
+                          int *opened)
 {
        int err = 0;
        gid_t gid;
-        int flags;
        umode_t mode;
        char *name = NULL;
-        struct file *filp;
        struct p9_qid qid;
        struct inode *inode;
        struct p9_fid *fid = NULL;
@@ -251,19 +256,23 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
        struct p9_fid *dfid, *ofid, *inode_fid;
        struct v9fs_session_info *v9ses;
        struct posix_acl *pacl = NULL, *dacl = NULL;
+        struct dentry *res = NULL;
-        v9ses = v9fs_inode2v9ses(dir);
+        if (d_unhashed(dentry)) {
-        if (nd)
+                res = v9fs_vfs_lookup(dir, dentry, 0);
-                flags = nd->intent.open.flags;
+                if (IS_ERR(res))
-        else {
+                        return PTR_ERR(res);
-                /*
-                 * create call without LOOKUP_OPEN is due
+                if (res)
-                 * to mknod of regular files. So use mknod
+                        dentry = res;
-                 * operation.
-                 */
-                return v9fs_vfs_mknod_dotl(dir, dentry, omode, 0);
        }
+        /* Only handle creation of a new file here; else finish_no_open() */
+        if (!(flags & O_CREAT) || dentry->d_inode)
+                return finish_no_open(file, res);
+        v9ses = v9fs_inode2v9ses(dir);
        name = (char *) dentry->d_name.name;
        p9_debug(P9_DEBUG_VFS, "name:%s flags:0x%x mode:0x%hx\n",
                 name, flags, omode);
@@ -272,7 +281,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
        if (IS_ERR(dfid)) {
                err = PTR_ERR(dfid);
                p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
-                return err;
+                goto out;
        }
        /* clone a fid to use for creation */
@@ -280,7 +289,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
        if (IS_ERR(ofid)) {
                err = PTR_ERR(ofid);
                p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
-                return err;
+                goto out;
        }
        gid = v9fs_get_fsgid_for_create(dir);
@@ -345,17 +354,18 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
        }
        mutex_unlock(&v9inode->v_mutex);
        /* Since we are opening a file, assign the open fid to the file */
-        filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
+        err = finish_open(file, dentry, generic_file_open, opened);
-        if (IS_ERR(filp)) {
+        if (err)
-                err = PTR_ERR(filp);
                goto err_clunk_old_fid;
-        }
+        file->private_data = ofid;
-        filp->private_data = ofid;
 #ifdef CONFIG_9P_FSCACHE
        if (v9ses->cache)
-                v9fs_cache_inode_set_cookie(inode, filp);
+                v9fs_cache_inode_set_cookie(inode, file);
 #endif
-        return 0;
+        *opened |= FILE_CREATED;
+out:
+        dput(res);
+        return err;
 error:
        if (fid)
@@ -364,7 +374,7 @@ err_clunk_old_fid:
        if (ofid)
                p9_client_clunk(ofid);
        v9fs_set_create_acl(NULL, &dacl, &pacl);
-        return err;
+        goto out;
 }
 /**
@@ -982,6 +992,7 @@ out:
 const struct inode_operations v9fs_dir_inode_operations_dotl = {
        .create = v9fs_vfs_create_dotl,
+        .atomic_open = v9fs_vfs_atomic_open_dotl,
        .lookup = v9fs_vfs_lookup,
        .link = v9fs_vfs_link_dotl,
        .symlink = v9fs_vfs_symlink_dotl,
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 8c92a9ba8330..137d50396898 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -89,7 +89,7 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
        if (v9ses->cache)
                sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_CACHE_SIZE;
-        sb->s_flags = flags | MS_ACTIVE | MS_DIRSYNC | MS_NOATIME;
+        sb->s_flags |= MS_ACTIVE | MS_DIRSYNC | MS_NOATIME;
        if (!v9ses->cache)
                sb->s_flags |= MS_SYNCHRONOUS;
@@ -137,7 +137,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
                goto close_session;
        }
-        sb = sget(fs_type, NULL, v9fs_set_super, v9ses);
+        sb = sget(fs_type, NULL, v9fs_set_super, flags, v9ses);
        if (IS_ERR(sb)) {
                retval = PTR_ERR(sb);
                goto clunk_fid;
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index 3d83075aaa2e..b3be2e7c5643 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -266,7 +266,7 @@ const struct dentry_operations adfs_dentry_operations = {
 };
 static struct dentry *
-adfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+adfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
        struct inode *inode = NULL;
        struct object_info obj;
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 06fdcc9382c4..bdaec92353c2 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -246,7 +246,6 @@ static struct inode *adfs_alloc_inode(struct super_block *sb)
 static void adfs_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(adfs_inode_cachep, ADFS_I(inode));
 }
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 1fceb320d2f2..6e216419f340 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -3,6 +3,7 @@
 #include <linux/buffer_head.h>
 #include <linux/amigaffs.h>
 #include <linux/mutex.h>
+#include <linux/workqueue.h>
 /* AmigaOS allows file names with up to 30 characters length.
 * Names longer than that will be silently truncated. If you
@@ -100,6 +101,10 @@ struct affs_sb_info {
        char *s_prefix;                 /* Prefix for volumes and assigns. */
        char s_volume[32];              /* Volume prefix for absolute symlinks. */
        spinlock_t symlink_lock;        /* protects the previous two */
+        struct super_block *sb;         /* the VFS superblock object */
+        int work_queued;                /* non-zero if delayed work is queued */
+        struct delayed_work sb_work;    /* superblock flush delayed work */
+        spinlock_t work_lock;           /* protects sb_work and work_queued */
 };
 #define SF_INTL         0x0001          /* International filesystem. */
@@ -120,6 +125,8 @@ static inline struct affs_sb_info *AFFS_SB(struct super_block *sb)
        return sb->s_fs_info;
 }
+void affs_mark_sb_dirty(struct super_block *sb);
 /* amigaffs.c */
 extern int      affs_insert_hash(struct inode *inode, struct buffer_head *bh);
@@ -146,9 +153,9 @@ extern void	affs_free_bitmap(struct super_block *sb);
 /* namei.c */
 extern int      affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len);
-extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *);
+extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int);
 extern int      affs_unlink(struct inode *dir, struct dentry *dentry);
-extern int      affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *);
+extern int      affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool);
 extern int      affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
 extern int      affs_rmdir(struct inode *dir, struct dentry *dentry);
 extern int      affs_link(struct dentry *olddentry, struct inode *dir,
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 52a6407682e6..eb82ee53ee0b 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -122,22 +122,16 @@ affs_remove_hash(struct inode *dir, struct buffer_head *rem_bh)
 }
 static void
-affs_fix_dcache(struct dentry *dentry, u32 entry_ino)
+affs_fix_dcache(struct inode *inode, u32 entry_ino)
 {
-        struct inode *inode = dentry->d_inode;
+        struct dentry *dentry;
-        void *data = dentry->d_fsdata;
+        struct hlist_node *p;
-        struct list_head *head, *next;
        spin_lock(&inode->i_lock);
-        head = &inode->i_dentry;
+        hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) {
-        next = head->next;
-        while (next != head) {
-                dentry = list_entry(next, struct dentry, d_alias);
                if (entry_ino == (u32)(long)dentry->d_fsdata) {
-                        dentry->d_fsdata = data;
+                        dentry->d_fsdata = (void *)inode->i_ino;
                        break;
                }
-                next = next->next;
        }
        spin_unlock(&inode->i_lock);
 }
@@ -177,7 +171,11 @@ affs_remove_link(struct dentry *dentry)
                }
                affs_lock_dir(dir);
-                affs_fix_dcache(dentry, link_ino);
+                /*
+                 * if there's a dentry for that block, make it
+                 * refer to inode itself.
+                 */
+                affs_fix_dcache(inode, link_ino);
                retval = affs_remove_hash(dir, link_bh);
                if (retval) {
                        affs_unlock_dir(dir);
diff --git a/fs/affs/bitmap.c b/fs/affs/bitmap.c
index 3e262711ae06..6e0be43ef6ef 100644
--- a/fs/affs/bitmap.c
+++ b/fs/affs/bitmap.c
@@ -103,7 +103,7 @@ affs_free_block(struct super_block *sb, u32 block)
        *(__be32 *)bh->b_data = cpu_to_be32(tmp - mask);
        mark_buffer_dirty(bh);
-        sb->s_dirt = 1;
+        affs_mark_sb_dirty(sb);
        bm->bm_free++;
        mutex_unlock(&sbi->s_bmlock);
@@ -248,7 +248,7 @@ find_bit:
        *(__be32 *)bh->b_data = cpu_to_be32(tmp + mask);
        mark_buffer_dirty(bh);
-        sb->s_dirt = 1;
+        affs_mark_sb_dirty(sb);
        mutex_unlock(&sbi->s_bmlock);
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 47806940aac0..ff65884a7839 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -211,7 +211,7 @@ affs_find_entry(struct inode *dir, struct dentry *dentry)
 }
 struct dentry *
-affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
        struct super_block *sb = dir->i_sb;
        struct buffer_head *bh;
@@ -255,7 +255,7 @@ affs_unlink(struct inode *dir, struct dentry *dentry)
 }
 int
-affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd)
+affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
 {
        struct super_block *sb = dir->i_sb;
        struct inode    *inode;
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 0782653a05a2..c70f1e5fc024 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -17,6 +17,7 @@
 #include <linux/magic.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/writeback.h>
 #include "affs.h"
 extern struct timezone sys_tz;
@@ -25,15 +26,17 @@ static int affs_statfs(struct dentry *dentry, struct kstatfs *buf);
 static int affs_remount (struct super_block *sb, int *flags, char *data);
 static void
-affs_commit_super(struct super_block *sb, int wait, int clean)
+affs_commit_super(struct super_block *sb, int wait)
 {
        struct affs_sb_info *sbi = AFFS_SB(sb);
        struct buffer_head *bh = sbi->s_root_bh;
        struct affs_root_tail *tail = AFFS_ROOT_TAIL(sb, bh);
-        tail->bm_flag = cpu_to_be32(clean);
+        lock_buffer(bh);
        secs_to_datestamp(get_seconds(), &tail->disk_change);
        affs_fix_checksum(sb, bh);
+        unlock_buffer(bh);
        mark_buffer_dirty(bh);
        if (wait)
                sync_dirty_buffer(bh);
@@ -45,9 +48,7 @@ affs_put_super(struct super_block *sb)
        struct affs_sb_info *sbi = AFFS_SB(sb);
        pr_debug("AFFS: put_super()\n");
-        if (!(sb->s_flags & MS_RDONLY) && sb->s_dirt)
+        cancel_delayed_work_sync(&sbi->sb_work);
-                affs_commit_super(sb, 1, 1);
        kfree(sbi->s_prefix);
        affs_free_bitmap(sb);
        affs_brelse(sbi->s_root_bh);
@@ -55,26 +56,43 @@ affs_put_super(struct super_block *sb)
        sb->s_fs_info = NULL;
 }
-static void
+static int
-affs_write_super(struct super_block *sb)
+affs_sync_fs(struct super_block *sb, int wait)
 {
-        lock_super(sb);
+        affs_commit_super(sb, wait);
-        if (!(sb->s_flags & MS_RDONLY))
+        return 0;
-                affs_commit_super(sb, 1, 2);
+}
-        sb->s_dirt = 0;
-        unlock_super(sb);
+static void flush_superblock(struct work_struct *work)
+{
+        struct affs_sb_info *sbi;
+        struct super_block *sb;
+        sbi = container_of(work, struct affs_sb_info, sb_work.work);
+        sb = sbi->sb;
-        pr_debug("AFFS: write_super() at %lu, clean=2\n", get_seconds());
+        spin_lock(&sbi->work_lock);
+        sbi->work_queued = 0;
+        spin_unlock(&sbi->work_lock);
+        affs_commit_super(sb, 1);
 }
-static int
+void affs_mark_sb_dirty(struct super_block *sb)
-affs_sync_fs(struct super_block *sb, int wait)
 {
-        lock_super(sb);
+        struct affs_sb_info *sbi = AFFS_SB(sb);
-        affs_commit_super(sb, wait, 2);
+        unsigned long delay;
-        sb->s_dirt = 0;
-        unlock_super(sb);
+        if (sb->s_flags & MS_RDONLY)
-        return 0;
+               return;
+        spin_lock(&sbi->work_lock);
+        if (!sbi->work_queued) {
+               delay = msecs_to_jiffies(dirty_writeback_interval * 10);
+               queue_delayed_work(system_long_wq, &sbi->sb_work, delay);
+               sbi->work_queued = 1;
+        }
+        spin_unlock(&sbi->work_lock);
 }
 static struct kmem_cache * affs_inode_cachep;
@@ -138,7 +156,6 @@ static const struct super_operations affs_sops = {
        .write_inode    = affs_write_inode,
        .evict_inode    = affs_evict_inode,
        .put_super      = affs_put_super,
-        .write_super    = affs_write_super,
        .sync_fs        = affs_sync_fs,
        .statfs         = affs_statfs,
        .remount_fs     = affs_remount,
@@ -305,8 +322,11 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
                return -ENOMEM;
        sb->s_fs_info = sbi;
+        sbi->sb = sb;
        mutex_init(&sbi->s_bmlock);
        spin_lock_init(&sbi->symlink_lock);
+        spin_lock_init(&sbi->work_lock);
+        INIT_DELAYED_WORK(&sbi->sb_work, flush_superblock);
        if (!parse_options(data,&uid,&gid,&i,&reserved,&root_block,
                                &blocksize,&sbi->s_prefix,
@@ -531,6 +551,7 @@ affs_remount(struct super_block *sb, int *flags, char *data)
                return -EINVAL;
        }
+        flush_delayed_work_sync(&sbi->sb_work);
        replace_mount_options(sb, new_opts);
        sbi->s_flags = mount_flags;
@@ -549,10 +570,9 @@ affs_remount(struct super_block *sb, int *flags, char *data)
        if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
                return 0;
-        if (*flags & MS_RDONLY) {
+        if (*flags & MS_RDONLY)
-                affs_write_super(sb);
                affs_free_bitmap(sb);
-        } else
+        else
                res = affs_init_bitmap(sb, flags);
        return res;
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index e22dc4b4a503..db477906ba4f 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -20,16 +20,16 @@
 #include "internal.h"
 static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
-                                 struct nameidata *nd);
+                                 unsigned int flags);
 static int afs_dir_open(struct inode *inode, struct file *file);
 static int afs_readdir(struct file *file, void *dirent, filldir_t filldir);
-static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd);
+static int afs_d_revalidate(struct dentry *dentry, unsigned int flags);
 static int afs_d_delete(const struct dentry *dentry);
 static void afs_d_release(struct dentry *dentry);
 static int afs_lookup_filldir(void *_cookie, const char *name, int nlen,
                                  loff_t fpos, u64 ino, unsigned dtype);
 static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                      struct nameidata *nd);
+                      bool excl);
 static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
 static int afs_rmdir(struct inode *dir, struct dentry *dentry);
 static int afs_unlink(struct inode *dir, struct dentry *dentry);
@@ -516,7 +516,7 @@ out:
 * look up an entry in a directory
 */
 static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
-                                 struct nameidata *nd)
+                                 unsigned int flags)
 {
        struct afs_vnode *vnode;
        struct afs_fid fid;
@@ -598,7 +598,7 @@ success:
 * - NOTE! the hit can be a negative hit too, so we can't assume we have an
 *   inode
 */
-static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
 {
        struct afs_vnode *vnode, *dir;
        struct afs_fid uninitialized_var(fid);
@@ -607,7 +607,7 @@ static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
        void *dir_version;
        int ret;
-        if (nd->flags & LOOKUP_RCU)
+        if (flags & LOOKUP_RCU)
                return -ECHILD;
        vnode = AFS_FS_I(dentry->d_inode);
@@ -949,7 +949,7 @@ error:
 * create a regular file on an AFS filesystem
 */
 static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                      struct nameidata *nd)
+                      bool excl)
 {
        struct afs_file_status status;
        struct afs_callback cb;
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 298cf8919ec7..9682c33d5daf 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -22,7 +22,7 @@
 static struct dentry *afs_mntpt_lookup(struct inode *dir,
                                       struct dentry *dentry,
-                                       struct nameidata *nd);
+                                       unsigned int flags);
 static int afs_mntpt_open(struct inode *inode, struct file *file);
 static void afs_mntpt_expiry_timed_out(struct work_struct *work);
@@ -104,7 +104,7 @@ out:
 */
 static struct dentry *afs_mntpt_lookup(struct inode *dir,
                                       struct dentry *dentry,
-                                       struct nameidata *nd)
+                                       unsigned int flags)
 {
        _enter("%p,%p{%p{%s},%s}",
               dir,
diff --git a/fs/afs/super.c b/fs/afs/super.c
index f02b31e7e648..df8c6047c2a1 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -395,7 +395,7 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
        as->volume = vol;
        /* allocate a deviceless superblock */
-        sb = sget(fs_type, afs_test_super, afs_set_super, as);
+        sb = sget(fs_type, afs_test_super, afs_set_super, flags, as);
        if (IS_ERR(sb)) {
                ret = PTR_ERR(sb);
                afs_put_volume(vol);
@@ -406,7 +406,6 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
        if (!sb->s_root) {
                /* initial superblock/root creation */
                _debug("create");
-                sb->s_flags = flags;
                ret = afs_fill_super(sb, &params);
                if (ret < 0) {
                        deactivate_locked_super(sb);
diff --git a/fs/aio.c b/fs/aio.c
index 55c4c7656053..71f613cf4a85 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -56,13 +56,6 @@ static struct kmem_cache	*kioctx_cachep;
 static struct workqueue_struct *aio_wq;
-/* Used for rare fput completion. */
-static void aio_fput_routine(struct work_struct *);
-static DECLARE_WORK(fput_work, aio_fput_routine);
-static DEFINE_SPINLOCK(fput_lock);
-static LIST_HEAD(fput_head);
 static void aio_kick_handler(struct work_struct *);
 static void aio_queue_work(struct kioctx *);
@@ -479,7 +472,6 @@ static int kiocb_batch_refill(struct kioctx *ctx, struct kiocb_batch *batch)
 {
        unsigned short allocated, to_alloc;
        long avail;
-        bool called_fput = false;
        struct kiocb *req, *n;
        struct aio_ring *ring;
@@ -495,28 +487,11 @@ static int kiocb_batch_refill(struct kioctx *ctx, struct kiocb_batch *batch)
        if (allocated == 0)
                goto out;
-retry:
        spin_lock_irq(&ctx->ctx_lock);
        ring = kmap_atomic(ctx->ring_info.ring_pages[0]);
        avail = aio_ring_avail(&ctx->ring_info, ring) - ctx->reqs_active;
        BUG_ON(avail < 0);
-        if (avail == 0 && !called_fput) {
-                /*
-                 * Handle a potential starvation case.  It is possible that
-                 * we hold the last reference on a struct file, causing us
-                 * to delay the final fput to non-irq context.  In this case,
-                 * ctx->reqs_active is artificially high.  Calling the fput
-                 * routine here may free up a slot in the event completion
-                 * ring, allowing this allocation to succeed.
-                 */
-                kunmap_atomic(ring);
-                spin_unlock_irq(&ctx->ctx_lock);
-                aio_fput_routine(NULL);
-                called_fput = true;
-                goto retry;
-        }
        if (avail < allocated) {
                /* Trim back the number of requests. */
                list_for_each_entry_safe(req, n, &batch->head, ki_batch) {
@@ -570,36 +545,6 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req)
                wake_up_all(&ctx->wait);
 }
-static void aio_fput_routine(struct work_struct *data)
-{
-        spin_lock_irq(&fput_lock);
-        while (likely(!list_empty(&fput_head))) {
-                struct kiocb *req = list_kiocb(fput_head.next);
-                struct kioctx *ctx = req->ki_ctx;
-                list_del(&req->ki_list);
-                spin_unlock_irq(&fput_lock);
-                /* Complete the fput(s) */
-                if (req->ki_filp != NULL)
-                        fput(req->ki_filp);
-                /* Link the iocb into the context's free list */
-                rcu_read_lock();
-                spin_lock_irq(&ctx->ctx_lock);
-                really_put_req(ctx, req);
-                /*
-                 * at that point ctx might've been killed, but actual
-                 * freeing is RCU'd
-                 */
-                spin_unlock_irq(&ctx->ctx_lock);
-                rcu_read_unlock();
-                spin_lock_irq(&fput_lock);
-        }
-        spin_unlock_irq(&fput_lock);
-}
 /* __aio_put_req
 *      Returns true if this put was the last user of the request.
 */
@@ -618,21 +563,9 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
        req->ki_cancel = NULL;
        req->ki_retry = NULL;
-        /*
+        fput(req->ki_filp);
-         * Try to optimize the aio and eventfd file* puts, by avoiding to
+        req->ki_filp = NULL;
-         * schedule work in case it is not final fput() time. In normal cases,
+        really_put_req(ctx, req);
-         * we would not be holding the last reference to the file*, so
-         * this function will be executed w/out any aio kthread wakeup.
-         */
-        if (unlikely(!fput_atomic(req->ki_filp))) {
-                spin_lock(&fput_lock);
-                list_add(&req->ki_list, &fput_head);
-                spin_unlock(&fput_lock);
-                schedule_work(&fput_work);
-        } else {
-                req->ki_filp = NULL;
-                really_put_req(ctx, req);
-        }
        return 1;
 }
diff --git a/fs/attr.c b/fs/attr.c
index 0da90951d277..29e38a1f7f77 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -171,6 +171,8 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
        struct timespec now;
        unsigned int ia_valid = attr->ia_valid;
+        WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
        if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) {
                if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
                        return -EPERM;
@@ -250,5 +252,4 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
        return error;
 }
 EXPORT_SYMBOL(notify_change);
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index aa9103f8f01b..abf645c1703b 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -257,8 +257,8 @@ static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid)
                 * corresponding to the autofs fs we want to open.
                 */
-                filp = dentry_open(path.dentry, path.mnt, O_RDONLY,
+                filp = dentry_open(&path, O_RDONLY, current_cred());
-                                   current_cred());
+                path_put(&path);
                if (IS_ERR(filp)) {
                        err = PTR_ERR(filp);
                        goto out;
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 75e5f1c8e028..e7396cfdb109 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -32,7 +32,7 @@ static long autofs4_root_ioctl(struct file *,unsigned int,unsigned long);
 static long autofs4_root_compat_ioctl(struct file *,unsigned int,unsigned long);
 #endif
 static int autofs4_dir_open(struct inode *inode, struct file *file);
-static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *);
+static struct dentry *autofs4_lookup(struct inode *,struct dentry *, unsigned int);
 static struct vfsmount *autofs4_d_automount(struct path *);
 static int autofs4_d_manage(struct dentry *, bool);
 static void autofs4_dentry_release(struct dentry *);
@@ -458,7 +458,7 @@ int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
 }
 /* Lookups in the root directory */
-static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
        struct autofs_sb_info *sbi;
        struct autofs_info *ino;
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 1b35d6bd06b0..b1342ffb3cf6 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -173,13 +173,13 @@ static const struct file_operations bad_file_ops =
 };
 static int bad_inode_create (struct inode *dir, struct dentry *dentry,
-                umode_t mode, struct nameidata *nd)
+                umode_t mode, bool excl)
 {
        return -EIO;
 }
 static struct dentry *bad_inode_lookup(struct inode *dir,
-                        struct dentry *dentry, struct nameidata *nd)
+                        struct dentry *dentry, unsigned int flags)
 {
        return ERR_PTR(-EIO);
 }
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index e18da23d42b5..cf7f3c67c8b7 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -34,7 +34,7 @@ static int befs_readdir(struct file *, void *, filldir_t);
 static int befs_get_block(struct inode *, sector_t, struct buffer_head *, int);
 static int befs_readpage(struct file *file, struct page *page);
 static sector_t befs_bmap(struct address_space *mapping, sector_t block);
-static struct dentry *befs_lookup(struct inode *, struct dentry *, struct nameidata *);
+static struct dentry *befs_lookup(struct inode *, struct dentry *, unsigned int);
 static struct inode *befs_iget(struct super_block *, unsigned long);
 static struct inode *befs_alloc_inode(struct super_block *sb);
 static void befs_destroy_inode(struct inode *inode);
@@ -159,7 +159,7 @@ befs_get_block(struct inode *inode, sector_t block,
 }
 static struct dentry *
-befs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
        struct inode *inode = NULL;
        struct super_block *sb = dir->i_sb;
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index d12c7966db27..2785ef91191a 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -85,7 +85,7 @@ const struct file_operations bfs_dir_operations = {
 extern void dump_imap(const char *, struct super_block *);
 static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                                                struct nameidata *nd)
+                                                bool excl)
 {
        int err;
        struct inode *inode;
@@ -133,7 +133,7 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 }
 static struct dentry *bfs_lookup(struct inode *dir, struct dentry *dentry,
-                                                struct nameidata *nd)
+                                                unsigned int flags)
 {
        struct inode *inode = NULL;
        struct buffer_head *bh;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index c2bbe1fb1326..1e519195d45b 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1710,3 +1710,39 @@ int __invalidate_device(struct block_device *bdev, bool kill_dirty)
        return res;
 }
 EXPORT_SYMBOL(__invalidate_device);
+void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
+{
+        struct inode *inode, *old_inode = NULL;
+        spin_lock(&inode_sb_list_lock);
+        list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
+                struct address_space *mapping = inode->i_mapping;
+                spin_lock(&inode->i_lock);
+                if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) ||
+                    mapping->nrpages == 0) {
+                        spin_unlock(&inode->i_lock);
+                        continue;
+                }
+                __iget(inode);
+                spin_unlock(&inode->i_lock);
+                spin_unlock(&inode_sb_list_lock);
+                /*
+                 * We hold a reference to 'inode' so it couldn't have been
+                 * removed from s_inodes list while we dropped the
+                 * inode_sb_list_lock.  We cannot iput the inode now as we can
+                 * be holding the last reference and we cannot iput it under
+                 * inode_sb_list_lock. So we keep the reference and iput it
+                 * later.
+                 */
+                iput(old_inode);
+                old_inode = inode;
+                func(I_BDEV(inode), arg);
+                spin_lock(&inode_sb_list_lock);
+        }
+        spin_unlock(&inode_sb_list_lock);
+        iput(old_inode);
+}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 01c21b6c6d43..deafe19c34b5 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -929,7 +929,8 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
 /**
- * convert_extent - convert all bits in a given range from one bit to another
+ * convert_extent_bit - convert all bits in a given range from one bit to
+ *                      another
 * @tree:       the io tree to search
 * @start:      the start offset in bytes
 * @end:        the end offset in bytes (inclusive)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a7d1921ac76b..fb8d671d00e6 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4247,7 +4247,7 @@ static void btrfs_dentry_release(struct dentry *dentry)
 }
 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
-                                   struct nameidata *nd)
+                                   unsigned int flags)
 {
        struct dentry *ret;
@@ -4893,7 +4893,7 @@ out_unlock:
 }
 static int btrfs_create(struct inode *dir, struct dentry *dentry,
-                        umode_t mode, struct nameidata *nd)
+                        umode_t mode, bool excl)
 {
        struct btrfs_trans_handle *trans;
        struct btrfs_root *root = BTRFS_I(dir)->root;
@@ -6987,7 +6987,7 @@ void btrfs_destroy_inode(struct inode *inode)
        struct btrfs_ordered_extent *ordered;
        struct btrfs_root *root = BTRFS_I(inode)->root;
-        WARN_ON(!list_empty(&inode->i_dentry));
+        WARN_ON(!hlist_empty(&inode->i_dentry));
        WARN_ON(inode->i_data.nrpages);
        WARN_ON(BTRFS_I(inode)->outstanding_extents);
        WARN_ON(BTRFS_I(inode)->reserved_extents);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0e92e5763005..1e9f6c019ad0 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3268,7 +3268,7 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
        if (fs_info->sb->s_flags & MS_RDONLY)
                return -EROFS;
-        ret = mnt_want_write(file->f_path.mnt);
+        ret = mnt_want_write_file(file);
        if (ret)
                return ret;
@@ -3338,7 +3338,7 @@ out_bargs:
 out:
        mutex_unlock(&fs_info->balance_mutex);
        mutex_unlock(&fs_info->volume_mutex);
-        mnt_drop_write(file->f_path.mnt);
+        mnt_drop_write_file(file);
        return ret;
 }
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index e23991574fdf..b19d75567728 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1068,7 +1068,8 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
        }
        bdev = fs_devices->latest_bdev;
-        s = sget(fs_type, btrfs_test_super, btrfs_set_super, fs_info);
+        s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | MS_NOSEC,
+                 fs_info);
        if (IS_ERR(s)) {
                error = PTR_ERR(s);
                goto error_close_devices;
@@ -1082,7 +1083,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
        } else {
                char b[BDEVNAME_SIZE];
-                s->s_flags = flags | MS_NOSEC;
                strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
                btrfs_sb(s)->bdev_holder = fs_type;
                error = btrfs_fill_super(s, fs_devices, data,
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 7f0771d3894e..b0b5f7cdfffa 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -567,7 +567,7 @@ lookup_again:
                        if (ret < 0)
                                goto create_error;
                        start = jiffies;
-                        ret = vfs_create(dir->d_inode, next, S_IFREG, NULL);
+                        ret = vfs_create(dir->d_inode, next, S_IFREG, true);
                        cachefiles_hist(cachefiles_create_histogram, start);
                        if (ret < 0)
                                goto create_error;
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 0e3c0924cc3a..c0353dfac51f 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -891,6 +891,7 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page)
        struct cachefiles_cache *cache;
        mm_segment_t old_fs;
        struct file *file;
+        struct path path;
        loff_t pos, eof;
        size_t len;
        void *data;
@@ -916,10 +917,9 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page)
        /* write the page to the backing filesystem and let it store it in its
         * own time */
-        dget(object->backer);
+        path.mnt = cache->mnt;
-        mntget(cache->mnt);
+        path.dentry = object->backer;
-        file = dentry_open(object->backer, cache->mnt, O_RDWR,
+        file = dentry_open(&path, O_RDWR, cache->cache_cred);
-                           cache->cache_cred);
        if (IS_ERR(file)) {
                ret = PTR_ERR(file);
        } else {
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 3e8094be4604..00894ff9246c 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -576,7 +576,7 @@ static int is_root_ceph_dentry(struct inode *inode, struct dentry *dentry)
 * the MDS so that it gets our 'caps wanted' value in a single op.
 */
 static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
-                                  struct nameidata *nd)
+                                  unsigned int flags)
 {
        struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
        struct ceph_mds_client *mdsc = fsc->mdsc;
@@ -594,14 +594,6 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
        if (err < 0)
                return ERR_PTR(err);
-        /* open (but not create!) intent? */
-        if (nd &&
-            (nd->flags & LOOKUP_OPEN) &&
-            !(nd->intent.open.flags & O_CREAT)) {
-                int mode = nd->intent.open.create_mode & ~current->fs->umask;
-                return ceph_lookup_open(dir, dentry, nd, mode, 1);
-        }
        /* can we conclude ENOENT locally? */
        if (dentry->d_inode == NULL) {
                struct ceph_inode_info *ci = ceph_inode(dir);
@@ -642,13 +634,51 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
        return dentry;
 }
+int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
+                     struct file *file, unsigned flags, umode_t mode,
+                     int *opened)
+{
+        int err;
+        struct dentry *res = NULL;
+        if (!(flags & O_CREAT)) {
+                if (dentry->d_name.len > NAME_MAX)
+                        return -ENAMETOOLONG;
+                err = ceph_init_dentry(dentry);
+                if (err < 0)
+                        return err;
+                return ceph_lookup_open(dir, dentry, file, flags, mode, opened);
+        }
+        if (d_unhashed(dentry)) {
+                res = ceph_lookup(dir, dentry, 0);
+                if (IS_ERR(res))
+                        return PTR_ERR(res);
+                if (res)
+                        dentry = res;
+        }
+        /* We don't deal with positive dentries here */
+        if (dentry->d_inode)
+                return finish_no_open(file, res);
+        *opened |= FILE_CREATED;
+        err = ceph_lookup_open(dir, dentry, file, flags, mode, opened);
+        dput(res);
+        return err;
+}
 /*
 * If we do a create but get no trace back from the MDS, follow up with
 * a lookup (the VFS expects us to link up the provided dentry).
 */
 int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry)
 {
-        struct dentry *result = ceph_lookup(dir, dentry, NULL);
+        struct dentry *result = ceph_lookup(dir, dentry, 0);
        if (result && !IS_ERR(result)) {
                /*
@@ -700,25 +730,9 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
 }
 static int ceph_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                       struct nameidata *nd)
+                       bool excl)
 {
-        dout("create in dir %p dentry %p name '%.*s'\n",
+        return ceph_mknod(dir, dentry, mode, 0);
-             dir, dentry, dentry->d_name.len, dentry->d_name.name);
-        if (ceph_snap(dir) != CEPH_NOSNAP)
-                return -EROFS;
-        if (nd) {
-                BUG_ON((nd->flags & LOOKUP_OPEN) == 0);
-                dentry = ceph_lookup_open(dir, dentry, nd, mode, 0);
-                /* hrm, what should i do here if we get aliased? */
-                if (IS_ERR(dentry))
-                        return PTR_ERR(dentry);
-                return 0;
-        }
-        /* fall back to mknod */
-        return ceph_mknod(dir, dentry, (mode & ~S_IFMT) | S_IFREG, 0);
 }
 static int ceph_symlink(struct inode *dir, struct dentry *dentry,
@@ -1028,12 +1042,12 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
 /*
 * Check if cached dentry can be trusted.
 */
-static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
 {
        int valid = 0;
        struct inode *dir;
-        if (nd && nd->flags & LOOKUP_RCU)
+        if (flags & LOOKUP_RCU)
                return -ECHILD;
        dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry,
@@ -1080,7 +1094,7 @@ static void ceph_d_release(struct dentry *dentry)
 }
 static int ceph_snapdir_d_revalidate(struct dentry *dentry,
-                                          struct nameidata *nd)
+                                          unsigned int flags)
 {
        /*
         * Eventually, we'll want to revalidate snapped metadata
@@ -1357,6 +1371,7 @@ const struct inode_operations ceph_dir_iops = {
        .rmdir = ceph_unlink,
        .rename = ceph_rename,
        .create = ceph_create,
+        .atomic_open = ceph_atomic_open,
 };
 const struct dentry_operations ceph_dentry_ops = {
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 988d4f302e48..1b81d6c31878 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -213,22 +213,15 @@ out:
 * may_open() fails, the struct *file gets cleaned up (i.e.
 * ceph_release gets called).  So fear not!
 */
-/*
+int ceph_lookup_open(struct inode *dir, struct dentry *dentry,
- * flags
+                     struct file *file, unsigned flags, umode_t mode,
- *  path_lookup_open   -> LOOKUP_OPEN
+                     int *opened)
- *  path_lookup_create -> LOOKUP_OPEN|LOOKUP_CREATE
- */
-struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
-                                struct nameidata *nd, int mode,
-                                int locked_dir)
 {
        struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
        struct ceph_mds_client *mdsc = fsc->mdsc;
-        struct file *file;
        struct ceph_mds_request *req;
        struct dentry *ret;
        int err;
-        int flags = nd->intent.open.flags;
        dout("ceph_lookup_open dentry %p '%.*s' flags %d mode 0%o\n",
             dentry, dentry->d_name.len, dentry->d_name.name, flags, mode);
@@ -236,7 +229,7 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
        /* do the open */
        req = prepare_open_request(dir->i_sb, flags, mode);
        if (IS_ERR(req))
-                return ERR_CAST(req);
+                return PTR_ERR(req);
        req->r_dentry = dget(dentry);
        req->r_num_caps = 2;
        if (flags & O_CREAT) {
@@ -254,14 +247,17 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
                err = ceph_handle_notrace_create(dir, dentry);
        if (err)
                goto out;
-        file = lookup_instantiate_filp(nd, req->r_dentry, ceph_open);
+        err = finish_open(file, req->r_dentry, ceph_open, opened);
-        if (IS_ERR(file))
-                err = PTR_ERR(file);
 out:
        ret = ceph_finish_lookup(req, dentry, err);
        ceph_mdsc_put_request(req);
        dout("ceph_lookup_open result=%p\n", ret);
-        return ret;
+        if (IS_ERR(ret))
+                return PTR_ERR(ret);
+        dput(ret);
+        return err;
 }
 int ceph_release(struct inode *inode, struct file *file)
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 1e67dd7305a4..7076109f014d 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -871,7 +871,7 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type,
        if (ceph_test_opt(fsc->client, NOSHARE))
                compare_super = NULL;
-        sb = sget(fs_type, compare_super, ceph_set_super, fsc);
+        sb = sget(fs_type, compare_super, ceph_set_super, flags, fsc);
        if (IS_ERR(sb)) {
                res = ERR_CAST(sb);
                goto out;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index fc35036d258d..f4d5522cb619 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -806,9 +806,9 @@ extern int ceph_copy_from_page_vector(struct page **pages,
                                    loff_t off, size_t len);
 extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
 extern int ceph_open(struct inode *inode, struct file *file);
-extern struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
+extern int ceph_lookup_open(struct inode *dir, struct dentry *dentry,
-                                       struct nameidata *nd, int mode,
+                             struct file *od, unsigned flags,
-                                       int locked_dir);
+                             umode_t mode, int *opened);
 extern int ceph_release(struct inode *inode, struct file *filp);
 /* dir.c */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 8b6e344eb0ba..a7610cfedf0a 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -257,7 +257,6 @@ cifs_alloc_inode(struct super_block *sb)
 static void cifs_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(cifs_inode_cachep, CIFS_I(inode));
 }
@@ -638,7 +637,10 @@ cifs_do_mount(struct file_system_type *fs_type,
        mnt_data.cifs_sb = cifs_sb;
        mnt_data.flags = flags;
-        sb = sget(fs_type, cifs_match_super, cifs_set_super, &mnt_data);
+        /* BB should we make this contingent on mount parm? */
+        flags |= MS_NODIRATIME | MS_NOATIME;
+        sb = sget(fs_type, cifs_match_super, cifs_set_super, flags, &mnt_data);
        if (IS_ERR(sb)) {
                root = ERR_CAST(sb);
                cifs_umount(cifs_sb);
@@ -649,10 +651,6 @@ cifs_do_mount(struct file_system_type *fs_type,
                cFYI(1, "Use existing superblock");
                cifs_umount(cifs_sb);
        } else {
-                sb->s_flags = flags;
-                /* BB should we make this contingent on mount parm? */
-                sb->s_flags |= MS_NODIRATIME | MS_NOATIME;
                rc = cifs_read_super(sb);
                if (rc) {
                        root = ERR_PTR(rc);
@@ -778,6 +776,7 @@ struct file_system_type cifs_fs_type = {
 };
 const struct inode_operations cifs_dir_inode_ops = {
        .create = cifs_create,
+        .atomic_open = cifs_atomic_open,
        .lookup = cifs_lookup,
        .getattr = cifs_getattr,
        .unlink = cifs_unlink,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 65365358c976..1c49c5a9b27a 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -45,9 +45,12 @@ extern const struct address_space_operations cifs_addr_ops_smallbuf;
 extern const struct inode_operations cifs_dir_inode_ops;
 extern struct inode *cifs_root_iget(struct super_block *);
 extern int cifs_create(struct inode *, struct dentry *, umode_t,
-                       struct nameidata *);
+                       bool excl);
+extern int cifs_atomic_open(struct inode *, struct dentry *,
+                            struct file *, unsigned, umode_t,
+                            int *);
 extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
-                                  struct nameidata *);
+                                  unsigned int);
 extern int cifs_unlink(struct inode *dir, struct dentry *dentry);
 extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *);
 extern int cifs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 5b400730c213..4ee522b3f66f 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -86,7 +86,31 @@ static struct {
 #endif /* CONFIG_CIFS_WEAK_PW_HASH */
 #endif /* CIFS_POSIX */
-/* Forward declarations */
+#ifdef CONFIG_HIGHMEM
+/*
+ * On arches that have high memory, kmap address space is limited. By
+ * serializing the kmap operations on those arches, we ensure that we don't
+ * end up with a bunch of threads in writeback with partially mapped page
+ * arrays, stuck waiting for kmap to come back. That situation prevents
+ * progress and can deadlock.
+ */
+static DEFINE_MUTEX(cifs_kmap_mutex);
+static inline void
+cifs_kmap_lock(void)
+{
+        mutex_lock(&cifs_kmap_mutex);
+}
+static inline void
+cifs_kmap_unlock(void)
+{
+        mutex_unlock(&cifs_kmap_mutex);
+}
+#else /* !CONFIG_HIGHMEM */
+#define cifs_kmap_lock() do { ; } while(0)
+#define cifs_kmap_unlock() do { ; } while(0)
+#endif /* CONFIG_HIGHMEM */
 /* Mark as invalid, all open files on tree connections since they
   were closed when session to server was lost */
@@ -1503,7 +1527,9 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
        }
        /* marshal up the page array */
+        cifs_kmap_lock();
        len = rdata->marshal_iov(rdata, data_len);
+        cifs_kmap_unlock();
        data_len -= len;
        /* issue the read if we have any iovecs left to fill */
@@ -2069,7 +2095,9 @@ cifs_async_writev(struct cifs_writedata *wdata)
         * and set the iov_len properly for each one. It may also set
         * wdata->bytes too.
         */
+        cifs_kmap_lock();
        wdata->marshal_iov(iov, wdata);
+        cifs_kmap_unlock();
        cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 0ae86ddf2213..94b7788c3189 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3445,6 +3445,18 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
 #define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024)
 #define CIFS_DEFAULT_NON_POSIX_WSIZE (65536)
+/*
+ * On hosts with high memory, we can't currently support wsize/rsize that are
+ * larger than we can kmap at once. Cap the rsize/wsize at
+ * LAST_PKMAP * PAGE_SIZE. We'll never be able to fill a read or write request
+ * larger than that anyway.
+ */
+#ifdef CONFIG_HIGHMEM
+#define CIFS_KMAP_SIZE_LIMIT    (LAST_PKMAP * PAGE_CACHE_SIZE)
+#else /* CONFIG_HIGHMEM */
+#define CIFS_KMAP_SIZE_LIMIT    (1<<24)
+#endif /* CONFIG_HIGHMEM */
 static unsigned int
 cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
 {
@@ -3475,6 +3487,9 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
                wsize = min_t(unsigned int, wsize,
                                server->maxBuf - sizeof(WRITE_REQ) + 4);
+        /* limit to the amount that we can kmap at once */
+        wsize = min_t(unsigned int, wsize, CIFS_KMAP_SIZE_LIMIT);
        /* hard limit of CIFS_MAX_WSIZE */
        wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE);
@@ -3516,6 +3531,9 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
        if (!(server->capabilities & CAP_LARGE_READ_X))
                rsize = min_t(unsigned int, CIFSMaxBufSize, rsize);
+        /* limit to the amount that we can kmap at once */
+        rsize = min_t(unsigned int, rsize, CIFS_KMAP_SIZE_LIMIT);
        /* hard limit of CIFS_MAX_RSIZE */
        rsize = min_t(unsigned int, rsize, CIFS_MAX_RSIZE);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index ec4e9a2a12f8..a180265a10b5 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -133,108 +133,141 @@ cifs_bp_rename_retry:
        return full_path;
 }
+/*
+ * Validate the last path component held in @direntry.
+ *
+ * The VFS already rejects "/" inside a component, but posix permits "\".
+ * Unless the mount was set up with CIFS_MOUNT_POSIX_PATHS the backslash is
+ * the separator character, so a name containing one is refused.
+ *
+ * Returns 0 if the name is acceptable, -EINVAL otherwise.
+ */
+static int
+check_name(struct dentry *direntry)
+{
+        struct cifs_sb_info *sbi = CIFS_SB(direntry->d_sb);
+        int pos;
+        /* posix-path mounts treat '\' as an ordinary character */
+        if (sbi->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
+                return 0;
+        for (pos = 0; pos < direntry->d_name.len; pos++) {
+                if (direntry->d_name.name[pos] != '\\')
+                        continue;
+                cFYI(1, "Invalid file name");
+                return -EINVAL;
+        }
+        return 0;
+}
 /* Inode operations in similar order to how they appear in Linux file fs.h */
-int
+static int cifs_do_create(struct inode *inode, struct dentry *direntry,
-cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
+                          int xid, struct tcon_link *tlink, unsigned oflags,
-                struct nameidata *nd)
+                          umode_t mode, __u32 *oplock, __u16 *fileHandle,
+                          int *created)
 {
        int rc = -ENOENT;
-        int xid;
        int create_options = CREATE_NOT_DIR;
-        __u32 oplock = 0;
+        int desiredAccess;
-        int oflags;
+        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
-        /*
+        struct cifs_tcon *tcon = tlink_tcon(tlink);
-         * BB below access is probably too much for mknod to request
-         *    but we have to do query and setpathinfo so requesting
-         *    less could fail (unless we want to request getatr and setatr
-         *    permissions (only).  At least for POSIX we do not have to
-         *    request so much.
-         */
-        int desiredAccess = GENERIC_READ | GENERIC_WRITE;
-        __u16 fileHandle;
-        struct cifs_sb_info *cifs_sb;
-        struct tcon_link *tlink;
-        struct cifs_tcon *tcon;
        char *full_path = NULL;
        FILE_ALL_INFO *buf = NULL;
        struct inode *newinode = NULL;
-        int disposition = FILE_OVERWRITE_IF;
+        int disposition;
-        xid = GetXid();
-        cifs_sb = CIFS_SB(inode->i_sb);
-        tlink = cifs_sb_tlink(cifs_sb);
-        if (IS_ERR(tlink)) {
-                FreeXid(xid);
-                return PTR_ERR(tlink);
-        }
-        tcon = tlink_tcon(tlink);
+        *oplock = 0;
        if (tcon->ses->server->oplocks)
-                oplock = REQ_OPLOCK;
+                *oplock = REQ_OPLOCK;
-        if (nd)
-                oflags = nd->intent.open.file->f_flags;
-        else
-                oflags = O_RDONLY | O_CREAT;
        full_path = build_path_from_dentry(direntry);
        if (full_path == NULL) {
                rc = -ENOMEM;
-                goto cifs_create_out;
+                goto out;
        }
        if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
+            !tcon->broken_posix_open &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                        le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                rc = cifs_posix_open(full_path, &newinode,
-                        inode->i_sb, mode, oflags, &oplock, &fileHandle, xid);
+                        inode->i_sb, mode, oflags, oplock, fileHandle, xid);
-                /* EIO could indicate that (posix open) operation is not
+                switch (rc) {
-                   supported, despite what server claimed in capability
+                case 0:
-                   negotiation.  EREMOTE indicates DFS junction, which is not
+                        if (newinode == NULL) {
-                   handled in posix open */
+                                /* query inode info */
-                if (rc == 0) {
-                        if (newinode == NULL) /* query inode info */
                                goto cifs_create_get_file_info;
-                        else /* success, no need to query */
+                        }
-                                goto cifs_create_set_dentry;
-                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
+                        if (!S_ISREG(newinode->i_mode)) {
-                         (rc != -EOPNOTSUPP) && (rc != -EINVAL))
+                                /*
-                        goto cifs_create_out;
+                                 * The server may allow us to open things like
-                /* else fallthrough to retry, using older open call, this is
+                                 * FIFOs, but the client isn't set up to deal
-                   case where server does not support this SMB level, and
+                                 * with that. If it's not a regular file, just
-                   falsely claims capability (also get here for DFS case
+                                 * close it and proceed as if it were a normal
-                   which should be rare for path not covered on files) */
+                                 * lookup.
-        }
+                                 */
+                                CIFSSMBClose(xid, tcon, *fileHandle);
+                                goto cifs_create_get_file_info;
+                        }
+                        /* success, no need to query */
+                        goto cifs_create_set_dentry;
+                case -ENOENT:
+                        goto cifs_create_get_file_info;
+                case -EIO:
+                case -EINVAL:
+                        /*
+                         * EIO could indicate that (posix open) operation is not
+                         * supported, despite what server claimed in capability
+                         * negotiation.
+                         *
+                         * POSIX open in samba versions 3.3.1 and earlier could
+                         * incorrectly fail with invalid parameter.
+                         */
+                        tcon->broken_posix_open = true;
+                        break;
+                case -EREMOTE:
+                case -EOPNOTSUPP:
+                        /*
+                         * EREMOTE indicates DFS junction, which is not handled
+                         * in posix open.  If either that or op not supported
+                         * returned, follow the normal lookup.
+                         */
+                        break;
-        if (nd) {
+                default:
-                /* if the file is going to stay open, then we
+                        goto out;
-                   need to set the desired access properly */
+                }
-                desiredAccess = 0;
+                /*
-                if (OPEN_FMODE(oflags) & FMODE_READ)
+                 * fallthrough to retry, using older open call, this is case
-                        desiredAccess |= GENERIC_READ; /* is this too little? */
+                 * where server does not support this SMB level, and falsely
-                if (OPEN_FMODE(oflags) & FMODE_WRITE)
+                 * claims capability (also get here for DFS case which should be
-                        desiredAccess |= GENERIC_WRITE;
+                 * rare for path not covered on files)
+                 */
-                if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
-                        disposition = FILE_CREATE;
-                else if ((oflags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
-                        disposition = FILE_OVERWRITE_IF;
-                else if ((oflags & O_CREAT) == O_CREAT)
-                        disposition = FILE_OPEN_IF;
-                else
-                        cFYI(1, "Create flag not set in create function");
        }
+        desiredAccess = 0;
+        if (OPEN_FMODE(oflags) & FMODE_READ)
+                desiredAccess |= GENERIC_READ; /* is this too little? */
+        if (OPEN_FMODE(oflags) & FMODE_WRITE)
+                desiredAccess |= GENERIC_WRITE;
+        disposition = FILE_OVERWRITE_IF;
+        if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
+                disposition = FILE_CREATE;
+        else if ((oflags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
+                disposition = FILE_OVERWRITE_IF;
+        else if ((oflags & O_CREAT) == O_CREAT)
+                disposition = FILE_OPEN_IF;
+        else
+                cFYI(1, "Create flag not set in create function");
        /* BB add processing to set equivalent of mode - e.g. via CreateX with
           ACLs */
        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (buf == NULL) {
                rc = -ENOMEM;
-                goto cifs_create_out;
+                goto out;
        }
        /*
@@ -250,7 +283,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
        if (tcon->ses->capabilities & CAP_NT_SMBS)
                rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
                         desiredAccess, create_options,
-                         &fileHandle, &oplock, buf, cifs_sb->local_nls,
+                         fileHandle, oplock, buf, cifs_sb->local_nls,
                         cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
        else
                rc = -EIO; /* no NT SMB support fall into legacy open below */
@@ -259,17 +292,17 @@ cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
                /* old server, retry the open legacy style */
                rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
                        desiredAccess, create_options,
-                        &fileHandle, &oplock, buf, cifs_sb->local_nls,
+                        fileHandle, oplock, buf, cifs_sb->local_nls,
                        cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
        }
        if (rc) {
                cFYI(1, "cifs_create returned 0x%x", rc);
-                goto cifs_create_out;
+                goto out;
        }
        /* If Open reported that we actually created a file
           then we now have to set the mode if possible */
-        if ((tcon->unix_ext) && (oplock & CIFS_CREATE_ACTION)) {
+        if ((tcon->unix_ext) && (*oplock & CIFS_CREATE_ACTION)) {
                struct cifs_unix_set_info_args args = {
                                .mode   = mode,
                                .ctime  = NO_CHANGE_64,
@@ -278,6 +311,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
                                .device = 0,
                };
+                *created |= FILE_CREATED;
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
                        args.uid = (__u64) current_fsuid();
                        if (inode->i_mode & S_ISGID)
@@ -288,7 +322,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
                        args.uid = NO_CHANGE_64;
                        args.gid = NO_CHANGE_64;
                }
-                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fileHandle,
+                CIFSSMBUnixSetFileInfo(xid, tcon, &args, *fileHandle,
                                        current->tgid);
        } else {
                /* BB implement mode setting via Windows security
@@ -305,11 +339,11 @@ cifs_create_get_file_info:
                                              inode->i_sb, xid);
        else {
                rc = cifs_get_inode_info(&newinode, full_path, buf,
-                                         inode->i_sb, xid, &fileHandle);
+                                         inode->i_sb, xid, fileHandle);
                if (newinode) {
                        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)
                                newinode->i_mode = mode;
-                        if ((oplock & CIFS_CREATE_ACTION) &&
+                        if ((*oplock & CIFS_CREATE_ACTION) &&
                            (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) {
                                newinode->i_uid = current_fsuid();
                                if (inode->i_mode & S_ISGID)
@@ -321,40 +355,139 @@ cifs_create_get_file_info:
        }
 cifs_create_set_dentry:
-        if (rc == 0)
+        if (rc != 0) {
-                d_instantiate(direntry, newinode);
-        else
                cFYI(1, "Create worked, get_inode_info failed rc = %d", rc);
+                goto out;
+        }
+        d_drop(direntry);
+        d_add(direntry, newinode);
-        if (newinode && nd) {
+        /* ENOENT for create?  How weird... */
-                struct cifsFileInfo *pfile_info;
+        rc = -ENOENT;
-                struct file *filp;
+        if (!newinode) {
+                CIFSSMBClose(xid, tcon, *fileHandle);
+                goto out;
+        }
+        rc = 0;
-                filp = lookup_instantiate_filp(nd, direntry, generic_file_open);
+out:
-                if (IS_ERR(filp)) {
+        kfree(buf);
-                        rc = PTR_ERR(filp);
+        kfree(full_path);
-                        CIFSSMBClose(xid, tcon, fileHandle);
+        return rc;
-                        goto cifs_create_out;
+}
-                }
-                pfile_info = cifs_new_fileinfo(fileHandle, filp, tlink, oplock);
+/*
+ * Combined lookup/create/open entry point.
+ *
+ * Without O_CREAT in @oflags this only runs cifs_lookup() and returns the
+ * result of finish_no_open().  With O_CREAT the name is checked with
+ * check_name(), the file is created through cifs_do_create(), the open is
+ * completed with finish_open() and the server handle is attached to @file
+ * via cifs_new_fileinfo().
+ *
+ * On the create path the return value is 0 on success or a negative errno.
+ * If finish_open() or cifs_new_fileinfo() fails, the handle obtained from
+ * cifs_do_create() is closed again before returning.
+ */
+int
-                if (pfile_info == NULL) {
+cifs_atomic_open(struct inode *inode, struct dentry *direntry,
-                        fput(filp);
+                 struct file *file, unsigned oflags, umode_t mode,
-                        CIFSSMBClose(xid, tcon, fileHandle);
+                 int *opened)
-                        rc = -ENOMEM;
+{
-                }
+        int rc;
-        } else {
+        int xid;
+        struct tcon_link *tlink;
+        struct cifs_tcon *tcon;
+        __u16 fileHandle;
+        __u32 oplock;
+        struct cifsFileInfo *pfile_info;
+        /* Posix open is only called (at lookup time) for file create now.  For
+         * opens (rather than creates), because we do not know if it is a file
+         * or directory yet, and current Samba no longer allows us to do posix
+         * open on dirs, we could end up wasting an open call on what turns out
+         * to be a dir. For file opens, we wait to call posix open till
+         * cifs_open.  It could be added to atomic_open in the future but the
+         * performance tradeoff of the extra network request when EISDIR or
+         * EACCES is returned would have to be weighed against the 50% reduction
+         * in network traffic in the other paths.
+         */
+        if (!(oflags & O_CREAT)) {
+                struct dentry *res = cifs_lookup(inode, direntry, 0);
+                if (IS_ERR(res))
+                        return PTR_ERR(res);
+                return finish_no_open(file, res);
+        }
+        rc = check_name(direntry);
+        if (rc)
+                return rc;
+        xid = GetXid();
+        cFYI(1, "parent inode = 0x%p name is: %s and dentry = 0x%p",
+             inode, direntry->d_name.name, direntry);
+        tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb));
+        if (IS_ERR(tlink)) {
+                /* rc still holds the 0 from check_name(); report the error */
+                rc = PTR_ERR(tlink);
+                goto free_xid;
+        }
+        tcon = tlink_tcon(tlink);
+        rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode,
+                            &oplock, &fileHandle, opened);
+        if (rc)
+                goto out;
+        rc = finish_open(file, direntry, generic_file_open, opened);
+        if (rc) {
                CIFSSMBClose(xid, tcon, fileHandle);
+                goto out;
        }
-cifs_create_out:
+        /* attach the server handle to the struct file passed in by the caller */
+        pfile_info = cifs_new_fileinfo(fileHandle, file, tlink, oplock);
-        kfree(buf);
+        if (pfile_info == NULL) {
-        kfree(full_path);
+                CIFSSMBClose(xid, tcon, fileHandle);
+                fput(file);
+                rc = -ENOMEM;
+        }
+out:
        cifs_put_tlink(tlink);
+free_xid:
        FreeXid(xid);
        return rc;
 }
+/*
+ * ->create() inode operation: create the file on the server through
+ * cifs_do_create() and close the resulting handle again straight away.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
+                bool excl)
+{
+        /*
+         * BB read/write access is probably more than mknod needs, but we
+         *    have to do query and setpathinfo, so asking for less could
+         *    fail (unless we want to request getatr and setatr permissions
+         *    only).  At least for POSIX we do not have to request so much.
+         */
+        unsigned oflags = O_EXCL | O_CREAT | O_RDWR;
+        int created = FILE_CREATED;
+        struct tcon_link *tlink;
+        __u16 fileHandle;
+        __u32 oplock;
+        int xid;
+        int rc;
+        xid = GetXid();
+        cFYI(1, "cifs_create parent inode = 0x%p name is: %s and dentry = 0x%p",
+             inode, direntry->d_name.name, direntry);
+        tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb));
+        if (IS_ERR(tlink)) {
+                /* no tlink reference was taken, only the xid needs freeing */
+                rc = PTR_ERR(tlink);
+                FreeXid(xid);
+                return rc;
+        }
+        rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode,
+                            &oplock, &fileHandle, &created);
+        /* the handle is not kept open on this path */
+        if (rc == 0)
+                CIFSSMBClose(xid, tlink_tcon(tlink), fileHandle);
+        cifs_put_tlink(tlink);
+        FreeXid(xid);
+        return rc;
+}
 int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode,
                dev_t device_number)
 {
@@ -488,20 +621,15 @@ mknod_out:
 struct dentry *
 cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
-            struct nameidata *nd)
+            unsigned int flags)
 {
        int xid;
        int rc = 0; /* to get around spurious gcc warning, set to zero here */
-        __u32 oplock;
-        __u16 fileHandle = 0;
-        bool posix_open = false;
        struct cifs_sb_info *cifs_sb;
        struct tcon_link *tlink;
        struct cifs_tcon *pTcon;
-        struct cifsFileInfo *cfile;
        struct inode *newInode = NULL;
        char *full_path = NULL;
-        struct file *filp;
        xid = GetXid();
@@ -518,31 +646,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
        }
        pTcon = tlink_tcon(tlink);
-        oplock = pTcon->ses->server->oplocks ? REQ_OPLOCK : 0;
+        rc = check_name(direntry);
+        if (rc)
-        /*
-         * Don't allow the separator character in a path component.
-         * The VFS will not allow "/", but "\" is allowed by posix.
-         */
-        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) {
-                int i;
-                for (i = 0; i < direntry->d_name.len; i++)
-                        if (direntry->d_name.name[i] == '\\') {
-                                cFYI(1, "Invalid file name");
-                                rc = -EINVAL;
-                                goto lookup_out;
-                        }
-        }
-        /*
-         * O_EXCL: optimize away the lookup, but don't hash the dentry. Let
-         * the VFS handle the create.
-         */
-        if (nd && (nd->flags & LOOKUP_EXCL)) {
-                d_instantiate(direntry, NULL);
-                rc = 0;
                goto lookup_out;
-        }
        /* can not grab the rename sem here since it would
        deadlock in the cases (beginning of sys_rename itself)
@@ -560,80 +666,16 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
        }
        cFYI(1, "Full path: %s inode = 0x%p", full_path, direntry->d_inode);
-        /* Posix open is only called (at lookup time) for file create now.
-         * For opens (rather than creates), because we do not know if it
-         * is a file or directory yet, and current Samba no longer allows
-         * us to do posix open on dirs, we could end up wasting an open call
-         * on what turns out to be a dir. For file opens, we wait to call posix
-         * open till cifs_open.  It could be added here (lookup) in the future
-         * but the performance tradeoff of the extra network request when EISDIR
-         * or EACCES is returned would have to be weighed against the 50%
-         * reduction in network traffic in the other paths.
-         */
        if (pTcon->unix_ext) {
-                if (nd && !(nd->flags & LOOKUP_DIRECTORY) &&
+                rc = cifs_get_inode_info_unix(&newInode, full_path,
-                     (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open &&
+                                              parent_dir_inode->i_sb, xid);
-                     (nd->intent.open.file->f_flags & O_CREAT)) {
+        } else {
-                        rc = cifs_posix_open(full_path, &newInode,
-                                        parent_dir_inode->i_sb,
-                                        nd->intent.open.create_mode,
-                                        nd->intent.open.file->f_flags, &oplock,
-                                        &fileHandle, xid);
-                        /*
-                         * The check below works around a bug in POSIX
-                         * open in samba versions 3.3.1 and earlier where
-                         * open could incorrectly fail with invalid parameter.
-                         * If either that or op not supported returned, follow
-                         * the normal lookup.
-                         */
-                        switch (rc) {
-                        case 0:
-                                /*
-                                 * The server may allow us to open things like
-                                 * FIFOs, but the client isn't set up to deal
-                                 * with that. If it's not a regular file, just
-                                 * close it and proceed as if it were a normal
-                                 * lookup.
-                                 */
-                                if (newInode && !S_ISREG(newInode->i_mode)) {
-                                        CIFSSMBClose(xid, pTcon, fileHandle);
-                                        break;
-                                }
-                        case -ENOENT:
-                                posix_open = true;
-                        case -EOPNOTSUPP:
-                                break;
-                        default:
-                                pTcon->broken_posix_open = true;
-                        }
-                }
-                if (!posix_open)
-                        rc = cifs_get_inode_info_unix(&newInode, full_path,
-                                                parent_dir_inode->i_sb, xid);
-        } else
                rc = cifs_get_inode_info(&newInode, full_path, NULL,
                                parent_dir_inode->i_sb, xid, NULL);
+        }
        if ((rc == 0) && (newInode != NULL)) {
                d_add(direntry, newInode);
-                if (posix_open) {
-                        filp = lookup_instantiate_filp(nd, direntry,
-                                                       generic_file_open);
-                        if (IS_ERR(filp)) {
-                                rc = PTR_ERR(filp);
-                                CIFSSMBClose(xid, pTcon, fileHandle);
-                                goto lookup_out;
-                        }
-                        cfile = cifs_new_fileinfo(fileHandle, filp, tlink,
-                                                  oplock);
-                        if (cfile == NULL) {
-                                fput(filp);
-                                CIFSSMBClose(xid, pTcon, fileHandle);
-                                rc = -ENOMEM;
-                                goto lookup_out;
-                        }
-                }
                /* since paths are not looked up by component - the parent
                   directories are presumed to be good here */
                renew_parental_timestamps(direntry);
@@ -658,9 +700,9 @@ lookup_out:
 }
 static int
-cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
+cifs_d_revalidate(struct dentry *direntry, unsigned int flags)
 {
-        if (nd && (nd->flags & LOOKUP_RCU))
+        if (flags & LOOKUP_RCU)
                return -ECHILD;
        if (direntry->d_inode) {
@@ -689,7 +731,7 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
         * This may be nfsd (or something), anyway, we can't see the
         * intent of this. So, since this can be for creation, drop it.
         */
-        if (!nd)
+        if (!flags)
                return 0;
        /*
@@ -697,7 +739,7 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
         * case sensitive name which is specified by user if this is
         * for creation.
         */
-        if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
+        if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
                return 0;
        if (time_after(jiffies, direntry->d_time + HZ) || !lookupCacheEnabled)
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 745da3d0653e..8e8bb49112ff 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -800,7 +800,7 @@ cifs_find_inode(struct inode *inode, void *opaque)
                return 0;
        /* if it's not a directory or has no dentries, then flag it */
-        if (S_ISDIR(inode->i_mode) && !list_empty(&inode->i_dentry))
+        if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry))
                fattr->cf_flags |= CIFS_FATTR_INO_COLLISION;
        return 1;
@@ -825,9 +825,10 @@ static bool
 inode_has_hashed_dentries(struct inode *inode)
 {
        struct dentry *dentry;
+        struct hlist_node *p;
        spin_lock(&inode->i_lock);
-        list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+        hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) {
                if (!d_unhashed(dentry) || IS_ROOT(dentry)) {
                        spin_unlock(&inode->i_lock);
                        return true;
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 0a8224d1c4c5..a4217f02fab2 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -86,9 +86,12 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
        dentry = d_lookup(parent, name);
        if (dentry) {
-                /* FIXME: check for inode number changes? */
+                inode = dentry->d_inode;
-                if (dentry->d_inode != NULL)
+                /* update inode in place if i_ino didn't change */
+                if (inode && CIFS_I(inode)->uniqueid == fattr->cf_uniqueid) {
+                        cifs_fattr_to_inode(inode, fattr);
                        return dentry;
+                }
                d_drop(dentry);
                dput(dentry);
        }
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 3097ee58fd7d..f25d4ea14be4 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -365,16 +365,14 @@ cifs_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov,
        if (mid == NULL)
                return -ENOMEM;
-        /* put it on the pending_mid_q */
-        spin_lock(&GlobalMid_Lock);
-        list_add_tail(&mid->qhead, &server->pending_mid_q);
-        spin_unlock(&GlobalMid_Lock);
        rc = cifs_sign_smb2(iov, nvec, server, &mid->sequence_number);
-        if (rc)
+        if (rc) {
-                delete_mid(mid);
+                DeleteMidQEntry(mid);
+                return rc;
+        }
        *ret_mid = mid;
-        return rc;
+        return 0;
 }
 /*
@@ -407,17 +405,21 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
        mid->callback_data = cbdata;
        mid->mid_state = MID_REQUEST_SUBMITTED;
+        /* put it on the pending_mid_q */
+        spin_lock(&GlobalMid_Lock);
+        list_add_tail(&mid->qhead, &server->pending_mid_q);
+        spin_unlock(&GlobalMid_Lock);
        cifs_in_send_inc(server);
        rc = smb_sendv(server, iov, nvec);
        cifs_in_send_dec(server);
        cifs_save_when_sent(mid);
        mutex_unlock(&server->srv_mutex);
-        if (rc)
+        if (rc == 0)
-                goto out_err;
+                return 0;
-        return rc;
-out_err:
        delete_mid(mid);
        add_credits(server, 1);
        wake_up(&server->request_q);
diff --git a/fs/coda/cache.c b/fs/coda/cache.c
index 690157876184..958ae0e0ff8c 100644
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -89,17 +89,13 @@ int coda_cache_check(struct inode *inode, int mask)
 /* this won't do any harm: just flag all children */
 static void coda_flag_children(struct dentry *parent, int flag)
 {
-        struct list_head *child;
        struct dentry *de;
        spin_lock(&parent->d_lock);
-        list_for_each(child, &parent->d_subdirs)
+        list_for_each_entry(de, &parent->d_subdirs, d_u.d_child) {
-        {
-                de = list_entry(child, struct dentry, d_u.d_child);
                /* don't know what to do with negative dentries */
-                if ( ! de->d_inode ) 
+                if (de->d_inode ) 
-                        continue;
+                        coda_flag_inode(de->d_inode, flag);
-                coda_flag_inode(de->d_inode, flag);
        }
        spin_unlock(&parent->d_lock);
        return; 
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 177515829062..49fe52d25600 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -30,8 +30,8 @@
 #include "coda_int.h"
 /* dir inode-ops */
-static int coda_create(struct inode *dir, struct dentry *new, umode_t mode, struct nameidata *nd);
+static int coda_create(struct inode *dir, struct dentry *new, umode_t mode, bool excl);
-static struct dentry *coda_lookup(struct inode *dir, struct dentry *target, struct nameidata *nd);
+static struct dentry *coda_lookup(struct inode *dir, struct dentry *target, unsigned int flags);
 static int coda_link(struct dentry *old_dentry, struct inode *dir_inode, 
                     struct dentry *entry);
 static int coda_unlink(struct inode *dir_inode, struct dentry *entry);
@@ -46,7 +46,7 @@ static int coda_rename(struct inode *old_inode, struct dentry *old_dentry,
 static int coda_readdir(struct file *file, void *buf, filldir_t filldir);
 /* dentry ops */
-static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd);
+static int coda_dentry_revalidate(struct dentry *de, unsigned int flags);
 static int coda_dentry_delete(const struct dentry *);
 /* support routines */
@@ -94,7 +94,7 @@ const struct file_operations coda_dir_operations = {
 /* inode operations for directories */
 /* access routines: lookup, readlink, permission */
-static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struct nameidata *nd)
+static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, unsigned int flags)
 {
        struct super_block *sb = dir->i_sb;
        const char *name = entry->d_name.name;
@@ -188,7 +188,7 @@ static inline void coda_dir_drop_nlink(struct inode *dir)
 }
 /* creation routines: create, mknod, mkdir, link, symlink */
-static int coda_create(struct inode *dir, struct dentry *de, umode_t mode, struct nameidata *nd)
+static int coda_create(struct inode *dir, struct dentry *de, umode_t mode, bool excl)
 {
        int error;
        const char *name=de->d_name.name;
@@ -536,12 +536,12 @@ out:
 }
 /* called when a cache lookup succeeds */
-static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd)
+static int coda_dentry_revalidate(struct dentry *de, unsigned int flags)
 {
        struct inode *inode;
        struct coda_inode_info *cii;
-        if (nd->flags & LOOKUP_RCU)
+        if (flags & LOOKUP_RCU)
                return -ECHILD;
        inode = de->d_inode;
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 7e6c52d8a207..7414ae24a79b 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -442,7 +442,7 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den
 static struct dentry * configfs_lookup(struct inode *dir,
                                       struct dentry *dentry,
-                                       struct nameidata *nd)
+                                       unsigned int flags)
 {
        struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
        struct configfs_dirent * sd;
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index d013c46402ed..28cca01ca9c9 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -417,7 +417,7 @@ static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 /*
 * Lookup and fill in the inode data..
 */
-static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
        unsigned int offset = 0;
        struct inode *inode = NULL;
diff --git a/fs/dcache.c b/fs/dcache.c
index 40469044088d..8086636bf796 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -218,7 +218,7 @@ static void __d_free(struct rcu_head *head)
 {
        struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
-        WARN_ON(!list_empty(&dentry->d_alias));
+        WARN_ON(!hlist_unhashed(&dentry->d_alias));
        if (dname_external(dentry))
                kfree(dentry->d_name.name);
        kmem_cache_free(dentry_cache, dentry); 
@@ -267,7 +267,7 @@ static void dentry_iput(struct dentry * dentry)
        struct inode *inode = dentry->d_inode;
        if (inode) {
                dentry->d_inode = NULL;
-                list_del_init(&dentry->d_alias);
+                hlist_del_init(&dentry->d_alias);
                spin_unlock(&dentry->d_lock);
                spin_unlock(&inode->i_lock);
                if (!inode->i_nlink)
@@ -291,7 +291,7 @@ static void dentry_unlink_inode(struct dentry * dentry)
 {
        struct inode *inode = dentry->d_inode;
        dentry->d_inode = NULL;
-        list_del_init(&dentry->d_alias);
+        hlist_del_init(&dentry->d_alias);
        dentry_rcuwalk_barrier(dentry);
        spin_unlock(&dentry->d_lock);
        spin_unlock(&inode->i_lock);
@@ -699,10 +699,11 @@ EXPORT_SYMBOL(dget_parent);
 static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
 {
        struct dentry *alias, *discon_alias;
+        struct hlist_node *p;
 again:
        discon_alias = NULL;
-        list_for_each_entry(alias, &inode->i_dentry, d_alias) {
+        hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) {
                spin_lock(&alias->d_lock);
                if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
                        if (IS_ROOT(alias) &&
@@ -737,7 +738,7 @@ struct dentry *d_find_alias(struct inode *inode)
 {
        struct dentry *de = NULL;
-        if (!list_empty(&inode->i_dentry)) {
+        if (!hlist_empty(&inode->i_dentry)) {
                spin_lock(&inode->i_lock);
                de = __d_find_alias(inode, 0);
                spin_unlock(&inode->i_lock);
@@ -753,9 +754,10 @@ EXPORT_SYMBOL(d_find_alias);
 void d_prune_aliases(struct inode *inode)
 {
        struct dentry *dentry;
+        struct hlist_node *p;
 restart:
        spin_lock(&inode->i_lock);
-        list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+        hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) {
                spin_lock(&dentry->d_lock);
                if (!dentry->d_count) {
                        __dget_dlock(dentry);
@@ -977,7 +979,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
                        inode = dentry->d_inode;
                        if (inode) {
                                dentry->d_inode = NULL;
-                                list_del_init(&dentry->d_alias);
+                                hlist_del_init(&dentry->d_alias);
                                if (dentry->d_op && dentry->d_op->d_iput)
                                        dentry->d_op->d_iput(dentry, inode);
                                else
@@ -1312,7 +1314,7 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
        INIT_HLIST_BL_NODE(&dentry->d_hash);
        INIT_LIST_HEAD(&dentry->d_lru);
        INIT_LIST_HEAD(&dentry->d_subdirs);
-        INIT_LIST_HEAD(&dentry->d_alias);
+        INIT_HLIST_NODE(&dentry->d_alias);
        INIT_LIST_HEAD(&dentry->d_u.d_child);
        d_set_d_op(dentry, dentry->d_sb->s_d_op);
@@ -1400,7 +1402,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
        if (inode) {
                if (unlikely(IS_AUTOMOUNT(inode)))
                        dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
-                list_add(&dentry->d_alias, &inode->i_dentry);
+                hlist_add_head(&dentry->d_alias, &inode->i_dentry);
        }
        dentry->d_inode = inode;
        dentry_rcuwalk_barrier(dentry);
@@ -1425,7 +1427,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
 
 void d_instantiate(struct dentry *entry, struct inode * inode)
 {
-        BUG_ON(!list_empty(&entry->d_alias));
+        BUG_ON(!hlist_unhashed(&entry->d_alias));
        if (inode)
                spin_lock(&inode->i_lock);
        __d_instantiate(entry, inode);
@@ -1458,13 +1460,14 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
        int len = entry->d_name.len;
        const char *name = entry->d_name.name;
        unsigned int hash = entry->d_name.hash;
+        struct hlist_node *p;
        if (!inode) {
                __d_instantiate(entry, NULL);
                return NULL;
        }
-        list_for_each_entry(alias, &inode->i_dentry, d_alias) {
+        hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) {
                /*
                 * Don't need alias->d_lock here, because aliases with
                 * d_parent == entry->d_parent are not subject to name or
@@ -1490,7 +1493,7 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
 {
        struct dentry *result;
-        BUG_ON(!list_empty(&entry->d_alias));
+        BUG_ON(!hlist_unhashed(&entry->d_alias));
        if (inode)
                spin_lock(&inode->i_lock);
@@ -1531,9 +1534,9 @@ static struct dentry * __d_find_any_alias(struct inode *inode)
 {
        struct dentry *alias;
-        if (list_empty(&inode->i_dentry))
+        if (hlist_empty(&inode->i_dentry))
                return NULL;
-        alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias);
+        alias = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
        __dget(alias);
        return alias;
 }
@@ -1607,7 +1610,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
        spin_lock(&tmp->d_lock);
        tmp->d_inode = inode;
        tmp->d_flags |= DCACHE_DISCONNECTED;
-        list_add(&tmp->d_alias, &inode->i_dentry);
+        hlist_add_head(&tmp->d_alias, &inode->i_dentry);
        hlist_bl_lock(&tmp->d_sb->s_anon);
        hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
        hlist_bl_unlock(&tmp->d_sb->s_anon);
@@ -2384,14 +2387,13 @@ static struct dentry *__d_unalias(struct inode *inode,
                struct dentry *dentry, struct dentry *alias)
 {
        struct mutex *m1 = NULL, *m2 = NULL;
-        struct dentry *ret;
+        struct dentry *ret = ERR_PTR(-EBUSY);
        /* If alias and dentry share a parent, then no extra locks required */
        if (alias->d_parent == dentry->d_parent)
                goto out_unalias;
        /* See lock_rename() */
-        ret = ERR_PTR(-EBUSY);
        if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
                goto out_err;
        m1 = &dentry->d_sb->s_vfs_rename_mutex;
@@ -2399,8 +2401,10 @@ static struct dentry *__d_unalias(struct inode *inode,
                goto out_err;
        m2 = &alias->d_parent->d_inode->i_mutex;
 out_unalias:
-        __d_move(alias, dentry);
+        if (likely(!d_mountpoint(alias))) {
-        ret = alias;
+                __d_move(alias, dentry);
+                ret = alias;
+        }
 out_err:
        spin_unlock(&inode->i_lock);
        if (m2)
@@ -2622,7 +2626,7 @@ global_root:
        if (!slash)
                error = prepend(buffer, buflen, "/", 1);
        if (!error)
-                error = real_mount(vfsmnt)->mnt_ns ? 1 : 2;
+                error = is_mounted(vfsmnt) ? 1 : 2;
        goto out;
 }
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index b80bc846a15a..d17c20fd74e6 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -54,13 +54,12 @@ static struct inode *debugfs_get_inode(struct super_block *sb, umode_t mode, dev
                        break;
                case S_IFLNK:
                        inode->i_op = &debugfs_link_operations;
-                        inode->i_fop = fops;
                        inode->i_private = data;
                        break;
                case S_IFDIR:
                        inode->i_op = &simple_dir_inode_operations;
-                        inode->i_fop = fops ? fops : &simple_dir_operations;
+                        inode->i_fop = &simple_dir_operations;
-                        inode->i_private = data;
+                        inode->i_private = NULL;
                        /* directory inodes start off with i_nlink == 2
                         * (for "." entry) */
@@ -91,13 +90,12 @@ static int debugfs_mknod(struct inode *dir, struct dentry *dentry,
        return error;
 }
-static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode,
+static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
-                         void *data, const struct file_operations *fops)
 {
        int res;
        mode = (mode & (S_IRWXUGO | S_ISVTX)) | S_IFDIR;
-        res = debugfs_mknod(dir, dentry, mode, 0, data, fops);
+        res = debugfs_mknod(dir, dentry, mode, 0, NULL, NULL);
        if (!res) {
                inc_nlink(dir);
                fsnotify_mkdir(dir, dentry);
@@ -106,10 +104,10 @@ static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode,
 }
 static int debugfs_link(struct inode *dir, struct dentry *dentry, umode_t mode,
-                        void *data, const struct file_operations *fops)
+                        void *data)
 {
        mode = (mode & S_IALLUGO) | S_IFLNK;
-        return debugfs_mknod(dir, dentry, mode, 0, data, fops);
+        return debugfs_mknod(dir, dentry, mode, 0, data, NULL);
 }
 static int debugfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
@@ -293,13 +291,19 @@ static struct file_system_type debug_fs_type = {
        .kill_sb =      kill_litter_super,
 };
-static int debugfs_create_by_name(const char *name, umode_t mode,
+struct dentry *__create_file(const char *name, umode_t mode,
-                                  struct dentry *parent,
+                                   struct dentry *parent, void *data,
-                                  struct dentry **dentry,
+                                   const struct file_operations *fops)
-                                  void *data,
-                                  const struct file_operations *fops)
 {
-        int error = 0;
+        struct dentry *dentry = NULL;
+        int error;
+        pr_debug("debugfs: creating file '%s'\n",name);
+        error = simple_pin_fs(&debug_fs_type, &debugfs_mount,
+                              &debugfs_mount_count);
+        if (error)
+                goto exit;
        /* If the parent is not specified, we create it in the root.
         * We need the root dentry to do this, which is in the super 
@@ -309,30 +313,35 @@ static int debugfs_create_by_name(const char *name, umode_t mode,
        if (!parent)
                parent = debugfs_mount->mnt_root;
-        *dentry = NULL;
+        dentry = NULL;
        mutex_lock(&parent->d_inode->i_mutex);
-        *dentry = lookup_one_len(name, parent, strlen(name));
+        dentry = lookup_one_len(name, parent, strlen(name));
-        if (!IS_ERR(*dentry)) {
+        if (!IS_ERR(dentry)) {
                switch (mode & S_IFMT) {
                case S_IFDIR:
-                        error = debugfs_mkdir(parent->d_inode, *dentry, mode,
+                        error = debugfs_mkdir(parent->d_inode, dentry, mode);
-                                              data, fops);
+                                              
                        break;
                case S_IFLNK:
-                        error = debugfs_link(parent->d_inode, *dentry, mode,
+                        error = debugfs_link(parent->d_inode, dentry, mode,
-                                             data, fops);
+                                             data);
                        break;
                default:
-                        error = debugfs_create(parent->d_inode, *dentry, mode,
+                        error = debugfs_create(parent->d_inode, dentry, mode,
                                               data, fops);
                        break;
                }
-                dput(*dentry);
+                dput(dentry);
        } else
-                error = PTR_ERR(*dentry);
+                error = PTR_ERR(dentry);
        mutex_unlock(&parent->d_inode->i_mutex);
-        return error;
+        if (error) {
+                dentry = NULL;
+                simple_release_fs(&debugfs_mount, &debugfs_mount_count);
+        }
+exit:
+        return dentry;
 }
 /**
@@ -365,25 +374,15 @@ struct dentry *debugfs_create_file(const char *name, umode_t mode,
                                   struct dentry *parent, void *data,
                                   const struct file_operations *fops)
 {
-        struct dentry *dentry = NULL;
+        switch (mode & S_IFMT) {
-        int error;
+        case S_IFREG:
+        case 0:
-        pr_debug("debugfs: creating file '%s'\n",name);
+                break;
+        default:
-        error = simple_pin_fs(&debug_fs_type, &debugfs_mount,
+                BUG();
-                              &debugfs_mount_count);
-        if (error)
-                goto exit;
-        error = debugfs_create_by_name(name, mode, parent, &dentry,
-                                       data, fops);
-        if (error) {
-                dentry = NULL;
-                simple_release_fs(&debugfs_mount, &debugfs_mount_count);
-                goto exit;
        }
-exit:
-        return dentry;
+        return __create_file(name, mode, parent, data, fops);
 }
 EXPORT_SYMBOL_GPL(debugfs_create_file);
@@ -407,8 +406,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_file);
 */
 struct dentry *debugfs_create_dir(const char *name, struct dentry *parent)
 {
-        return debugfs_create_file(name, 
+        return __create_file(name, S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
-                                   S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
                                   parent, NULL, NULL);
 }
 EXPORT_SYMBOL_GPL(debugfs_create_dir);
@@ -446,8 +444,7 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
        if (!link)
                return NULL;
-        result = debugfs_create_file(name, S_IFLNK | S_IRWXUGO, parent, link,
+        result = __create_file(name, S_IFLNK | S_IRWXUGO, parent, link, NULL);
-                                     NULL);
        if (!result)
                kfree(link);
        return result;
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 979c1e309c73..14afbabe6546 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -439,15 +439,15 @@ static struct dentry *devpts_mount(struct file_system_type *fs_type,
                return ERR_PTR(error);
        if (opts.newinstance)
-                s = sget(fs_type, NULL, set_anon_super, NULL);
+                s = sget(fs_type, NULL, set_anon_super, flags, NULL);
        else
-                s = sget(fs_type, compare_init_pts_sb, set_anon_super, NULL);
+                s = sget(fs_type, compare_init_pts_sb, set_anon_super, flags,
+                         NULL);
        if (IS_ERR(s))
                return ERR_CAST(s);
        if (!s->s_root) {
-                s->s_flags = flags;
                error = devpts_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
                if (error)
                        goto out_undo_sget;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 0c85fae37666..1faf4cb56f39 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1258,7 +1258,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
         */
        BUG_ON(retval == -EIOCBQUEUED);
        if (dio->is_async && retval == 0 && dio->result &&
-            ((rw & READ) || (dio->result == sdio.size)))
+            ((rw == READ) || (dio->result == sdio.size)))
                retval = -EIOCBQUEUED;
        if (retval != -EIOCBQUEUED)
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 534c1d46e69e..1b5d9af937df 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -32,7 +32,7 @@
 /**
 * ecryptfs_d_revalidate - revalidate an ecryptfs dentry
 * @dentry: The ecryptfs dentry
- * @nd: The associated nameidata
+ * @flags: lookup flags
 *
 * Called when the VFS needs to revalidate a dentry. This
 * is called whenever a name lookup finds a dentry in the
@@ -42,32 +42,20 @@
 * Returns 1 if valid, 0 otherwise.
 *
 */
-static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags)
 {
        struct dentry *lower_dentry;
        struct vfsmount *lower_mnt;
-        struct dentry *dentry_save = NULL;
-        struct vfsmount *vfsmount_save = NULL;
        int rc = 1;
-        if (nd && nd->flags & LOOKUP_RCU)
+        if (flags & LOOKUP_RCU)
                return -ECHILD;
        lower_dentry = ecryptfs_dentry_to_lower(dentry);
        lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
        if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate)
                goto out;
-        if (nd) {
+        rc = lower_dentry->d_op->d_revalidate(lower_dentry, flags);
-                dentry_save = nd->path.dentry;
-                vfsmount_save = nd->path.mnt;
-                nd->path.dentry = lower_dentry;
-                nd->path.mnt = lower_mnt;
-        }
-        rc = lower_dentry->d_op->d_revalidate(lower_dentry, nd);
-        if (nd) {
-                nd->path.dentry = dentry_save;
-                nd->path.mnt = vfsmount_save;
-        }
        if (dentry->d_inode) {
                struct inode *lower_inode =
                        ecryptfs_inode_to_lower(dentry->d_inode);
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 867b64c5d84f..989e034f02bd 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -550,20 +550,6 @@ extern struct kmem_cache *ecryptfs_key_record_cache;
 extern struct kmem_cache *ecryptfs_key_sig_cache;
 extern struct kmem_cache *ecryptfs_global_auth_tok_cache;
 extern struct kmem_cache *ecryptfs_key_tfm_cache;
-extern struct kmem_cache *ecryptfs_open_req_cache;
-struct ecryptfs_open_req {
-#define ECRYPTFS_REQ_PROCESSED 0x00000001
-#define ECRYPTFS_REQ_DROPPED   0x00000002
-#define ECRYPTFS_REQ_ZOMBIE    0x00000004
-        u32 flags;
-        struct file **lower_file;
-        struct dentry *lower_dentry;
-        struct vfsmount *lower_mnt;
-        wait_queue_head_t wait;
-        struct mutex mux;
-        struct list_head kthread_ctl_list;
-};
 struct inode *ecryptfs_get_inode(struct inode *lower_inode,
                                 struct super_block *sb);
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index a07441a0a878..ffa2be57804d 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -173,7 +173,7 @@ ecryptfs_do_create(struct inode *directory_inode,
                inode = ERR_CAST(lower_dir_dentry);
                goto out;
        }
-        rc = vfs_create(lower_dir_dentry->d_inode, lower_dentry, mode, NULL);
+        rc = vfs_create(lower_dir_dentry->d_inode, lower_dentry, mode, true);
        if (rc) {
                printk(KERN_ERR "%s: Failure to create dentry in lower fs; "
                       "rc = [%d]\n", __func__, rc);
@@ -240,7 +240,6 @@ out:
 * @dir: The inode of the directory in which to create the file.
 * @dentry: The eCryptfs dentry
 * @mode: The mode of the new file.
- * @nd: nameidata
 *
 * Creates a new file.
 *
@@ -248,7 +247,7 @@ out:
 */
 static int
 ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
-                umode_t mode, struct nameidata *nd)
+                umode_t mode, bool excl)
 {
        struct inode *ecryptfs_inode;
        int rc;
@@ -270,8 +269,8 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
                iput(ecryptfs_inode);
                goto out;
        }
-        d_instantiate(ecryptfs_dentry, ecryptfs_inode);
        unlock_new_inode(ecryptfs_inode);
+        d_instantiate(ecryptfs_dentry, ecryptfs_inode);
 out:
        return rc;
 }
@@ -374,7 +373,7 @@ static int ecryptfs_lookup_interpose(struct dentry *dentry,
 */
 static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
                                      struct dentry *ecryptfs_dentry,
-                                      struct nameidata *ecryptfs_nd)
+                                      unsigned int flags)
 {
        char *encrypted_and_encoded_name = NULL;
        size_t encrypted_and_encoded_name_size;
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
index 0dbe58a8b172..809e67d05ca3 100644
--- a/fs/ecryptfs/kthread.c
+++ b/fs/ecryptfs/kthread.c
@@ -27,7 +27,12 @@
 #include <linux/mount.h>
 #include "ecryptfs_kernel.h"
-struct kmem_cache *ecryptfs_open_req_cache;
+struct ecryptfs_open_req {
+        struct file **lower_file;
+        struct path path;
+        struct completion done;
+        struct list_head kthread_ctl_list;
+};
 static struct ecryptfs_kthread_ctl {
 #define ECRYPTFS_KTHREAD_ZOMBIE 0x00000001
@@ -67,18 +72,10 @@ static int ecryptfs_threadfn(void *ignored)
                        req = list_first_entry(&ecryptfs_kthread_ctl.req_list,
                                               struct ecryptfs_open_req,
                                               kthread_ctl_list);
-                        mutex_lock(&req->mux);
                        list_del(&req->kthread_ctl_list);
-                        if (!(req->flags & ECRYPTFS_REQ_ZOMBIE)) {
+                        *req->lower_file = dentry_open(&req->path,
-                                dget(req->lower_dentry);
+                                (O_RDWR | O_LARGEFILE), current_cred());
-                                mntget(req->lower_mnt);
+                        complete(&req->done);
-                                (*req->lower_file) = dentry_open(
-                                        req->lower_dentry, req->lower_mnt,
-                                        (O_RDWR | O_LARGEFILE), current_cred());
-                                req->flags |= ECRYPTFS_REQ_PROCESSED;
-                        }
-                        wake_up(&req->wait);
-                        mutex_unlock(&req->mux);
                }
                mutex_unlock(&ecryptfs_kthread_ctl.mux);
        }
@@ -111,10 +108,9 @@ void ecryptfs_destroy_kthread(void)
        ecryptfs_kthread_ctl.flags |= ECRYPTFS_KTHREAD_ZOMBIE;
        list_for_each_entry(req, &ecryptfs_kthread_ctl.req_list,
                            kthread_ctl_list) {
-                mutex_lock(&req->mux);
+                list_del(&req->kthread_ctl_list);
-                req->flags |= ECRYPTFS_REQ_ZOMBIE;
+                *req->lower_file = ERR_PTR(-EIO);
-                wake_up(&req->wait);
+                complete(&req->done);
-                mutex_unlock(&req->mux);
        }
        mutex_unlock(&ecryptfs_kthread_ctl.mux);
        kthread_stop(ecryptfs_kthread);
@@ -136,34 +132,26 @@ int ecryptfs_privileged_open(struct file **lower_file,
                             struct vfsmount *lower_mnt,
                             const struct cred *cred)
 {
-        struct ecryptfs_open_req *req;
+        struct ecryptfs_open_req req;
        int flags = O_LARGEFILE;
        int rc = 0;
+        init_completion(&req.done);
+        req.lower_file = lower_file;
+        req.path.dentry = lower_dentry;
+        req.path.mnt = lower_mnt;
        /* Corresponding dput() and mntput() are done when the
         * lower file is fput() when all eCryptfs files for the inode are
         * released. */
-        dget(lower_dentry);
-        mntget(lower_mnt);
        flags |= IS_RDONLY(lower_dentry->d_inode) ? O_RDONLY : O_RDWR;
-        (*lower_file) = dentry_open(lower_dentry, lower_mnt, flags, cred);
+        (*lower_file) = dentry_open(&req.path, flags, cred);
        if (!IS_ERR(*lower_file))
                goto out;
        if ((flags & O_ACCMODE) == O_RDONLY) {
                rc = PTR_ERR((*lower_file));
                goto out;
        }
-        req = kmem_cache_alloc(ecryptfs_open_req_cache, GFP_KERNEL);
-        if (!req) {
-                rc = -ENOMEM;
-                goto out;
-        }
-        mutex_init(&req->mux);
-        req->lower_file = lower_file;
-        req->lower_dentry = lower_dentry;
-        req->lower_mnt = lower_mnt;
-        init_waitqueue_head(&req->wait);
-        req->flags = 0;
        mutex_lock(&ecryptfs_kthread_ctl.mux);
        if (ecryptfs_kthread_ctl.flags & ECRYPTFS_KTHREAD_ZOMBIE) {
                rc = -EIO;
@@ -171,27 +159,14 @@ int ecryptfs_privileged_open(struct file **lower_file,
                printk(KERN_ERR "%s: We are in the middle of shutting down; "
                       "aborting privileged request to open lower file\n",
                        __func__);
-                goto out_free;
+                goto out;
        }
-        list_add_tail(&req->kthread_ctl_list, &ecryptfs_kthread_ctl.req_list);
+        list_add_tail(&req.kthread_ctl_list, &ecryptfs_kthread_ctl.req_list);
        mutex_unlock(&ecryptfs_kthread_ctl.mux);
        wake_up(&ecryptfs_kthread_ctl.wait);
-        wait_event(req->wait, (req->flags != 0));
+        wait_for_completion(&req.done);
-        mutex_lock(&req->mux);
+        if (IS_ERR(*lower_file))
-        BUG_ON(req->flags == 0);
+                rc = PTR_ERR(*lower_file);
-        if (req->flags & ECRYPTFS_REQ_DROPPED
-            || req->flags & ECRYPTFS_REQ_ZOMBIE) {
-                rc = -EIO;
-                printk(KERN_WARNING "%s: Privileged open request dropped\n",
-                       __func__);
-                goto out_unlock;
-        }
-        if (IS_ERR(*req->lower_file))
-                rc = PTR_ERR(*req->lower_file);
-out_unlock:
-        mutex_unlock(&req->mux);
-out_free:
-        kmem_cache_free(ecryptfs_open_req_cache, req);
 out:
        return rc;
 }
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 68954937a071..1c0b3b6b75c6 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -499,13 +499,12 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
                goto out;
        }
-        s = sget(fs_type, NULL, set_anon_super, NULL);
+        s = sget(fs_type, NULL, set_anon_super, flags, NULL);
        if (IS_ERR(s)) {
                rc = PTR_ERR(s);
                goto out;
        }
-        s->s_flags = flags;
        rc = bdi_setup_and_register(&sbi->bdi, "ecryptfs", BDI_CAP_MAP_COPY);
        if (rc)
                goto out1;
@@ -682,11 +681,6 @@ static struct ecryptfs_cache_info {
                .name = "ecryptfs_key_tfm_cache",
                .size = sizeof(struct ecryptfs_key_tfm),
        },
-        {
-                .cache = &ecryptfs_open_req_cache,
-                .name = "ecryptfs_open_req_cache",
-                .size = sizeof(struct ecryptfs_open_req),
-        },
 };
 static void ecryptfs_free_kmem_caches(void)
diff --git a/fs/efs/efs.h b/fs/efs/efs.h
index d8305b582ab0..5528926ac7f6 100644
--- a/fs/efs/efs.h
+++ b/fs/efs/efs.h
@@ -129,7 +129,7 @@ extern struct inode *efs_iget(struct super_block *, unsigned long);
 extern efs_block_t efs_map_block(struct inode *, efs_block_t);
 extern int efs_get_block(struct inode *, sector_t, struct buffer_head *, int);
-extern struct dentry *efs_lookup(struct inode *, struct dentry *, struct nameidata *);
+extern struct dentry *efs_lookup(struct inode *, struct dentry *, unsigned int);
 extern struct dentry *efs_fh_to_dentry(struct super_block *sb, struct fid *fid,
                int fh_len, int fh_type);
 extern struct dentry *efs_fh_to_parent(struct super_block *sb, struct fid *fid,
diff --git a/fs/efs/namei.c b/fs/efs/namei.c
index 832b10ded82f..96f66d213a19 100644
--- a/fs/efs/namei.c
+++ b/fs/efs/namei.c
@@ -58,7 +58,8 @@ static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len)
        return(0);
 }
-struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) {
+struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
+{
        efs_ino_t inodenum;
        struct inode *inode = NULL;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 74598f67efeb..1c8b55670804 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1710,7 +1710,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
                goto error_tgt_fput;
        /* Check if EPOLLWAKEUP is allowed */
-        if ((epds.events & EPOLLWAKEUP) && !capable(CAP_EPOLLWAKEUP))
+        if ((epds.events & EPOLLWAKEUP) && !capable(CAP_BLOCK_SUSPEND))
                epds.events &= ~EPOLLWAKEUP;
        /*
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index fc7161d6bf6b..4731fd991efe 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -46,7 +46,7 @@ static inline int exofs_add_nondir(struct dentry *dentry, struct inode *inode)
 }
 static struct dentry *exofs_lookup(struct inode *dir, struct dentry *dentry,
-                                   struct nameidata *nd)
+                                   unsigned int flags)
 {
        struct inode *inode;
        ino_t ino;
@@ -60,7 +60,7 @@ static struct dentry *exofs_lookup(struct inode *dir, struct dentry *dentry,
 }
 static int exofs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                         struct nameidata *nd)
+                         bool excl)
 {
        struct inode *inode = exofs_new_inode(dir, mode);
        int err = PTR_ERR(inode);
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index 49cf230554a2..24a49d47e935 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -735,13 +735,7 @@ static int _prepare_for_striping(struct ore_io_state *ios)
 out:
        ios->numdevs = devs_in_group;
        ios->pages_consumed = cur_pg;
-        if (unlikely(ret)) {
+        return ret;
-                if (length == ios->length)
-                        return ret;
-                else
-                        ios->length -= length;
-        }
-        return 0;
 }
 int ore_create(struct ore_io_state *ios)
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index d222c77cfa1b..5f376d14fdcc 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -144,26 +144,26 @@ static void _sp2d_reset(struct __stripe_pages_2d *sp2d,
 {
        unsigned data_devs = sp2d->data_devs;
        unsigned group_width = data_devs + sp2d->parity;
-        unsigned p;
+        int p, c;
        if (!sp2d->needed)
                return;
-        for (p = 0; p < sp2d->pages_in_unit; p++) {
+        for (c = data_devs - 1; c >= 0; --c)
-                struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
+                for (p = sp2d->pages_in_unit - 1; p >= 0; --p) {
+                        struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
-                if (_1ps->write_count < group_width) {
-                        unsigned c;
-                        for (c = 0; c < data_devs; c++)
+                        if (_1ps->page_is_read[c]) {
-                                if (_1ps->page_is_read[c]) {
+                                struct page *page = _1ps->pages[c];
-                                        struct page *page = _1ps->pages[c];
-                                        r4w->put_page(priv, page);
+                                r4w->put_page(priv, page);
-                                        _1ps->page_is_read[c] = false;
+                                _1ps->page_is_read[c] = false;
-                                }
+                        }
                }
+        for (p = 0; p < sp2d->pages_in_unit; p++) {
+                struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
                memset(_1ps->pages, 0, group_width * sizeof(*_1ps->pages));
                _1ps->write_count = 0;
                _1ps->tx = NULL;
@@ -461,16 +461,12 @@ static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret)
 * ios->sp2d[p][*], xor is calculated the same way. These pages are
 * allocated/freed and don't go through cache
 */
-static int _read_4_write(struct ore_io_state *ios)
+static int _read_4_write_first_stripe(struct ore_io_state *ios)
 {
-        struct ore_io_state *ios_read;
        struct ore_striping_info read_si;
        struct __stripe_pages_2d *sp2d = ios->sp2d;
        u64 offset = ios->si.first_stripe_start;
-        u64 last_stripe_end;
+        unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1;
-        unsigned bytes_in_stripe = ios->si.bytes_in_stripe;
-        unsigned i, c, p, min_p = sp2d->pages_in_unit, max_p = -1;
-        int ret;
        if (offset == ios->offset) /* Go to start collect $200 */
                goto read_last_stripe;
@@ -478,6 +474,9 @@ static int _read_4_write(struct ore_io_state *ios)
        min_p = _sp2d_min_pg(sp2d);
        max_p = _sp2d_max_pg(sp2d);
+        ORE_DBGMSG("stripe_start=0x%llx ios->offset=0x%llx min_p=%d max_p=%d\n",
+                   offset, ios->offset, min_p, max_p);
        for (c = 0; ; c++) {
                ore_calc_stripe_info(ios->layout, offset, 0, &read_si);
                read_si.obj_offset += min_p * PAGE_SIZE;
@@ -512,6 +511,18 @@ static int _read_4_write(struct ore_io_state *ios)
        }
 read_last_stripe:
+        return 0;
+}
+static int _read_4_write_last_stripe(struct ore_io_state *ios)
+{
+        struct ore_striping_info read_si;
+        struct __stripe_pages_2d *sp2d = ios->sp2d;
+        u64 offset;
+        u64 last_stripe_end;
+        unsigned bytes_in_stripe = ios->si.bytes_in_stripe;
+        unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1;
        offset = ios->offset + ios->length;
        if (offset % PAGE_SIZE)
                _add_to_r4w_last_page(ios, &offset);
@@ -527,15 +538,15 @@ read_last_stripe:
        c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1,
                       ios->layout->mirrors_p1, read_si.par_dev, read_si.dev);
-        BUG_ON(ios->si.first_stripe_start + bytes_in_stripe != last_stripe_end);
-        /* unaligned IO must be within a single stripe */
        if (min_p == sp2d->pages_in_unit) {
                /* Didn't do it yet */
                min_p = _sp2d_min_pg(sp2d);
                max_p = _sp2d_max_pg(sp2d);
        }
+        ORE_DBGMSG("offset=0x%llx stripe_end=0x%llx min_p=%d max_p=%d\n",
+                   offset, last_stripe_end, min_p, max_p);
        while (offset < last_stripe_end) {
                struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
@@ -568,6 +579,15 @@ read_last_stripe:
        }
 read_it:
+        return 0;
+}
+static int _read_4_write_execute(struct ore_io_state *ios)
+{
+        struct ore_io_state *ios_read;
+        unsigned i;
+        int ret;
        ios_read = ios->ios_read_4_write;
        if (!ios_read)
                return 0;
@@ -591,6 +611,8 @@ read_it:
        }
        _mark_read4write_pages_uptodate(ios_read, ret);
+        ore_put_io_state(ios_read);
+        ios->ios_read_4_write = NULL; /* Might need a reuse at last stripe */
        return 0;
 }
@@ -626,8 +648,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
                        /* If first stripe, Read in all read4write pages
                         * (if needed) before we calculate the first parity.
                         */
-                        _read_4_write(ios);
+                        _read_4_write_first_stripe(ios);
                }
+                if (!cur_len) /* If last stripe r4w pages of last stripe */
+                        _read_4_write_last_stripe(ios);
+                _read_4_write_execute(ios);
                for (i = 0; i < num_pages; i++) {
                        pages[i] = _raid_page_alloc();
@@ -654,34 +679,14 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
 int _ore_post_alloc_raid_stuff(struct ore_io_state *ios)
 {
-        struct ore_layout *layout = ios->layout;
        if (ios->parity_pages) {
+                struct ore_layout *layout = ios->layout;
                unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE;
-                unsigned stripe_size = ios->si.bytes_in_stripe;
-                u64 last_stripe, first_stripe;
                if (_sp2d_alloc(pages_in_unit, layout->group_width,
                                layout->parity, &ios->sp2d)) {
                        return -ENOMEM;
                }
-                /* Round io down to last full strip */
-                first_stripe = div_u64(ios->offset, stripe_size);
-                last_stripe = div_u64(ios->offset + ios->length, stripe_size);
-                /* If an IO spans more then a single stripe it must end at
-                 * a stripe boundary. The reminder at the end is pushed into the
-                 * next IO.
-                 */
-                if (last_stripe != first_stripe) {
-                        ios->length = last_stripe * stripe_size - ios->offset;
-                        BUG_ON(!ios->length);
-                        ios->nr_pages = (ios->length + PAGE_SIZE - 1) /
-                                        PAGE_SIZE;
-                        ios->si.length = ios->length; /*make it consistent */
-                }
        }
        return 0;
 }
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index b0201ca6e9c6..29ab099e3e08 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -19,19 +19,19 @@
 #define dprintk(fmt, args...) do{}while(0)
-static int get_name(struct vfsmount *mnt, struct dentry *dentry, char *name,
+static int get_name(const struct path *path, char *name, struct dentry *child);
-                struct dentry *child);
 static int exportfs_get_name(struct vfsmount *mnt, struct dentry *dir,
                char *name, struct dentry *child)
 {
        const struct export_operations *nop = dir->d_sb->s_export_op;
+        struct path path = {.mnt = mnt, .dentry = dir};
        if (nop->get_name)
                return nop->get_name(dir, name, child);
        else
-                return get_name(mnt, dir, name, child);
+                return get_name(&path, name, child);
 }
 /*
@@ -44,13 +44,14 @@ find_acceptable_alias(struct dentry *result,
 {
        struct dentry *dentry, *toput = NULL;
        struct inode *inode;
+        struct hlist_node *p;
        if (acceptable(context, result))
                return result;
        inode = result->d_inode;
        spin_lock(&inode->i_lock);
-        list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+        hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) {
                dget(dentry);
                spin_unlock(&inode->i_lock);
                if (toput)
@@ -248,11 +249,10 @@ static int filldir_one(void * __buf, const char * name, int len,
 * calls readdir on the parent until it finds an entry with
 * the same inode number as the child, and returns that.
 */
-static int get_name(struct vfsmount *mnt, struct dentry *dentry,
+static int get_name(const struct path *path, char *name, struct dentry *child)
-                char *name, struct dentry *child)
 {
        const struct cred *cred = current_cred();
-        struct inode *dir = dentry->d_inode;
+        struct inode *dir = path->dentry->d_inode;
        int error;
        struct file *file;
        struct getdents_callback buffer;
@@ -266,7 +266,7 @@ static int get_name(struct vfsmount *mnt, struct dentry *dentry,
        /*
         * Open the directory ...
         */
-        file = dentry_open(dget(dentry), mntget(mnt), O_RDONLY, cred);
+        file = dentry_open(path, O_RDONLY, cred);
        error = PTR_ERR(file);
        if (IS_ERR(file))
                goto out;
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index f663a67d7bf0..73b0d9519836 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -41,8 +41,8 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
 {
        int err = ext2_add_link(dentry, inode);
        if (!err) {
-                d_instantiate(dentry, inode);
                unlock_new_inode(inode);
+                d_instantiate(dentry, inode);
                return 0;
        }
        inode_dec_link_count(inode);
@@ -55,7 +55,7 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
 * Methods themselves.
 */
-static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
+static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, unsigned int flags)
 {
        struct inode * inode;
        ino_t ino;
@@ -94,7 +94,7 @@ struct dentry *ext2_get_parent(struct dentry *child)
 * If the create succeeds, we fill in the inode information
 * with d_instantiate(). 
 */
-static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode, struct nameidata *nd)
+static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode, bool excl)
 {
        struct inode *inode;
@@ -242,8 +242,8 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
        if (err)
                goto out_fail;
-        d_instantiate(dentry, inode);
        unlock_new_inode(inode);
+        d_instantiate(dentry, inode);
 out:
        return err;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index b3621cb7ea31..9f311d27b16f 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -771,13 +771,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
        err = -ENOMEM;
        sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
        if (!sbi)
-                goto failed_unlock;
+                goto failed;
        sbi->s_blockgroup_lock =
                kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
        if (!sbi->s_blockgroup_lock) {
                kfree(sbi);
-                goto failed_unlock;
+                goto failed;
        }
        sb->s_fs_info = sbi;
        sbi->s_sb_block = sb_block;
@@ -1130,7 +1130,7 @@ failed_sbi:
        sb->s_fs_info = NULL;
        kfree(sbi->s_blockgroup_lock);
        kfree(sbi);
-failed_unlock:
+failed:
        return ret;
 }
@@ -1184,6 +1184,12 @@ static int ext2_sync_fs(struct super_block *sb, int wait)
        struct ext2_sb_info *sbi = EXT2_SB(sb);
        struct ext2_super_block *es = EXT2_SB(sb)->s_es;
+        /*
+         * Write quota structures to quota file, sync_blockdev() will write
+         * them to disk later
+         */
+        dquot_writeback_dquots(sb, -1);
        spin_lock(&sbi->s_lock);
        if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) {
                ext2_debug("setting valid to 0\n");
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 92490e9f85ca..c8fff930790d 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -300,10 +300,11 @@ loff_t ext3_dir_llseek(struct file *file, loff_t offset, int origin)
 {
        struct inode *inode = file->f_mapping->host;
        int dx_dir = is_dx_dir(inode);
+        loff_t htree_max = ext3_get_htree_eof(file);
        if (likely(dx_dir))
                return generic_file_llseek_size(file, offset, origin,
-                                                ext3_get_htree_eof(file));
+                                                htree_max, htree_max);
        else
                return generic_file_llseek(file, offset, origin);
 }
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index d4dff278cbd8..b31dbd4c46ad 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -92,8 +92,13 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
         * disk caches manually so that data really is on persistent
         * storage
         */
-        if (needs_barrier)
+        if (needs_barrier) {
-                blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+                int err;
+                err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+                if (!ret)
+                        ret = err;
+        }
 out:
        trace_ext3_sync_file_exit(inode, ret);
        return ret;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index eeb63dfc5d20..8f4fddac01a6 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1011,7 +1011,7 @@ errout:
        return NULL;
 }
-static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
+static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, unsigned int flags)
 {
        struct inode * inode;
        struct ext3_dir_entry_2 * de;
@@ -1671,8 +1671,8 @@ static int ext3_add_nondir(handle_t *handle,
        int err = ext3_add_entry(handle, dentry, inode);
        if (!err) {
                ext3_mark_inode_dirty(handle, inode);
-                d_instantiate(dentry, inode);
                unlock_new_inode(inode);
+                d_instantiate(dentry, inode);
                return 0;
        }
        drop_nlink(inode);
@@ -1690,7 +1690,7 @@ static int ext3_add_nondir(handle_t *handle,
 * with d_instantiate().
 */
 static int ext3_create (struct inode * dir, struct dentry * dentry, umode_t mode,
-                struct nameidata *nd)
+                bool excl)
 {
        handle_t *handle;
        struct inode * inode;
@@ -1836,8 +1836,8 @@ out_clear_inode:
        if (err)
                goto out_clear_inode;
-        d_instantiate(dentry, inode);
        unlock_new_inode(inode);
+        d_instantiate(dentry, inode);
 out_stop:
        brelse(dir_block);
        ext3_journal_stop(handle);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 8c3a44b7c375..ff9bcdc5b0d5 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2058,7 +2058,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
                goto failed_mount3;
        }
-        ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
+        if (ext3_setup_super(sb, es, sb->s_flags & MS_RDONLY))
+                sb->s_flags |= MS_RDONLY;
        EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS;
        ext3_orphan_cleanup(sb, es);
@@ -2526,6 +2527,11 @@ static int ext3_sync_fs(struct super_block *sb, int wait)
        tid_t target;
        trace_ext3_sync_fs(sb, wait);
+        /*
+         * Writeback quota in non-journalled quota case - journalled quota has
+         * no dirty dquots
+         */
+        dquot_writeback_dquots(sb, -1);
        if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
                if (wait)
                        log_wait_commit(EXT3_SB(sb)->s_journal, target);
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index aa39e600d159..8e07d2a5a139 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -324,74 +324,27 @@ static inline loff_t ext4_get_htree_eof(struct file *filp)
 /*
- * ext4_dir_llseek() based on generic_file_llseek() to handle both
+ * ext4_dir_llseek() calls generic_file_llseek_size to handle htree
- * non-htree and htree directories, where the "offset" is in terms
+ * directories, where the "offset" is in terms of the filename hash
- * of the filename hash value instead of the byte offset.
+ * value instead of the byte offset.
 *
- * NOTE: offsets obtained *before* ext4_set_inode_flag(dir, EXT4_INODE_INDEX)
+ * Because we may return a 64-bit hash that is well beyond offset limits,
- *       will be invalid once the directory was converted into a dx directory
+ * we need to pass the max hash as the maximum allowable offset in
+ * the htree directory case.
+ *
+ * For non-htree, ext4_llseek already chooses the proper max offset.
 */
 loff_t ext4_dir_llseek(struct file *file, loff_t offset, int origin)
 {
        struct inode *inode = file->f_mapping->host;
-        loff_t ret = -EINVAL;
        int dx_dir = is_dx_dir(inode);
+        loff_t htree_max = ext4_get_htree_eof(file);
-        mutex_lock(&inode->i_mutex);
+        if (likely(dx_dir))
+                return generic_file_llseek_size(file, offset, origin,
-        /* NOTE: relative offsets with dx directories might not work
+                                                    htree_max, htree_max);
-         *       as expected, as it is difficult to figure out the
+        else
-         *       correct offset between dx hashes */
+                return ext4_llseek(file, offset, origin);
-        switch (origin) {
-        case SEEK_END:
-                if (unlikely(offset > 0))
-                        goto out_err; /* not supported for directories */
-                /* so only negative offsets are left, does that have a
-                 * meaning for directories at all? */
-                if (dx_dir)
-                        offset += ext4_get_htree_eof(file);
-                else
-                        offset += inode->i_size;
-                break;
-        case SEEK_CUR:
-                /*
-                 * Here we special-case the lseek(fd, 0, SEEK_CUR)
-                 * position-querying operation.  Avoid rewriting the "same"
-                 * f_pos value back to the file because a concurrent read(),
-                 * write() or lseek() might have altered it
-                 */
-                if (offset == 0) {
-                        offset = file->f_pos;
-                        goto out_ok;
-                }
-                offset += file->f_pos;
-                break;
-        }
-        if (unlikely(offset < 0))
-                goto out_err;
-        if (!dx_dir) {
-                if (offset > inode->i_sb->s_maxbytes)
-                        goto out_err;
-        } else if (offset > ext4_get_htree_eof(file))
-                goto out_err;
-        /* Special lock needed here? */
-        if (offset != file->f_pos) {
-                file->f_pos = offset;
-                file->f_version = 0;
-        }
-out_ok:
-        ret = offset;
-out_err:
-        mutex_unlock(&inode->i_mutex);
-        return ret;
 }
 /*
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 8c7642a00054..782eecb57e43 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -211,9 +211,9 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
 }
 /*
- * ext4_llseek() copied from generic_file_llseek() to handle both
+ * ext4_llseek() handles both block-mapped and extent-mapped maxbytes values
- * block-mapped and extent-mapped maxbytes values. This should
+ * by calling generic_file_llseek_size() with the appropriate maxbytes
- * otherwise be identical with generic_file_llseek().
+ * value for each.
 */
 loff_t ext4_llseek(struct file *file, loff_t offset, int origin)
 {
@@ -225,7 +225,8 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int origin)
        else
                maxbytes = inode->i_sb->s_maxbytes;
-        return generic_file_llseek_size(file, offset, origin, maxbytes);
+        return generic_file_llseek_size(file, offset, origin,
+                                        maxbytes, i_size_read(inode));
 }
 const struct file_operations ext4_file_operations = {
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index bb6c7d811313..2a1dcea4f12e 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -135,14 +135,7 @@ static int ext4_sync_parent(struct inode *inode)
        inode = igrab(inode);
        while (ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {
                ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY);
-                dentry = NULL;
+                dentry = d_find_any_alias(inode);
-                spin_lock(&inode->i_lock);
-                if (!list_empty(&inode->i_dentry)) {
-                        dentry = list_first_entry(&inode->i_dentry,
-                                                  struct dentry, d_alias);
-                        dget(dentry);
-                }
-                spin_unlock(&inode->i_lock);
                if (!dentry)
                        break;
                next = igrab(dentry->d_parent->d_inode);
@@ -232,7 +225,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
        if (!journal) {
                ret = __sync_inode(inode, datasync);
-                if (!ret && !list_empty(&inode->i_dentry))
+                if (!ret && !hlist_empty(&inode->i_dentry))
                        ret = ext4_sync_parent(inode);
                goto out;
        }
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index e34deac3f366..7f7dad787603 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -268,7 +268,6 @@ group_extend_out:
                err = ext4_move_extents(filp, donor_filp, me.orig_start,
                                        me.donor_start, me.len, &me.moved_len);
                mnt_drop_write_file(filp);
-                mnt_drop_write(filp->f_path.mnt);
                if (copy_to_user((struct move_extent __user *)arg,
                                 &me, sizeof(me)))
@@ -390,7 +389,7 @@ group_add_out:
                if (err)
                        return err;
-                err = mnt_want_write(filp->f_path.mnt);
+                err = mnt_want_write_file(filp);
                if (err)
                        goto resizefs_out;
@@ -402,7 +401,7 @@ group_add_out:
                }
                if (err == 0)
                        err = err2;
-                mnt_drop_write(filp->f_path.mnt);
+                mnt_drop_write_file(filp);
 resizefs_out:
                ext4_resize_end(sb);
                return err;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 5845cd97bf8b..d0d3f0e87f99 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1312,7 +1312,7 @@ errout:
        return NULL;
 }
-static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
        struct inode *inode;
        struct ext4_dir_entry_2 *de;
@@ -2072,8 +2072,8 @@ static int ext4_add_nondir(handle_t *handle,
        int err = ext4_add_entry(handle, dentry, inode);
        if (!err) {
                ext4_mark_inode_dirty(handle, inode);
-                d_instantiate(dentry, inode);
                unlock_new_inode(inode);
+                d_instantiate(dentry, inode);
                return 0;
        }
        drop_nlink(inode);
@@ -2091,7 +2091,7 @@ static int ext4_add_nondir(handle_t *handle,
 * with d_instantiate().
 */
 static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                       struct nameidata *nd)
+                       bool excl)
 {
        handle_t *handle;
        struct inode *inode;
@@ -2249,8 +2249,8 @@ out_clear_inode:
        err = ext4_mark_inode_dirty(handle, dir);
        if (err)
                goto out_clear_inode;
-        d_instantiate(dentry, inode);
        unlock_new_inode(inode);
+        d_instantiate(dentry, inode);
 out_stop:
        brelse(dir_block);
        ext4_journal_stop(handle);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index eb7aa3e4ef05..d8759401ecae 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4325,6 +4325,11 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
        trace_ext4_sync_fs(sb, wait);
        flush_workqueue(sbi->dio_unwritten_wq);
+        /*
+         * Writeback quota in non-journalled quota case - journalled quota has
+         * no dirty dquots
+         */
+        dquot_writeback_dquots(sb, -1);
        if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
                if (wait)
                        jbd2_log_wait_commit(sbi->s_journal, target);
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index c5938c9084b9..70d993a93805 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -201,7 +201,7 @@ static const struct dentry_operations msdos_dentry_operations = {
 /***** Get inode using directory and name */
 static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry,
-                                   struct nameidata *nd)
+                                   unsigned int flags)
 {
        struct super_block *sb = dir->i_sb;
        struct fat_slot_info sinfo;
@@ -265,7 +265,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
 /***** Create a file */
 static int msdos_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                        struct nameidata *nd)
+                        bool excl)
 {
        struct super_block *sb = dir->i_sb;
        struct inode *inode = NULL;
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 98ae804f5273..6cc480652433 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -41,9 +41,9 @@ static int vfat_revalidate_shortname(struct dentry *dentry)
        return ret;
 }
-static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int vfat_revalidate(struct dentry *dentry, unsigned int flags)
 {
-        if (nd && nd->flags & LOOKUP_RCU)
+        if (flags & LOOKUP_RCU)
                return -ECHILD;
        /* This is not negative dentry. Always valid. */
@@ -52,9 +52,9 @@ static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
        return vfat_revalidate_shortname(dentry);
 }
-static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
+static int vfat_revalidate_ci(struct dentry *dentry, unsigned int flags)
 {
-        if (nd && nd->flags & LOOKUP_RCU)
+        if (flags & LOOKUP_RCU)
                return -ECHILD;
        /*
@@ -74,7 +74,7 @@ static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
         * This may be nfsd (or something), anyway, we can't see the
         * intent of this. So, since this can be for creation, drop it.
         */
-        if (!nd)
+        if (!flags)
                return 0;
        /*
@@ -82,7 +82,7 @@ static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
         * case sensitive name which is specified by user if this is
         * for creation.
         */
-        if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
+        if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
                return 0;
        return vfat_revalidate_shortname(dentry);
@@ -714,7 +714,7 @@ static int vfat_d_anon_disconn(struct dentry *dentry)
 }
 static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry,
-                                  struct nameidata *nd)
+                                  unsigned int flags)
 {
        struct super_block *sb = dir->i_sb;
        struct fat_slot_info sinfo;
@@ -772,7 +772,7 @@ error:
 }
 static int vfat_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                       struct nameidata *nd)
+                       bool excl)
 {
        struct super_block *sb = dir->i_sb;
        struct inode *inode;
diff --git a/fs/fifo.c b/fs/fifo.c
index b1a524d798e7..cf6f4345ceb0 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -14,7 +14,7 @@
 #include <linux/sched.h>
 #include <linux/pipe_fs_i.h>
-static void wait_for_partner(struct inode* inode, unsigned int *cnt)
+static int wait_for_partner(struct inode* inode, unsigned int *cnt)
 {
        int cur = *cnt; 
@@ -23,6 +23,7 @@ static void wait_for_partner(struct inode* inode, unsigned int *cnt)
                if (signal_pending(current))
                        break;
        }
+        return cur == *cnt ? -ERESTARTSYS : 0;
 }
 static void wake_up_partner(struct inode* inode)
@@ -67,8 +68,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
                                 * seen a writer */
                                filp->f_version = pipe->w_counter;
                        } else {
-                                wait_for_partner(inode, &pipe->w_counter);
+                                if (wait_for_partner(inode, &pipe->w_counter))
-                                if(signal_pending(current))
                                        goto err_rd;
                        }
                }
@@ -90,8 +90,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
                        wake_up_partner(inode);
                if (!pipe->readers) {
-                        wait_for_partner(inode, &pipe->r_counter);
+                        if (wait_for_partner(inode, &pipe->r_counter))
-                        if (signal_pending(current))
                                goto err_wr;
                }
                break;
diff --git a/fs/file_table.c b/fs/file_table.c
index a305d9e2d1b2..b3fc4d67a26b 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -23,6 +23,8 @@
 #include <linux/lglock.h>
 #include <linux/percpu_counter.h>
 #include <linux/percpu.h>
+#include <linux/hardirq.h>
+#include <linux/task_work.h>
 #include <linux/ima.h>
 #include <linux/atomic.h>
@@ -251,7 +253,6 @@ static void __fput(struct file *file)
        }
        fops_put(file->f_op);
        put_pid(file->f_owner.pid);
-        file_sb_list_del(file);
        if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
                i_readcount_dec(inode);
        if (file->f_mode & FMODE_WRITE)
@@ -263,10 +264,77 @@ static void __fput(struct file *file)
        mntput(mnt);
 }
+static DEFINE_SPINLOCK(delayed_fput_lock);
+static LIST_HEAD(delayed_fput_list);
+static void delayed_fput(struct work_struct *unused)
+{
+        LIST_HEAD(head);
+        spin_lock_irq(&delayed_fput_lock);
+        list_splice_init(&delayed_fput_list, &head);
+        spin_unlock_irq(&delayed_fput_lock);
+        while (!list_empty(&head)) {
+                struct file *f = list_first_entry(&head, struct file, f_u.fu_list);
+                list_del_init(&f->f_u.fu_list);
+                __fput(f);
+        }
+}
+static void ____fput(struct callback_head *work)
+{
+        __fput(container_of(work, struct file, f_u.fu_rcuhead));
+}
+/*
+ * If kernel thread really needs to have the final fput() it has done
+ * to complete, call this.  The only user right now is the boot - we
+ * *do* need to make sure our writes to binaries on initramfs has
+ * not left us with opened struct file waiting for __fput() - execve()
+ * won't work without that.  Please, don't add more callers without
+ * very good reasons; in particular, never call that with locks
+ * held and never call that from a thread that might need to do
+ * some work on any kind of umount.
+ */
+void flush_delayed_fput(void)
+{
+        delayed_fput(NULL);
+}
+static DECLARE_WORK(delayed_fput_work, delayed_fput);
 void fput(struct file *file)
 {
-        if (atomic_long_dec_and_test(&file->f_count))
+        if (atomic_long_dec_and_test(&file->f_count)) {
+                struct task_struct *task = current;
+                file_sb_list_del(file);
+                if (unlikely(in_interrupt() || task->flags & PF_KTHREAD)) {
+                        unsigned long flags;
+                        spin_lock_irqsave(&delayed_fput_lock, flags);
+                        list_add(&file->f_u.fu_list, &delayed_fput_list);
+                        schedule_work(&delayed_fput_work);
+                        spin_unlock_irqrestore(&delayed_fput_lock, flags);
+                        return;
+                }
+                init_task_work(&file->f_u.fu_rcuhead, ____fput);
+                task_work_add(task, &file->f_u.fu_rcuhead, true);
+        }
+}
+/*
+ * synchronous analog of fput(); for kernel threads that might be needed
+ * in some umount() (and thus can't use flush_delayed_fput() without
+ * risking deadlocks), need to wait for completion of __fput() and know
+ * for this specific struct file it won't involve anything that would
+ * need them.  Use only if you really need it - at the very least,
+ * don't blindly convert fput() by kernel thread to that.
+ */
+void __fput_sync(struct file *file)
+{
+        if (atomic_long_dec_and_test(&file->f_count)) {
+                struct task_struct *task = current;
+                file_sb_list_del(file);
+                BUG_ON(!(task->flags & PF_KTHREAD));
                __fput(file);
+        }
 }
 EXPORT_SYMBOL(fput);
@@ -483,10 +551,8 @@ void mark_files_ro(struct super_block *sb)
 {
        struct file *f;
-retry:
        lg_global_lock(&files_lglock);
        do_file_list_for_each_entry(sb, f) {
-                struct vfsmount *mnt;
                if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
                       continue;
                if (!file_count(f))
@@ -499,12 +565,7 @@ retry:
                if (file_check_writeable(f) != 0)
                        continue;
                file_release_write(f);
-                mnt = mntget(f->f_path.mnt);
+                mnt_drop_write_file(f);
-                /* This can sleep, so we can't hold the spinlock. */
-                lg_global_unlock(&files_lglock);
-                mnt_drop_write(mnt);
-                mntput(mnt);
-                goto retry;
        } while_file_list_for_each_entry;
        lg_global_unlock(&files_lglock);
 }
diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c
index 3360f1e678ad..bd447e88f208 100644
--- a/fs/freevxfs/vxfs_lookup.c
+++ b/fs/freevxfs/vxfs_lookup.c
@@ -48,7 +48,7 @@
 #define VXFS_BLOCK_PER_PAGE(sbp)  ((PAGE_CACHE_SIZE / (sbp)->s_blocksize))
-static struct dentry *  vxfs_lookup(struct inode *, struct dentry *, struct nameidata *);
+static struct dentry *  vxfs_lookup(struct inode *, struct dentry *, unsigned int);
 static int              vxfs_readdir(struct file *, void *, filldir_t);
 const struct inode_operations vxfs_dir_inode_ops = {
@@ -203,7 +203,7 @@ vxfs_inode_by_name(struct inode *dip, struct dentry *dp)
 *   in the return pointer.
 */
 static struct dentry *
-vxfs_lookup(struct inode *dip, struct dentry *dp, struct nameidata *nd)
+vxfs_lookup(struct inode *dip, struct dentry *dp, unsigned int flags)
 {
        struct inode            *ip = NULL;
        ino_t                   ino;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 41a3ccff18d8..8f660dd6137a 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1315,6 +1315,8 @@ void writeback_inodes_sb_nr(struct super_block *sb,
                .reason                 = reason,
        };
+        if (sb->s_bdi == &noop_backing_dev_info)
+                return;
        WARN_ON(!rwsem_is_locked(&sb->s_umount));
        bdi_queue_work(sb->s_bdi, &work);
        wait_for_completion(&done);
@@ -1398,6 +1400,9 @@ void sync_inodes_sb(struct super_block *sb)
                .reason         = WB_REASON_SYNC,
        };
+        /* Nothing to do? */
+        if (sb->s_bdi == &noop_backing_dev_info)
+                return;
        WARN_ON(!rwsem_is_locked(&sb->s_umount));
        bdi_queue_work(sb->s_bdi, &work);
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index e159e682ad4c..5df4775fea03 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -6,18 +6,6 @@
 #include <linux/fs_struct.h>
 #include "internal.h"
-static inline void path_get_longterm(struct path *path)
-{
-        path_get(path);
-        mnt_make_longterm(path->mnt);
-}
-static inline void path_put_longterm(struct path *path)
-{
-        mnt_make_shortterm(path->mnt);
-        path_put(path);
-}
 /*
 * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
 * It can block.
@@ -26,7 +14,7 @@ void set_fs_root(struct fs_struct *fs, struct path *path)
 {
        struct path old_root;
-        path_get_longterm(path);
+        path_get(path);
        spin_lock(&fs->lock);
        write_seqcount_begin(&fs->seq);
        old_root = fs->root;
@@ -34,7 +22,7 @@ void set_fs_root(struct fs_struct *fs, struct path *path)
        write_seqcount_end(&fs->seq);
        spin_unlock(&fs->lock);
        if (old_root.dentry)
-                path_put_longterm(&old_root);
+                path_put(&old_root);
 }
 /*
@@ -45,7 +33,7 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
 {
        struct path old_pwd;
-        path_get_longterm(path);
+        path_get(path);
        spin_lock(&fs->lock);
        write_seqcount_begin(&fs->seq);
        old_pwd = fs->pwd;
@@ -54,7 +42,7 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
        spin_unlock(&fs->lock);
        if (old_pwd.dentry)
-                path_put_longterm(&old_pwd);
+                path_put(&old_pwd);
 }
 static inline int replace_path(struct path *p, const struct path *old, const struct path *new)
@@ -84,7 +72,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
                        write_seqcount_end(&fs->seq);
                        while (hits--) {
                                count++;
-                                path_get_longterm(new_root);
+                                path_get(new_root);
                        }
                        spin_unlock(&fs->lock);
                }
@@ -92,13 +80,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
        } while_each_thread(g, p);
        read_unlock(&tasklist_lock);
        while (count--)
-                path_put_longterm(old_root);
+                path_put(old_root);
 }
 void free_fs_struct(struct fs_struct *fs)
 {
-        path_put_longterm(&fs->root);
+        path_put(&fs->root);
-        path_put_longterm(&fs->pwd);
+        path_put(&fs->pwd);
        kmem_cache_free(fs_cachep, fs);
 }
@@ -132,9 +120,9 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
                spin_lock(&old->lock);
                fs->root = old->root;
-                path_get_longterm(&fs->root);
+                path_get(&fs->root);
                fs->pwd = old->pwd;
-                path_get_longterm(&fs->pwd);
+                path_get(&fs->pwd);
                spin_unlock(&old->lock);
        }
        return fs;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 334e0b18a014..8964cf3999b2 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -154,7 +154,7 @@ u64 fuse_get_attr_version(struct fuse_conn *fc)
 * the lookup once more.  If the lookup results in the same inode,
 * then refresh the attributes, timeouts and mark the dentry valid.
 */
-static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
+static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
 {
        struct inode *inode;
@@ -174,7 +174,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
                if (!inode)
                        return 0;
-                if (nd && (nd->flags & LOOKUP_RCU))
+                if (flags & LOOKUP_RCU)
                        return -ECHILD;
                fc = get_fuse_conn(inode);
@@ -249,7 +249,7 @@ static struct dentry *fuse_d_add_directory(struct dentry *entry,
                /* This tries to shrink the subtree below alias */
                fuse_invalidate_entry(alias);
                dput(alias);
-                if (!list_empty(&inode->i_dentry))
+                if (!hlist_empty(&inode->i_dentry))
                        return ERR_PTR(-EBUSY);
        } else {
                dput(alias);
@@ -316,7 +316,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
 }
 static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
-                                  struct nameidata *nd)
+                                  unsigned int flags)
 {
        int err;
        struct fuse_entry_out outarg;
@@ -370,7 +370,8 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
 * 'mknod' + 'open' requests.
 */
 static int fuse_create_open(struct inode *dir, struct dentry *entry,
-                            umode_t mode, struct nameidata *nd)
+                            struct file *file, unsigned flags,
+                            umode_t mode, int *opened)
 {
        int err;
        struct inode *inode;
@@ -381,15 +382,11 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
        struct fuse_open_out outopen;
        struct fuse_entry_out outentry;
        struct fuse_file *ff;
-        struct file *file;
-        int flags = nd->intent.open.flags;
-        if (fc->no_create)
-                return -ENOSYS;
        forget = fuse_alloc_forget();
+        err = -ENOMEM;
        if (!forget)
-                return -ENOMEM;
+                goto out_err;
        req = fuse_get_req(fc);
        err = PTR_ERR(req);
@@ -428,11 +425,8 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
        req->out.args[1].value = &outopen;
        fuse_request_send(fc, req);
        err = req->out.h.error;
-        if (err) {
+        if (err)
-                if (err == -ENOSYS)
-                        fc->no_create = 1;
                goto out_free_ff;
-        }
        err = -EIO;
        if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid))
@@ -448,28 +442,74 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
                flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
                fuse_sync_release(ff, flags);
                fuse_queue_forget(fc, forget, outentry.nodeid, 1);
-                return -ENOMEM;
+                err = -ENOMEM;
+                goto out_err;
        }
        kfree(forget);
        d_instantiate(entry, inode);
        fuse_change_entry_timeout(entry, &outentry);
        fuse_invalidate_attr(dir);
-        file = lookup_instantiate_filp(nd, entry, generic_file_open);
+        err = finish_open(file, entry, generic_file_open, opened);
-        if (IS_ERR(file)) {
+        if (err) {
                fuse_sync_release(ff, flags);
-                return PTR_ERR(file);
+        } else {
+                file->private_data = fuse_file_get(ff);
+                fuse_finish_open(inode, file);
        }
-        file->private_data = fuse_file_get(ff);
+        return err;
-        fuse_finish_open(inode, file);
-        return 0;
- out_free_ff:
+out_free_ff:
        fuse_file_free(ff);
- out_put_request:
+out_put_request:
        fuse_put_request(fc, req);
- out_put_forget_req:
+out_put_forget_req:
        kfree(forget);
+out_err:
+        return err;
+}
+static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t);
+static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
+                            struct file *file, unsigned flags,
+                            umode_t mode, int *opened)
+{
+        int err;
+        struct fuse_conn *fc = get_fuse_conn(dir);
+        struct dentry *res = NULL;
+        if (d_unhashed(entry)) {
+                res = fuse_lookup(dir, entry, 0);
+                if (IS_ERR(res))
+                        return PTR_ERR(res);
+                if (res)
+                        entry = res;
+        }
+        if (!(flags & O_CREAT) || entry->d_inode)
+                goto no_open;
+        /* Only creates */
+        *opened |= FILE_CREATED;
+        if (fc->no_create)
+                goto mknod;
+        err = fuse_create_open(dir, entry, file, flags, mode, opened);
+        if (err == -ENOSYS) {
+                fc->no_create = 1;
+                goto mknod;
+        }
+out_dput:
+        dput(res);
        return err;
+mknod:
+        err = fuse_mknod(dir, entry, mode, 0);
+        if (err)
+                goto out_dput;
+no_open:
+        return finish_no_open(file, res);
 }
 /*
@@ -571,14 +611,8 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
 }
 static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
-                       struct nameidata *nd)
+                       bool excl)
 {
-        if (nd) {
-                int err = fuse_create_open(dir, entry, mode, nd);
-                if (err != -ENOSYS)
-                        return err;
-                /* Fall back on mknod */
-        }
        return fuse_mknod(dir, entry, mode, 0);
 }
@@ -1646,6 +1680,7 @@ static const struct inode_operations fuse_dir_inode_operations = {
        .link           = fuse_link,
        .setattr        = fuse_setattr,
        .create         = fuse_create,
+        .atomic_open    = fuse_atomic_open,
        .mknod          = fuse_mknod,
        .permission     = fuse_permission,
        .getattr        = fuse_getattr,
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index e80a464850c8..d6526347d386 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -614,7 +614,6 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
        unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
        int alloc_required;
        int error = 0;
-        struct gfs2_qadata *qa = NULL;
        pgoff_t index = pos >> PAGE_CACHE_SHIFT;
        unsigned from = pos & (PAGE_CACHE_SIZE - 1);
        struct page *page;
@@ -638,15 +637,9 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
                gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks);
        if (alloc_required) {
-                qa = gfs2_qadata_get(ip);
-                if (!qa) {
-                        error = -ENOMEM;
-                        goto out_unlock;
-                }
                error = gfs2_quota_lock_check(ip);
                if (error)
-                        goto out_alloc_put;
+                        goto out_unlock;
                error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks);
                if (error)
@@ -708,8 +701,6 @@ out_trans_fail:
                gfs2_inplace_release(ip);
 out_qunlock:
                gfs2_quota_unlock(ip);
-out_alloc_put:
-                gfs2_qadata_put(ip);
        }
 out_unlock:
        if (&ip->i_inode == sdp->sd_rindex) {
@@ -846,7 +837,6 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
        struct buffer_head *dibh;
-        struct gfs2_qadata *qa = ip->i_qadata;
        unsigned int from = pos & (PAGE_CACHE_SIZE - 1);
        unsigned int to = from + len;
        int ret;
@@ -878,12 +868,10 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
        brelse(dibh);
 failed:
        gfs2_trans_end(sdp);
-        if (ip->i_res)
+        if (gfs2_mb_reserved(ip))
                gfs2_inplace_release(ip);
-        if (qa) {
+        if (ip->i_res->rs_qa_qd_num)
                gfs2_quota_unlock(ip);
-                gfs2_qadata_put(ip);
-        }
        if (inode == sdp->sd_rindex) {
                gfs2_glock_dq(&m_ip->i_gh);
                gfs2_holder_uninit(&m_ip->i_gh);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index dab54099dd98..49cd7dd4a9fa 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -785,6 +785,9 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
        if (error)
                goto out_rlist;
+        if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock held */
+                gfs2_rs_deltree(ip->i_res);
        error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
                                 RES_INDIRECT + RES_STATFS + RES_QUOTA,
                                 revokes);
@@ -1045,12 +1048,13 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
                lblock = (size - 1) >> sdp->sd_sb.sb_bsize_shift;
        find_metapath(sdp, lblock, &mp, ip->i_height);
-        if (!gfs2_qadata_get(ip))
+        error = gfs2_rindex_update(sdp);
-                return -ENOMEM;
+        if (error)
+                return error;
        error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
        if (error)
-                goto out;
+                return error;
        while (height--) {
                struct strip_mine sm;
@@ -1064,8 +1068,6 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
        gfs2_quota_unhold(ip);
-out:
-        gfs2_qadata_put(ip);
        return error;
 }
@@ -1167,19 +1169,14 @@ static int do_grow(struct inode *inode, u64 size)
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct buffer_head *dibh;
-        struct gfs2_qadata *qa = NULL;
        int error;
        int unstuff = 0;
        if (gfs2_is_stuffed(ip) &&
            (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) {
-                qa = gfs2_qadata_get(ip);
-                if (qa == NULL)
-                        return -ENOMEM;
                error = gfs2_quota_lock_check(ip);
                if (error)
-                        goto do_grow_alloc_put;
+                        return error;
                error = gfs2_inplace_reserve(ip, 1);
                if (error)
@@ -1214,8 +1211,6 @@ do_grow_release:
                gfs2_inplace_release(ip);
 do_grow_qunlock:
                gfs2_quota_unlock(ip);
-do_grow_alloc_put:
-                gfs2_qadata_put(ip);
        }
        return error;
 }
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index 0da8da2c991d..4fddb3c22d25 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -25,7 +25,7 @@
 /**
 * gfs2_drevalidate - Check directory lookup consistency
 * @dentry: the mapping to check
- * @nd:
+ * @flags: lookup flags
 *
 * Check to make sure the lookup necessary to arrive at this inode from its
 * parent is still good.
@@ -33,7 +33,7 @@
 * Returns: 1 if the dentry is ok, 0 if it isn't
 */
-static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
+static int gfs2_drevalidate(struct dentry *dentry, unsigned int flags)
 {
        struct dentry *parent;
        struct gfs2_sbd *sdp;
@@ -44,7 +44,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
        int error;
        int had_lock = 0;
-        if (nd && nd->flags & LOOKUP_RCU)
+        if (flags & LOOKUP_RCU)
                return -ECHILD;
        parent = dget_parent(dentry);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 8aaeb07a07b5..259b088cfc4c 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -1854,14 +1854,9 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
        if (!ht)
                return -ENOMEM;
-        if (!gfs2_qadata_get(dip)) {
-                error = -ENOMEM;
-                goto out;
-        }
        error = gfs2_quota_hold(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
        if (error)
-                goto out_put;
+                goto out;
        /*  Count the number of leaves  */
        bh = leaf_bh;
@@ -1942,8 +1937,6 @@ out_rg_gunlock:
 out_rlist:
        gfs2_rlist_free(&rlist);
        gfs2_quota_unhold(dip);
-out_put:
-        gfs2_qadata_put(dip);
 out:
        kfree(ht);
        return error;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 31b199f6efc1..9aa6af13823c 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -142,6 +142,7 @@ static const u32 fsflags_to_gfs2[32] = {
        [7] = GFS2_DIF_NOATIME,
        [12] = GFS2_DIF_EXHASH,
        [14] = GFS2_DIF_INHERIT_JDATA,
+        [17] = GFS2_DIF_TOPDIR,
 };
 static const u32 gfs2_to_fsflags[32] = {
@@ -150,6 +151,7 @@ static const u32 gfs2_to_fsflags[32] = {
        [gfs2fl_AppendOnly] = FS_APPEND_FL,
        [gfs2fl_NoAtime] = FS_NOATIME_FL,
        [gfs2fl_ExHash] = FS_INDEX_FL,
+        [gfs2fl_TopLevel] = FS_TOPDIR_FL,
        [gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL,
 };
@@ -203,6 +205,7 @@ void gfs2_set_inode_flags(struct inode *inode)
                             GFS2_DIF_NOATIME|                  \
                             GFS2_DIF_SYNC|                     \
                             GFS2_DIF_SYSTEM|                   \
+                             GFS2_DIF_TOPDIR|                   \
                             GFS2_DIF_INHERIT_JDATA)
 /**
@@ -298,6 +301,7 @@ static int gfs2_set_flags(struct file *filp, u32 __user *ptr)
        gfsflags = fsflags_cvt(fsflags_to_gfs2, fsflags);
        if (!S_ISDIR(inode->i_mode)) {
+                gfsflags &= ~GFS2_DIF_TOPDIR;
                if (gfsflags & GFS2_DIF_INHERIT_JDATA)
                        gfsflags ^= (GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA);
                return do_gfs2_set_flags(filp, gfsflags, ~0);
@@ -366,7 +370,6 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        u64 pos = page->index << PAGE_CACHE_SHIFT;
        unsigned int data_blocks, ind_blocks, rblocks;
        struct gfs2_holder gh;
-        struct gfs2_qadata *qa;
        loff_t size;
        int ret;
@@ -376,6 +379,13 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
         */
        vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+        ret = gfs2_rs_alloc(ip);
+        if (ret)
+                return ret;
+        atomic_set(&ip->i_res->rs_sizehint,
+                   PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift);
        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
        ret = gfs2_glock_nq(&gh);
        if (ret)
@@ -393,14 +403,13 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
                goto out_unlock;
        }
-        ret = -ENOMEM;
+        ret = gfs2_rindex_update(sdp);
-        qa = gfs2_qadata_get(ip);
+        if (ret)
-        if (qa == NULL)
                goto out_unlock;
        ret = gfs2_quota_lock_check(ip);
        if (ret)
-                goto out_alloc_put;
+                goto out_unlock;
        gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
        ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks);
        if (ret)
@@ -447,8 +456,6 @@ out_trans_fail:
        gfs2_inplace_release(ip);
 out_quota_unlock:
        gfs2_quota_unlock(ip);
-out_alloc_put:
-        gfs2_qadata_put(ip);
 out_unlock:
        gfs2_glock_dq(&gh);
 out:
@@ -567,16 +574,14 @@ fail:
 static int gfs2_release(struct inode *inode, struct file *file)
 {
-        struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
+        struct gfs2_inode *ip = GFS2_I(inode);
-        struct gfs2_file *fp;
-        fp = file->private_data;
+        kfree(file->private_data);
        file->private_data = NULL;
-        if (gfs2_assert_warn(sdp, fp))
+        if ((file->f_mode & FMODE_WRITE) &&
-                return -EIO;
+            (atomic_read(&inode->i_writecount) == 1))
+                gfs2_rs_delete(ip);
-        kfree(fp);
        return 0;
 }
@@ -653,12 +658,20 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
                                   unsigned long nr_segs, loff_t pos)
 {
        struct file *file = iocb->ki_filp;
+        size_t writesize = iov_length(iov, nr_segs);
+        struct dentry *dentry = file->f_dentry;
+        struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
+        struct gfs2_sbd *sdp;
+        int ret;
+        sdp = GFS2_SB(file->f_mapping->host);
+        ret = gfs2_rs_alloc(ip);
+        if (ret)
+                return ret;
+        atomic_set(&ip->i_res->rs_sizehint, writesize >> sdp->sd_sb.sb_bsize_shift);
        if (file->f_flags & O_APPEND) {
-                struct dentry *dentry = file->f_dentry;
-                struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
                struct gfs2_holder gh;
-                int ret;
                ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
                if (ret)
@@ -751,7 +764,6 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
        struct gfs2_inode *ip = GFS2_I(inode);
        unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
        loff_t bytes, max_bytes;
-        struct gfs2_qadata *qa;
        int error;
        const loff_t pos = offset;
        const loff_t count = len;
@@ -774,11 +786,17 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
        if (bytes == 0)
                bytes = sdp->sd_sb.sb_bsize;
+        error = gfs2_rs_alloc(ip);
+        if (error)
+                return error;
        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
        error = gfs2_glock_nq(&ip->i_gh);
        if (unlikely(error))
                goto out_uninit;
+        atomic_set(&ip->i_res->rs_sizehint, len >> sdp->sd_sb.sb_bsize_shift);
        while (len > 0) {
                if (len < bytes)
                        bytes = len;
@@ -787,15 +805,9 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
                        offset += bytes;
                        continue;
                }
-                qa = gfs2_qadata_get(ip);
-                if (!qa) {
-                        error = -ENOMEM;
-                        goto out_unlock;
-                }
                error = gfs2_quota_lock_check(ip);
                if (error)
-                        goto out_alloc_put;
+                        goto out_unlock;
 retry:
                gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
@@ -835,7 +847,6 @@ retry:
                offset += max_bytes;
                gfs2_inplace_release(ip);
                gfs2_quota_unlock(ip);
-                gfs2_qadata_put(ip);
        }
        if (error == 0)
@@ -846,8 +857,6 @@ out_trans_fail:
        gfs2_inplace_release(ip);
 out_qunlock:
        gfs2_quota_unlock(ip);
-out_alloc_put:
-        gfs2_qadata_put(ip);
 out_unlock:
        gfs2_glock_dq(&ip->i_gh);
 out_uninit:
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index dab2526071cc..1ed81f40da0d 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -46,10 +46,11 @@
 #include "trace_gfs2.h"
 struct gfs2_glock_iter {
-        int hash;                       /* hash bucket index         */
+        int hash;                       /* hash bucket index           */
-        struct gfs2_sbd *sdp;           /* incore superblock         */
+        unsigned nhash;                 /* Index within current bucket */
-        struct gfs2_glock *gl;          /* current glock struct      */
+        struct gfs2_sbd *sdp;           /* incore superblock           */
-        char string[512];               /* scratch space             */
+        struct gfs2_glock *gl;          /* current glock struct        */
+        loff_t last_pos;                /* last position               */
 };
 typedef void (*glock_examiner) (struct gfs2_glock * gl);
@@ -767,6 +768,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
        gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0;
        gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0;
        memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));
+        memset(gl->gl_lvb, 0, 32 * sizeof(char));
        gl->gl_lksb.sb_lvbptr = gl->gl_lvb;
        gl->gl_tchange = jiffies;
        gl->gl_object = NULL;
@@ -948,9 +950,7 @@ void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
        va_start(args, fmt);
        if (seq) {
-                struct gfs2_glock_iter *gi = seq->private;
+                seq_vprintf(seq, fmt, args);
-                vsprintf(gi->string, fmt, args);
-                seq_printf(seq, gi->string);
        } else {
                vaf.fmt = fmt;
                vaf.va = &args;
@@ -1854,8 +1854,14 @@ static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
                gl = gi->gl;
                if (gl) {
                        gi->gl = glock_hash_next(gl);
+                        gi->nhash++;
                } else {
+                        if (gi->hash >= GFS2_GL_HASH_SIZE) {
+                                rcu_read_unlock();
+                                return 1;
+                        }
                        gi->gl = glock_hash_chain(gi->hash);
+                        gi->nhash = 0;
                }
                while (gi->gl == NULL) {
                        gi->hash++;
@@ -1864,6 +1870,7 @@ static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
                                return 1;
                        }
                        gi->gl = glock_hash_chain(gi->hash);
+                        gi->nhash = 0;
                }
        /* Skip entries for other sb and dead entries */
        } while (gi->sdp != gi->gl->gl_sbd || atomic_read(&gi->gl->gl_ref) == 0);
@@ -1876,7 +1883,12 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
        struct gfs2_glock_iter *gi = seq->private;
        loff_t n = *pos;
-        gi->hash = 0;
+        if (gi->last_pos <= *pos)
+                n = gi->nhash + (*pos - gi->last_pos);
+        else
+                gi->hash = 0;
+        gi->nhash = 0;
        rcu_read_lock();
        do {
@@ -1884,6 +1896,7 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
                        return NULL;
        } while (n--);
+        gi->last_pos = *pos;
        return gi->gl;
 }
@@ -1893,7 +1906,7 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
        struct gfs2_glock_iter *gi = seq->private;
        (*pos)++;
+        gi->last_pos = *pos;
        if (gfs2_glock_iter_next(gi))
                return NULL;
@@ -1964,6 +1977,8 @@ static const struct seq_operations gfs2_sbstats_seq_ops = {
        .show  = gfs2_sbstats_seq_show,
 };
+#define GFS2_SEQ_GOODSIZE min(PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER, 65536UL)
 static int gfs2_glocks_open(struct inode *inode, struct file *file)
 {
        int ret = seq_open_private(file, &gfs2_glock_seq_ops,
@@ -1972,6 +1987,9 @@ static int gfs2_glocks_open(struct inode *inode, struct file *file)
                struct seq_file *seq = file->private_data;
                struct gfs2_glock_iter *gi = seq->private;
                gi->sdp = inode->i_private;
+                seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
+                if (seq->buf)
+                        seq->size = GFS2_SEQ_GOODSIZE;
        }
        return ret;
 }
@@ -1984,6 +2002,9 @@ static int gfs2_glstats_open(struct inode *inode, struct file *file)
                struct seq_file *seq = file->private_data;
                struct gfs2_glock_iter *gi = seq->private;
                gi->sdp = inode->i_private;
+                seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
+                if (seq->buf)
+                        seq->size = GFS2_SEQ_GOODSIZE;
        }
        return ret;
 }
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 67fd6beffece..aaecc8085fc5 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -84,17 +84,22 @@ struct gfs2_rgrpd {
        u32 rd_data;                    /* num of data blocks in rgrp */
        u32 rd_bitbytes;                /* number of bytes in data bitmaps */
        u32 rd_free;
+        u32 rd_reserved;                /* number of blocks reserved */
        u32 rd_free_clone;
        u32 rd_dinodes;
        u64 rd_igeneration;
        struct gfs2_bitmap *rd_bits;
        struct gfs2_sbd *rd_sbd;
+        struct gfs2_rgrp_lvb *rd_rgl;
        u32 rd_last_alloc;
        u32 rd_flags;
 #define GFS2_RDF_CHECK          0x10000000 /* check for unlinked inodes */
 #define GFS2_RDF_UPTODATE       0x20000000 /* rg is up to date */
 #define GFS2_RDF_ERROR          0x40000000 /* error in rg */
 #define GFS2_RDF_MASK           0xf0000000 /* mask for internal flags */
+        spinlock_t rd_rsspin;           /* protects reservation related vars */
+        struct rb_root rd_rstree;       /* multi-block reservation tree */
+        u32 rd_rs_cnt;                  /* count of current reservations */
 };
 enum gfs2_state_bits {
@@ -232,6 +237,38 @@ struct gfs2_holder {
        unsigned long gh_ip;
 };
+/* Resource group multi-block reservation, in order of appearance:
+   Step 1. Function prepares to write, allocates a mb, sets the size hint.
+   Step 2. User calls inplace_reserve to target an rgrp, sets the rgrp info
+   Step 3. Function get_local_rgrp locks the rgrp, determines which bits to use
+   Step 4. Bits are assigned from the rgrp based on either the reservation
+           or wherever it can.
+*/
+struct gfs2_blkreserv {
+        /* components used during write (step 1): */
+        atomic_t rs_sizehint;         /* hint of the write size */
+        /* components used during inplace_reserve (step 2): */
+        u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */
+        /* components used during get_local_rgrp (step 3): */
+        struct gfs2_rgrpd *rs_rgd;    /* pointer to the gfs2_rgrpd */
+        struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */
+        struct rb_node rs_node;       /* link to other block reservations */
+        /* components used during block searches and assignments (step 4): */
+        struct gfs2_bitmap *rs_bi;    /* bitmap for the current allocation */
+        u32 rs_biblk;                 /* start block relative to the bi */
+        u32 rs_free;                  /* how many blocks are still free */
+        /* ancillary quota stuff */
+        struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS];
+        struct gfs2_holder rs_qa_qd_ghs[2 * MAXQUOTAS];
+        unsigned int rs_qa_qd_num;
+};
 enum {
        GLF_LOCK                        = 1,
        GLF_DEMOTE                      = 3,
@@ -289,18 +326,6 @@ struct gfs2_glock {
 #define GFS2_MIN_LVB_SIZE 32    /* Min size of LVB that gfs2 supports */
-struct gfs2_qadata { /* quota allocation data */
-        /* Quota stuff */
-        struct gfs2_quota_data *qa_qd[2*MAXQUOTAS];
-        struct gfs2_holder qa_qd_ghs[2*MAXQUOTAS];
-        unsigned int qa_qd_num;
-};
-struct gfs2_blkreserv {
-        u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */
-        struct gfs2_holder rs_rgd_gh; /* Filled in by gfs2_inplace_reserve() */
-};
 enum {
        GIF_INVALID             = 0,
        GIF_QD_LOCKED           = 1,
@@ -308,7 +333,6 @@ enum {
        GIF_SW_PAGED            = 3,
 };
 struct gfs2_inode {
        struct inode i_inode;
        u64 i_no_addr;
@@ -319,8 +343,7 @@ struct gfs2_inode {
        struct gfs2_glock *i_gl; /* Move into i_gh? */
        struct gfs2_holder i_iopen_gh;
        struct gfs2_holder i_gh; /* for prepare/commit_write only */
-        struct gfs2_qadata *i_qadata; /* quota allocation data */
+        struct gfs2_blkreserv *i_res; /* rgrp multi-block reservation */
-        struct gfs2_blkreserv *i_res; /* resource group block reservation */
        struct gfs2_rgrpd *i_rgd;
        u64 i_goal;     /* goal block for allocations */
        struct rw_semaphore i_rw_mutex;
@@ -473,6 +496,7 @@ struct gfs2_args {
        unsigned int ar_discard:1;              /* discard requests */
        unsigned int ar_errors:2;               /* errors=withdraw | panic */
        unsigned int ar_nobarrier:1;            /* do not send barriers */
+        unsigned int ar_rgrplvb:1;              /* use lvbs for rgrp info */
        int ar_commit;                          /* Commit interval */
        int ar_statfs_quantum;                  /* The fast statfs interval */
        int ar_quota_quantum;                   /* The quota interval */
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index a9ba2444e077..4ce22e547308 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -521,12 +521,13 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
        int error;
        munge_mode_uid_gid(dip, &mode, &uid, &gid);
-        if (!gfs2_qadata_get(dip))
+        error = gfs2_rindex_update(sdp);
-                return -ENOMEM;
+        if (error)
+                return error;
        error = gfs2_quota_lock(dip, uid, gid);
        if (error)
-                goto out;
+                return error;
        error = gfs2_quota_check(dip, uid, gid);
        if (error)
@@ -542,8 +543,6 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 out_quota:
        gfs2_quota_unlock(dip);
-out:
-        gfs2_qadata_put(dip);
        return error;
 }
@@ -551,14 +550,13 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
                       struct gfs2_inode *ip)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
-        struct gfs2_qadata *qa;
        int alloc_required;
        struct buffer_head *dibh;
        int error;
-        qa = gfs2_qadata_get(dip);
+        error = gfs2_rindex_update(sdp);
-        if (!qa)
+        if (error)
-                return -ENOMEM;
+                return error;
        error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
        if (error)
@@ -605,13 +603,13 @@ fail_end_trans:
        gfs2_trans_end(sdp);
 fail_ipreserv:
-        gfs2_inplace_release(dip);
+        if (alloc_required)
+                gfs2_inplace_release(dip);
 fail_quota_locks:
        gfs2_quota_unlock(dip);
 fail:
-        gfs2_qadata_put(dip);
        return error;
 }
@@ -657,7 +655,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
        const struct qstr *name = &dentry->d_name;
        struct gfs2_holder ghs[2];
        struct inode *inode = NULL;
-        struct gfs2_inode *dip = GFS2_I(dir);
+        struct gfs2_inode *dip = GFS2_I(dir), *ip;
        struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
        struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 };
        int error;
@@ -667,6 +665,15 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
        if (!name->len || name->len > GFS2_FNAMESIZE)
                return -ENAMETOOLONG;
+        /* We need a reservation to allocate the new dinode block. The
+           directory ip temporarily points to the reservation, but this is
+           being done to get a set of contiguous blocks for the new dinode.
+           Since this is a create, we don't have a sizehint yet, so it will
+           have to use the minimum reservation size. */
+        error = gfs2_rs_alloc(dip);
+        if (error)
+                return error;
        error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
        if (error)
                goto fail;
@@ -700,19 +707,29 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
        if (IS_ERR(inode))
                goto fail_gunlock2;
-        error = gfs2_inode_refresh(GFS2_I(inode));
+        ip = GFS2_I(inode);
+        error = gfs2_inode_refresh(ip);
        if (error)
                goto fail_gunlock2;
+        /* The newly created inode needs a reservation so it can allocate
+           xattrs. At the same time, we want new blocks allocated to the new
+           dinode to be as contiguous as possible. Since we allocated the
+           dinode block under the directory's reservation, we transfer
+           ownership of that reservation to the new inode. The directory
+           doesn't need a reservation unless it needs a new allocation. */
+        ip->i_res = dip->i_res;
+        dip->i_res = NULL;
        error = gfs2_acl_create(dip, inode);
        if (error)
                goto fail_gunlock2;
-        error = gfs2_security_init(dip, GFS2_I(inode), name);
+        error = gfs2_security_init(dip, ip, name);
        if (error)
                goto fail_gunlock2;
-        error = link_dinode(dip, name, GFS2_I(inode));
+        error = link_dinode(dip, name, ip);
        if (error)
                goto fail_gunlock2;
@@ -722,10 +739,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
        gfs2_trans_end(sdp);
        /* Check if we reserved space in the rgrp. Function link_dinode may
           not, depending on whether alloc is required. */
-        if (dip->i_res)
+        if (gfs2_mb_reserved(dip))
                gfs2_inplace_release(dip);
        gfs2_quota_unlock(dip);
-        gfs2_qadata_put(dip);
        mark_inode_dirty(inode);
        gfs2_glock_dq_uninit_m(2, ghs);
        d_instantiate(dentry, inode);
@@ -740,6 +756,7 @@ fail_gunlock:
                iput(inode);
        }
 fail:
+        gfs2_rs_delete(dip);
        if (bh)
                brelse(bh);
        return error;
@@ -755,11 +772,8 @@ fail:
 */
 static int gfs2_create(struct inode *dir, struct dentry *dentry,
-                       umode_t mode, struct nameidata *nd)
+                       umode_t mode, bool excl)
 {
-        int excl = 0;
-        if (nd && (nd->flags & LOOKUP_EXCL))
-                excl = 1;
        return gfs2_create_inode(dir, dentry, S_IFREG | mode, 0, NULL, 0, excl);
 }
@@ -775,7 +789,7 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry,
 */
 static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
-                                  struct nameidata *nd)
+                                  unsigned int flags)
 {
        struct inode *inode = gfs2_lookupi(dir, &dentry->d_name, 0);
        if (inode && !IS_ERR(inode)) {
@@ -819,6 +833,10 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
        if (S_ISDIR(inode->i_mode))
                return -EPERM;
+        error = gfs2_rs_alloc(dip);
+        if (error)
+                return error;
        gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
@@ -870,16 +888,9 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
        error = 0;
        if (alloc_required) {
-                struct gfs2_qadata *qa = gfs2_qadata_get(dip);
-                if (!qa) {
-                        error = -ENOMEM;
-                        goto out_gunlock;
-                }
                error = gfs2_quota_lock_check(dip);
                if (error)
-                        goto out_alloc;
+                        goto out_gunlock;
                error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres);
                if (error)
@@ -922,9 +933,6 @@ out_ipres:
 out_gunlock_q:
        if (alloc_required)
                gfs2_quota_unlock(dip);
-out_alloc:
-        if (alloc_required)
-                gfs2_qadata_put(dip);
 out_gunlock:
        gfs2_glock_dq(ghs + 1);
 out_child:
@@ -1234,6 +1242,10 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
        if (error)
                return error;
+        error = gfs2_rs_alloc(ndip);
+        if (error)
+                return error;
        if (odip != ndip) {
                error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE,
                                           0, &r_gh);
@@ -1357,16 +1369,9 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
                goto out_gunlock;
        if (alloc_required) {
-                struct gfs2_qadata *qa = gfs2_qadata_get(ndip);
-                if (!qa) {
-                        error = -ENOMEM;
-                        goto out_gunlock;
-                }
                error = gfs2_quota_lock_check(ndip);
                if (error)
-                        goto out_alloc;
+                        goto out_gunlock;
                error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres);
                if (error)
@@ -1427,9 +1432,6 @@ out_ipreserv:
 out_gunlock_q:
        if (alloc_required)
                gfs2_quota_unlock(ndip);
-out_alloc:
-        if (alloc_required)
-                gfs2_qadata_put(ndip);
 out_gunlock:
        while (x--) {
                gfs2_glock_dq(ghs + x);
@@ -1590,12 +1592,9 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
        if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
                ogid = ngid = NO_QUOTA_CHANGE;
-        if (!gfs2_qadata_get(ip))
-                return -ENOMEM;
        error = gfs2_quota_lock(ip, nuid, ngid);
        if (error)
-                goto out_alloc;
+                return error;
        if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
                error = gfs2_quota_check(ip, nuid, ngid);
@@ -1621,8 +1620,6 @@ out_end_trans:
        gfs2_trans_end(sdp);
 out_gunlock_q:
        gfs2_quota_unlock(ip);
-out_alloc:
-        gfs2_qadata_put(ip);
        return error;
 }
@@ -1644,6 +1641,10 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
        struct gfs2_holder i_gh;
        int error;
+        error = gfs2_rs_alloc(ip);
+        if (error)
+                return error;
        error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
        if (error)
                return error;
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 852c1be1dd3b..8ff95a2d54ee 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -401,9 +401,14 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
                goto out;
        set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
        set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
-        gfs2_meta_check(sdp, bd->bd_bh);
-        gfs2_pin(sdp, bd->bd_bh);
        mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
+        if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) {
+                printk(KERN_ERR
+                       "Attempting to add uninitialised block to journal (inplace block=%lld)\n",
+                       (unsigned long long)bd->bd_bh->b_blocknr);
+                BUG();
+        }
+        gfs2_pin(sdp, bd->bd_bh);
        mh->__pad0 = cpu_to_be64(0);
        mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
        sdp->sd_log_num_buf++;
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 6cdb0f2a1b09..e04d0e09ee7b 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -43,7 +43,6 @@ static void gfs2_init_inode_once(void *foo)
        inode_init_once(&ip->i_inode);
        init_rwsem(&ip->i_rw_mutex);
        INIT_LIST_HEAD(&ip->i_trunc_list);
-        ip->i_qadata = NULL;
        ip->i_res = NULL;
        ip->i_hash_cache = NULL;
 }
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 6c1e5d1c404a..3a56c8d94de0 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -213,8 +213,10 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
        struct gfs2_sbd *sdp = gl->gl_sbd;
        struct buffer_head *bh;
-        if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+        if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
+                *bhp = NULL;
                return -EIO;
+        }
        *bhp = bh = gfs2_getbuf(gl, blkno, CREATE);
@@ -235,6 +237,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
                if (tr && tr->tr_touched)
                        gfs2_io_error_bh(sdp, bh);
                brelse(bh);
+                *bhp = NULL;
                return -EIO;
        }
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index b8c250fc4922..e5af9dc420ef 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1118,20 +1118,33 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
        }
        error = init_names(sdp, silent);
-        if (error)
+        if (error) {
-                goto fail;
+                /* In this case, we haven't initialized sysfs, so we have to
+                   manually free the sdp. */
+                free_percpu(sdp->sd_lkstats);
+                kfree(sdp);
+                sb->s_fs_info = NULL;
+                return error;
+        }
        snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s", sdp->sd_table_name);
-        gfs2_create_debugfs_file(sdp);
        error = gfs2_sys_fs_add(sdp);
+        /*
+         * If we hit an error here, gfs2_sys_fs_add will have called function
+         * kobject_put which causes the sysfs usage count to go to zero, which
+         * causes sysfs to call function gfs2_sbd_release, which frees sdp.
+         * Subsequent error paths here will call gfs2_sys_fs_del, which also
+         * calls kobject_put to free sdp.
+         */
        if (error)
-                goto fail;
+                return error;
+        gfs2_create_debugfs_file(sdp);
        error = gfs2_lm_mount(sdp, silent);
        if (error)
-                goto fail_sys;
+                goto fail_debug;
        error = init_locking(sdp, &mount_gh, DO);
        if (error)
@@ -1215,12 +1228,12 @@ fail_locking:
 fail_lm:
        gfs2_gl_hash_clear(sdp);
        gfs2_lm_unmount(sdp);
-fail_sys:
+fail_debug:
-        gfs2_sys_fs_del(sdp);
-fail:
        gfs2_delete_debugfs_file(sdp);
        free_percpu(sdp->sd_lkstats);
-        kfree(sdp);
+        /* gfs2_sys_fs_del must be the last thing we do, since it causes
+         * sysfs to call function gfs2_sbd_release, which frees sdp. */
+        gfs2_sys_fs_del(sdp);
        sb->s_fs_info = NULL;
        return error;
 }
@@ -1286,7 +1299,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
                error = -EBUSY;
                goto error_bdev;
        }
-        s = sget(fs_type, test_gfs2_super, set_gfs2_super, bdev);
+        s = sget(fs_type, test_gfs2_super, set_gfs2_super, flags, bdev);
        mutex_unlock(&bdev->bd_fsfreeze_mutex);
        error = PTR_ERR(s);
        if (IS_ERR(s))
@@ -1316,7 +1329,6 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
        } else {
                char b[BDEVNAME_SIZE];
-                s->s_flags = flags;
                s->s_mode = mode;
                strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
                sb_set_blocksize(s, block_size(bdev));
@@ -1360,7 +1372,7 @@ static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type,
                       dev_name, error);
                return ERR_PTR(error);
        }
-        s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super,
+        s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super, flags,
                 path.dentry->d_inode->i_sb->s_bdev);
        path_put(&path);
        if (IS_ERR(s)) {
@@ -1390,10 +1402,9 @@ static void gfs2_kill_sb(struct super_block *sb)
        sdp->sd_root_dir = NULL;
        sdp->sd_master_dir = NULL;
        shrink_dcache_sb(sb);
-        kill_block_super(sb);
        gfs2_delete_debugfs_file(sdp);
        free_percpu(sdp->sd_lkstats);
-        kfree(sdp);
+        kill_block_super(sb);
 }
 struct file_system_type gfs2_fs_type = {
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index b97178e7d397..a3bde91645c2 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -494,11 +494,15 @@ static void qdsb_put(struct gfs2_quota_data *qd)
 int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-        struct gfs2_qadata *qa = ip->i_qadata;
+        struct gfs2_quota_data **qd;
-        struct gfs2_quota_data **qd = qa->qa_qd;
        int error;
-        if (gfs2_assert_warn(sdp, !qa->qa_qd_num) ||
+        if (ip->i_res == NULL)
+                gfs2_rs_alloc(ip);
+        qd = ip->i_res->rs_qa_qd;
+        if (gfs2_assert_warn(sdp, !ip->i_res->rs_qa_qd_num) ||
            gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)))
                return -EIO;
@@ -508,20 +512,20 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
        error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, qd);
        if (error)
                goto out;
-        qa->qa_qd_num++;
+        ip->i_res->rs_qa_qd_num++;
        qd++;
        error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, qd);
        if (error)
                goto out;
-        qa->qa_qd_num++;
+        ip->i_res->rs_qa_qd_num++;
        qd++;
        if (uid != NO_QUOTA_CHANGE && uid != ip->i_inode.i_uid) {
                error = qdsb_get(sdp, QUOTA_USER, uid, qd);
                if (error)
                        goto out;
-                qa->qa_qd_num++;
+                ip->i_res->rs_qa_qd_num++;
                qd++;
        }
@@ -529,7 +533,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
                error = qdsb_get(sdp, QUOTA_GROUP, gid, qd);
                if (error)
                        goto out;
-                qa->qa_qd_num++;
+                ip->i_res->rs_qa_qd_num++;
                qd++;
        }
@@ -542,16 +546,17 @@ out:
 void gfs2_quota_unhold(struct gfs2_inode *ip)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-        struct gfs2_qadata *qa = ip->i_qadata;
        unsigned int x;
+        if (ip->i_res == NULL)
+                return;
        gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags));
-        for (x = 0; x < qa->qa_qd_num; x++) {
+        for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
-                qdsb_put(qa->qa_qd[x]);
+                qdsb_put(ip->i_res->rs_qa_qd[x]);
-                qa->qa_qd[x] = NULL;
+                ip->i_res->rs_qa_qd[x] = NULL;
        }
-        qa->qa_qd_num = 0;
+        ip->i_res->rs_qa_qd_num = 0;
 }
 static int sort_qd(const void *a, const void *b)
@@ -764,6 +769,10 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
        unsigned int nalloc = 0, blocks;
        int error;
+        error = gfs2_rs_alloc(ip);
+        if (error)
+                return error;
        gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
                              &data_blocks, &ind_blocks);
@@ -915,7 +924,6 @@ fail:
 int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-        struct gfs2_qadata *qa = ip->i_qadata;
        struct gfs2_quota_data *qd;
        unsigned int x;
        int error = 0;
@@ -928,15 +936,15 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
            sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
                return 0;
-        sort(qa->qa_qd, qa->qa_qd_num, sizeof(struct gfs2_quota_data *),
+        sort(ip->i_res->rs_qa_qd, ip->i_res->rs_qa_qd_num,
-             sort_qd, NULL);
+             sizeof(struct gfs2_quota_data *), sort_qd, NULL);
-        for (x = 0; x < qa->qa_qd_num; x++) {
+        for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
                int force = NO_FORCE;
-                qd = qa->qa_qd[x];
+                qd = ip->i_res->rs_qa_qd[x];
                if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags))
                        force = FORCE;
-                error = do_glock(qd, force, &qa->qa_qd_ghs[x]);
+                error = do_glock(qd, force, &ip->i_res->rs_qa_qd_ghs[x]);
                if (error)
                        break;
        }
@@ -945,7 +953,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
                set_bit(GIF_QD_LOCKED, &ip->i_flags);
        else {
                while (x--)
-                        gfs2_glock_dq_uninit(&qa->qa_qd_ghs[x]);
+                        gfs2_glock_dq_uninit(&ip->i_res->rs_qa_qd_ghs[x]);
                gfs2_quota_unhold(ip);
        }
@@ -990,7 +998,6 @@ static int need_sync(struct gfs2_quota_data *qd)
 void gfs2_quota_unlock(struct gfs2_inode *ip)
 {
-        struct gfs2_qadata *qa = ip->i_qadata;
        struct gfs2_quota_data *qda[4];
        unsigned int count = 0;
        unsigned int x;
@@ -998,14 +1005,14 @@ void gfs2_quota_unlock(struct gfs2_inode *ip)
        if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags))
                goto out;
-        for (x = 0; x < qa->qa_qd_num; x++) {
+        for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
                struct gfs2_quota_data *qd;
                int sync;
-                qd = qa->qa_qd[x];
+                qd = ip->i_res->rs_qa_qd[x];
                sync = need_sync(qd);
-                gfs2_glock_dq_uninit(&qa->qa_qd_ghs[x]);
+                gfs2_glock_dq_uninit(&ip->i_res->rs_qa_qd_ghs[x]);
                if (sync && qd_trylock(qd))
                        qda[count++] = qd;
@@ -1038,7 +1045,6 @@ static int print_message(struct gfs2_quota_data *qd, char *type)
 int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-        struct gfs2_qadata *qa = ip->i_qadata;
        struct gfs2_quota_data *qd;
        s64 value;
        unsigned int x;
@@ -1050,8 +1056,8 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
        if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
                return 0;
-        for (x = 0; x < qa->qa_qd_num; x++) {
+        for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
-                qd = qa->qa_qd[x];
+                qd = ip->i_res->rs_qa_qd[x];
                if (!((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
                      (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))))
@@ -1089,7 +1095,6 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
 void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
                       u32 uid, u32 gid)
 {
-        struct gfs2_qadata *qa = ip->i_qadata;
        struct gfs2_quota_data *qd;
        unsigned int x;
@@ -1098,8 +1103,8 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
        if (ip->i_diskflags & GFS2_DIF_SYSTEM)
                return;
-        for (x = 0; x < qa->qa_qd_num; x++) {
+        for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
-                qd = qa->qa_qd[x];
+                qd = ip->i_res->rs_qa_qd[x];
                if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
                    (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) {
@@ -1108,7 +1113,7 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
        }
 }
-int gfs2_quota_sync(struct super_block *sb, int type, int wait)
+int gfs2_quota_sync(struct super_block *sb, int type)
 {
        struct gfs2_sbd *sdp = sb->s_fs_info;
        struct gfs2_quota_data **qda;
@@ -1154,7 +1159,7 @@ int gfs2_quota_sync(struct super_block *sb, int type, int wait)
 static int gfs2_quota_sync_timeo(struct super_block *sb, int type)
 {
-        return gfs2_quota_sync(sb, type, 0);
+        return gfs2_quota_sync(sb, type);
 }
 int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id)
@@ -1549,10 +1554,14 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
        if (error)
                return error;
+        error = gfs2_rs_alloc(ip);
+        if (error)
+                goto out_put;
        mutex_lock(&ip->i_inode.i_mutex);
        error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_EXCLUSIVE, 0, &q_gh);
        if (error)
-                goto out_put;
+                goto out_unlockput;
        error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
        if (error)
                goto out_q;
@@ -1609,8 +1618,9 @@ out_i:
        gfs2_glock_dq_uninit(&i_gh);
 out_q:
        gfs2_glock_dq_uninit(&q_gh);
-out_put:
+out_unlockput:
        mutex_unlock(&ip->i_inode.i_mutex);
+out_put:
        qd_put(qd);
        return error;
 }
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 90bf1c302a98..f25d98b87904 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -26,7 +26,7 @@ extern int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid);
 extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
                              u32 uid, u32 gid);
-extern int gfs2_quota_sync(struct super_block *sb, int type, int wait);
+extern int gfs2_quota_sync(struct super_block *sb, int type);
 extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id);
 extern int gfs2_quota_init(struct gfs2_sbd *sdp);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index f74fb9bd1973..4d34887a601d 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -35,6 +35,9 @@
 #define BFITNOENT ((u32)~0)
 #define NO_BLOCK ((u64)~0)
+#define RSRV_CONTENTION_FACTOR 4
+#define RGRP_RSRV_MAX_CONTENDERS 2
 #if BITS_PER_LONG == 32
 #define LBITMASK   (0x55555555UL)
 #define LBITSKIP55 (0x55555555UL)
@@ -178,6 +181,57 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
 }
 /**
+ * rs_cmp - multi-block reservation range compare
+ * @blk: absolute file system block number of the new reservation
+ * @len: number of blocks in the new reservation
+ * @rs: existing reservation to compare against
+ *
+ * The existing reservation is treated as the block range
+ * [startblk, startblk + rs_free), where startblk is gfs2_rs_startblk(@rs).
+ * The new range runs from @blk to @blk + @len - 1, inclusive.
+ *
+ * returns: 1 if the block range is beyond the reach of the reservation
+ *         -1 if the block range is before the start of the reservation
+ *          0 if the block range overlaps with the reservation
+ */
+static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
+{
+        u64 startblk = gfs2_rs_startblk(rs);
+        if (blk >= startblk + rs->rs_free)
+                return 1;
+        if (blk + len - 1 < startblk)
+                return -1;
+        return 0;
+}
+/**
+ * rs_find - Find a rgrp multi-block reservation that contains a given block
+ * @rgd: The rgrp
+ * @rgblk: The block we're looking for, relative to the rgrp
+ *
+ * @rgblk is converted to a file system block number by adding rgd->rd_data0,
+ * then rgd->rd_rstree is walked using rs_cmp() on a one-block range.
+ * The walk is done under rgd->rd_rsspin, but the lock is dropped before
+ * returning, so the caller gets the pointer without the spinlock held.
+ *
+ * Returns: the reservation for which rs_cmp() reports an overlap, or NULL
+ *          if the tree holds no such reservation
+ */
+static struct gfs2_blkreserv *rs_find(struct gfs2_rgrpd *rgd, u32 rgblk)
+{
+        struct rb_node **newn;
+        int rc;
+        u64 fsblk = rgblk + rgd->rd_data0;
+        spin_lock(&rgd->rd_rsspin);
+        newn = &rgd->rd_rstree.rb_node;
+        while (*newn) {
+                struct gfs2_blkreserv *cur =
+                        rb_entry(*newn, struct gfs2_blkreserv, rs_node);
+                rc = rs_cmp(fsblk, 1, cur);
+                if (rc < 0)
+                        newn = &((*newn)->rb_left);
+                else if (rc > 0)
+                        newn = &((*newn)->rb_right);
+                else {
+                        spin_unlock(&rgd->rd_rsspin);
+                        return cur;
+                }
+        }
+        spin_unlock(&rgd->rd_rsspin);
+        return NULL;
+}
+/**
 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
 *       a block in a given allocation state.
 * @buf: the buffer that holds the bitmaps
@@ -417,6 +471,137 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd)
        }
 }
+/**
+ * gfs2_rs_alloc - make sure we have a reservation assigned to the inode
+ * @ip: the inode for this reservation
+ *
+ * Allocates a zeroed reservation and attaches it to @ip unless one is
+ * already present. The attach is done under ip->i_rw_mutex and re-checks
+ * ip->i_res, so if another task installed a reservation in the meantime
+ * the freshly allocated one is freed again.
+ *
+ * Returns: 0 if @ip has a reservation on return,
+ *          -ENOMEM if a new reservation could not be allocated
+ */
+int gfs2_rs_alloc(struct gfs2_inode *ip)
+{
+        struct gfs2_blkreserv *res;
+        if (ip->i_res)
+                return 0;
+        res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
+        /* Bail out before taking the lock: there is nothing to install, and
+           a NULL pointer must never be handed to kmem_cache_free() below. */
+        if (!res)
+                return -ENOMEM;
+        down_write(&ip->i_rw_mutex);
+        if (ip->i_res)
+                kmem_cache_free(gfs2_rsrv_cachep, res);
+        else
+                ip->i_res = res;
+        up_write(&ip->i_rw_mutex);
+        return 0;
+}
+/* dump_rs - print one reservation through gfs2_print_dbg: the rd_addr of its
+   rgrp (r:), gfs2_rs_startblk() (s:), rs_biblk (b:) and rs_free (f:) */
+static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs)
+{
+        gfs2_print_dbg(seq, "  r: %llu s:%llu b:%u f:%u\n",
+                       rs->rs_rgd->rd_addr, gfs2_rs_startblk(rs), rs->rs_biblk,
+                       rs->rs_free);
+}
+/**
+ * __rs_deltree - remove a multi-block reservation from the rgd tree
+ * @rs: The reservation to remove
+ *
+ * Does nothing if gfs2_rs_active() reports the reservation as inactive.
+ * Otherwise the node is erased from rgd->rd_rstree (when the tree is not
+ * empty) and rd_rs_cnt is decremented. Any blocks still counted in rs_free
+ * are subtracted from the rgrp's rd_reserved, rs_free is zeroed and GBF_FULL
+ * is cleared on the reservation's bitmap. Finally rs_bi and rs_biblk are
+ * reset; rs_rgd, rs_requested and rs_sizehint are deliberately left alone.
+ *
+ * This function takes no lock itself. The callers visible here,
+ * gfs2_rs_deltree() and return_all_reservations(), both hold
+ * rgd->rd_rsspin around the call.
+ */
+static void __rs_deltree(struct gfs2_blkreserv *rs)
+{
+        struct gfs2_rgrpd *rgd;
+        if (!gfs2_rs_active(rs))
+                return;
+        rgd = rs->rs_rgd;
+        /* We can't do this: The reason is that when the rgrp is invalidated,
+           it's in the "middle" of acquiring the glock, but the HOLDER bit
+           isn't set yet:
+           BUG_ON(!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl));*/
+        trace_gfs2_rs(NULL, rs, TRACE_RS_TREEDEL);
+        if (!RB_EMPTY_ROOT(&rgd->rd_rstree))
+                rb_erase(&rs->rs_node, &rgd->rd_rstree);
+        BUG_ON(!rgd->rd_rs_cnt);
+        rgd->rd_rs_cnt--;
+        if (rs->rs_free) {
+                /* return reserved blocks to the rgrp and the ip */
+                BUG_ON(rs->rs_rgd->rd_reserved < rs->rs_free);
+                rs->rs_rgd->rd_reserved -= rs->rs_free;
+                rs->rs_free = 0;
+                clear_bit(GBF_FULL, &rs->rs_bi->bi_flags);
+                smp_mb__after_clear_bit();
+        }
+        /* We can't change any of the step 1 or step 2 components of the rs.
+           E.g. We can't set rs_rgd to NULL because the rgd glock is held and
+           dequeued through this pointer.
+           Can't: atomic_set(&rs->rs_sizehint, 0);
+           Can't: rs->rs_requested = 0;
+           Can't: rs->rs_rgd = NULL;*/
+        rs->rs_bi = NULL;
+        rs->rs_biblk = 0;
+}
+/**
+ * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree
+ * @rs: The reservation to remove
+ *
+ * Locking wrapper around __rs_deltree(): returns immediately if
+ * gfs2_rs_active() reports the reservation as inactive, otherwise takes the
+ * rd_rsspin spinlock of the reservation's rgrp for the duration of the
+ * removal.
+ */
+void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
+{
+        struct gfs2_rgrpd *rgd;
+        if (!gfs2_rs_active(rs))
+                return;
+        rgd = rs->rs_rgd;
+        spin_lock(&rgd->rd_rsspin);
+        __rs_deltree(rs);
+        spin_unlock(&rgd->rd_rsspin);
+}
+/**
+ * gfs2_rs_delete - delete a multi-block reservation
+ * @ip: The inode for this reservation
+ *
+ * Under ip->i_rw_mutex (taken for write): if @ip has a reservation, it is
+ * removed from its rgrp tree via gfs2_rs_deltree(), freed back to
+ * gfs2_rsrv_cachep, and ip->i_res is set to NULL. It is a BUG if the
+ * reservation still has rs_free blocks after the tree removal. Calling this
+ * on an inode without a reservation is a no-op.
+ */
+void gfs2_rs_delete(struct gfs2_inode *ip)
+{
+        down_write(&ip->i_rw_mutex);
+        if (ip->i_res) {
+                gfs2_rs_deltree(ip->i_res);
+                trace_gfs2_rs(ip, ip->i_res, TRACE_RS_DELETE);
+                BUG_ON(ip->i_res->rs_free);
+                kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
+                ip->i_res = NULL;
+        }
+        up_write(&ip->i_rw_mutex);
+}
+/**
+ * return_all_reservations - return all reserved blocks back to the rgrp.
+ * @rgd: the rgrp that needs its space back
+ *
+ * We previously reserved a bunch of blocks for allocation. Now we need to
+ * give them back. This leaves the reservation structures intact, but removes
+ * all of their corresponding "no-fly zones".
+ *
+ * The whole tree is drained under rgd->rd_rsspin by repeatedly passing the
+ * first node to __rs_deltree().
+ */
+static void return_all_reservations(struct gfs2_rgrpd *rgd)
+{
+        struct rb_node *n;
+        struct gfs2_blkreserv *rs;
+        spin_lock(&rgd->rd_rsspin);
+        while ((n = rb_first(&rgd->rd_rstree))) {
+                rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
+                __rs_deltree(rs);
+        }
+        spin_unlock(&rgd->rd_rsspin);
+}
 void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
 {
        struct rb_node *n;
@@ -439,6 +624,7 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
                gfs2_free_clones(rgd);
                kfree(rgd->rd_bits);
+                return_all_reservations(rgd);
                kmem_cache_free(gfs2_rgrpd_cachep, rgd);
        }
 }
@@ -616,6 +802,7 @@ static int read_rindex_entry(struct gfs2_inode *ip)
        rgd->rd_data0 = be64_to_cpu(buf.ri_data0);
        rgd->rd_data = be32_to_cpu(buf.ri_data);
        rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
+        spin_lock_init(&rgd->rd_rsspin);
        error = compute_bitstructs(rgd);
        if (error)
@@ -627,6 +814,7 @@ static int read_rindex_entry(struct gfs2_inode *ip)
                goto fail;
        rgd->rd_gl->gl_object = rgd;
+        rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lvb;
        rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
        if (rgd->rd_data > sdp->sd_max_rg_data)
                sdp->sd_max_rg_data = rgd->rd_data;
@@ -736,9 +924,65 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
        memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
 }
+/*
+ * gfs2_rgrp_lvb_valid - check the LVB copy of the rgrp against the header
+ * @rgd: the resource group; its first bitmap buffer must already be read in
+ *
+ * The flags, free, dinodes and igeneration fields are compared as stored
+ * (no byte swapping) between the LVB and the rgrp header held in the first
+ * bitmap buffer.
+ *
+ * Returns: 1 if all four fields match, 0 otherwise
+ */
+static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd)
+{
+        const struct gfs2_rgrp_lvb *lvb = rgd->rd_rgl;
+        const struct gfs2_rgrp *hdr =
+                (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data;
+        return lvb->rl_flags == hdr->rg_flags &&
+               lvb->rl_free == hdr->rg_free &&
+               lvb->rl_dinodes == hdr->rg_dinodes &&
+               lvb->rl_igeneration == hdr->rg_igeneration;
+}
+/*
+ * gfs2_rgrp_ondisk2lvb - fill the LVB from an on-disk rgrp header
+ * @rgl: the lock value block to fill in
+ * @buf: buffer holding a struct gfs2_rgrp
+ *
+ * The magic is stamped in big-endian form; flags, free, dinodes and
+ * igeneration are copied as stored, without byte swapping. The pad word is
+ * cleared. rl_unlinked is not written here.
+ */
+static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf)
+{
+        const struct gfs2_rgrp *hdr = buf;
+        rgl->rl_magic = cpu_to_be32(GFS2_MAGIC);
+        rgl->__pad = 0UL;
+        rgl->rl_igeneration = hdr->rg_igeneration;
+        rgl->rl_dinodes = hdr->rg_dinodes;
+        rgl->rl_free = hdr->rg_free;
+        rgl->rl_flags = hdr->rg_flags;
+}
+/*
+ * update_rgrp_lvb_unlinked - adjust the unlinked count kept in the LVB
+ * @rgd: the resource group whose LVB is updated
+ * @change: amount added to the current count (u32 arithmetic)
+ *
+ * The stored value is big-endian: it is converted to CPU order, adjusted,
+ * and converted back before being written.
+ */
+static void update_rgrp_lvb_unlinked(struct gfs2_rgrpd *rgd, u32 change)
+{
+        struct gfs2_rgrp_lvb *lvb = rgd->rd_rgl;
+        lvb->rl_unlinked = cpu_to_be32(be32_to_cpu(lvb->rl_unlinked) + change);
+}
+/*
+ * count_unlinked - count the blocks in GFS2_BLKST_UNLINKED state in an rgrp
+ * @rgd: the resource group; every bitmap buffer must already be read in
+ *
+ * Each bitmap is scanned with repeated gfs2_bitfit() calls, restarting one
+ * block past the previous hit, until BFITNOENT is returned or the end of the
+ * bitmap is reached. A warning is raised for any buffer not marked uptodate.
+ *
+ * Returns: the number of unlinked blocks found
+ */
+static u32 count_unlinked(struct gfs2_rgrpd *rgd)
+{
+        u32 x, count = 0;
+        for (x = 0; x < rgd->rd_length; x++) {
+                struct gfs2_bitmap *bi = rgd->rd_bits + x;
+                const u8 *buffer = bi->bi_bh->b_data + bi->bi_offset;
+                u32 goal = 0;
+                WARN_ON(!buffer_uptodate(bi->bi_bh));
+                while (goal < bi->bi_len * GFS2_NBBY) {
+                        goal = gfs2_bitfit(buffer, bi->bi_len, goal,
+                                           GFS2_BLKST_UNLINKED);
+                        if (goal == BFITNOENT)
+                                break;
+                        /* Found one; resume the search just past it */
+                        count++;
+                        goal++;
+                }
+        }
+        return count;
+}
 /**
- * gfs2_rgrp_go_lock - Read in a RG's header and bitmaps
+ * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
- * @gh: The glock holder for the resource group
+ * @rgd: the struct gfs2_rgrpd describing the RG to read in
 *
 * Read in all of a Resource Group's header and bitmap blocks.
 * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps.
@@ -746,9 +990,8 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
 * Returns: errno
 */
-int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
+int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
 {
-        struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
        struct gfs2_sbd *sdp = rgd->rd_sbd;
        struct gfs2_glock *gl = rgd->rd_gl;
        unsigned int length = rgd->rd_length;
@@ -756,6 +999,9 @@ int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
        unsigned int x, y;
        int error;
+        if (rgd->rd_bits[0].bi_bh != NULL)
+                return 0;
        for (x = 0; x < length; x++) {
                bi = rgd->rd_bits + x;
                error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh);
@@ -782,7 +1028,20 @@ int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
                rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
                rgd->rd_free_clone = rgd->rd_free;
        }
+        if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
+                rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd));
+                gfs2_rgrp_ondisk2lvb(rgd->rd_rgl,
+                                     rgd->rd_bits[0].bi_bh->b_data);
+        }
+        else if (sdp->sd_args.ar_rgrplvb) {
+                if (!gfs2_rgrp_lvb_valid(rgd)){
+                        gfs2_consist_rgrpd(rgd);
+                        error = -EIO;
+                        goto fail;
+                }
+                if (rgd->rd_rgl->rl_unlinked == 0)
+                        rgd->rd_flags &= ~GFS2_RDF_CHECK;
+        }
        return 0;
 fail:
@@ -796,6 +1055,39 @@ fail:
        return error;
 }
+/*
+ * update_rgrp_lvb - refresh the in-core rgrp fields from the glock's LVB
+ * @rgd: the resource group to refresh
+ *
+ * Nothing is done when the rgrp is already flagged GFS2_RDF_UPTODATE. If the
+ * LVB does not carry the GFS2 magic, the rgrp is read in through
+ * gfs2_rgrp_bh_get() instead. Otherwise the flags, free block count, dinode
+ * count and inode generation are taken from the LVB and the rgrp is marked
+ * up to date. GFS2_RDF_CHECK is set unless the LVB records zero unlinked
+ * blocks.
+ *
+ * Returns: 0 on success, or the error from gfs2_rgrp_bh_get()
+ */
+int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
+{
+        u32 rl_flags;
+        if (rgd->rd_flags & GFS2_RDF_UPTODATE)
+                return 0;
+        /* rl_magic is stored big-endian, so compare with the BE constant */
+        if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
+                return gfs2_rgrp_bh_get(rgd);
+        /* Take the bits outside GFS2_RDF_MASK from the LVB and keep only
+           the bits inside the mask from the in-core flags */
+        rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags);
+        rl_flags &= ~GFS2_RDF_MASK;
+        rgd->rd_flags &= GFS2_RDF_MASK;
+        rgd->rd_flags |= (rl_flags | GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
+        /* Zero is zero in either byte order, so no conversion is needed */
+        if (rgd->rd_rgl->rl_unlinked == 0)
+                rgd->rd_flags &= ~GFS2_RDF_CHECK;
+        rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free);
+        rgd->rd_free_clone = rgd->rd_free;
+        rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes);
+        rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration);
+        return 0;
+}
+/*
+ * gfs2_rgrp_go_lock - read in a resource group on behalf of a glock holder
+ * @gh: The glock holder for the resource group
+ *
+ * When the holder asked for GL_SKIP and the rgrplvb mount argument is set,
+ * the read is skipped and 0 is returned. In every other case the header and
+ * bitmaps are read in through gfs2_rgrp_bh_get().
+ *
+ * Returns: errno
+ */
+int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
+{
+        struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
+        struct gfs2_sbd *sdp = rgd->rd_sbd;
+        if ((gh->gh_flags & GL_SKIP) && sdp->sd_args.ar_rgrplvb)
+                return 0;
+        /* Use the rgrp fetched above instead of re-reading gl_object */
+        return gfs2_rgrp_bh_get(rgd);
+}
 /**
 * gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_bh_get()
 * @gh: The glock holder for the resource group
@@ -809,8 +1101,10 @@ void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)
        for (x = 0; x < length; x++) {
                struct gfs2_bitmap *bi = rgd->rd_bits + x;
-                brelse(bi->bi_bh);
+                if (bi->bi_bh) {
-                bi->bi_bh = NULL;
+                        brelse(bi->bi_bh);
+                        bi->bi_bh = NULL;
+                }
        }
 }
@@ -954,6 +1248,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
                                rgd->rd_flags |= GFS2_RGF_TRIMMED;
                                gfs2_trans_add_bh(rgd->rd_gl, bh, 1);
                                gfs2_rgrp_out(rgd, bh->b_data);
+                                gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data);
                                gfs2_trans_end(sdp);
                        }
                }
@@ -974,38 +1269,184 @@ out:
 }
 /**
- * gfs2_qadata_get - get the struct gfs2_qadata structure for an inode
+ * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree
- * @ip: the incore GFS2 inode structure
+ * @bi: the bitmap with the blocks
+ * @ip: the inode structure
+ * @biblk: the 32-bit block number relative to the start of the bitmap
+ * @amount: the number of blocks to reserve
 *
- * Returns: the struct gfs2_qadata
+ * Returns: NULL - reservation was already taken, so not inserted
+ *          pointer to the inserted reservation
 */
+static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi,
+                                       struct gfs2_inode *ip, u32 biblk,
+                                       int amount)
+{
+        struct rb_node **newn, *parent = NULL;
+        int rc;
+        struct gfs2_blkreserv *rs = ip->i_res;
+        struct gfs2_rgrpd *rgd = rs->rs_rgd;
+        u64 fsblock = gfs2_bi2rgd_blk(bi, biblk) + rgd->rd_data0;
-struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip)
+        spin_lock(&rgd->rd_rsspin);
+        newn = &rgd->rd_rstree.rb_node;
+        BUG_ON(!ip->i_res);
+        BUG_ON(gfs2_rs_active(rs));
+        /* Figure out where to put new node */
+        /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/
+        while (*newn) {
+                struct gfs2_blkreserv *cur =
+                        rb_entry(*newn, struct gfs2_blkreserv, rs_node);
+                parent = *newn;
+                rc = rs_cmp(fsblock, amount, cur);
+                if (rc > 0)
+                        newn = &((*newn)->rb_right);
+                else if (rc < 0)
+                        newn = &((*newn)->rb_left);
+                else {
+                        spin_unlock(&rgd->rd_rsspin);
+                        return NULL; /* reservation already in use */
+                }
+        }
+        /* Do our reservation work */
+        rs = ip->i_res;
+        rs->rs_free = amount;
+        rs->rs_biblk = biblk;
+        rs->rs_bi = bi;
+        rb_link_node(&rs->rs_node, parent, newn);
+        rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
+        /* Do our inode accounting for the reservation */
+        /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/
+        /* Do our rgrp accounting for the reservation */
+        rgd->rd_reserved += amount; /* blocks reserved */
+        rgd->rd_rs_cnt++; /* number of in-tree reservations */
+        spin_unlock(&rgd->rd_rsspin);
+        trace_gfs2_rs(ip, rs, TRACE_RS_INSERT);
+        return rs;
+}
+/**
+ * unclaimed_blocks - return number of blocks that aren't spoken for
+ */
+static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd)
 {
-        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+        return rgd->rd_free_clone - rgd->rd_reserved;
-        int error;
-        BUG_ON(ip->i_qadata != NULL);
-        ip->i_qadata = kzalloc(sizeof(struct gfs2_qadata), GFP_NOFS);
-        error = gfs2_rindex_update(sdp);
-        if (error)
-                fs_warn(sdp, "rindex update returns %d\n", error);
-        return ip->i_qadata;
 }
 /**
- * gfs2_blkrsv_get - get the struct gfs2_blkreserv structure for an inode
+ * rg_mblk_search - find a group of multiple free blocks
- * @ip: the incore GFS2 inode structure
+ * @rgd: the resource group descriptor
+ * @rs: the block reservation
+ * @ip: pointer to the inode for which we're reserving blocks
 *
- * Returns: the struct gfs2_qadata
+ * This is very similar to rgblk_search, except we're looking for whole
+ * 64-bit words that represent a chunk of 32 free blocks. I'm only focusing
+ * on aligned dwords for speed's sake.
+ *
+ * Returns: 0 if successful or BFITNOENT if there isn't enough free space
 */
-static int gfs2_blkrsv_get(struct gfs2_inode *ip)
+static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
 {
-        BUG_ON(ip->i_res != NULL);
+        struct gfs2_bitmap *bi = rgd->rd_bits;
-        ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
+        const u32 length = rgd->rd_length;
-        if (!ip->i_res)
+        u32 blk;
-                return -ENOMEM;
+        unsigned int buf, x, search_bytes;
-        return 0;
+        u8 *buffer = NULL;
+        u8 *ptr, *end, *nonzero;
+        u32 goal, rsv_bytes;
+        struct gfs2_blkreserv *rs;
+        u32 best_rs_bytes, unclaimed;
+        int best_rs_blocks;
+        /* Find bitmap block that contains bits for goal block */
+        if (rgrp_contains_block(rgd, ip->i_goal))
+                goal = ip->i_goal - rgd->rd_data0;
+        else
+                goal = rgd->rd_last_alloc;
+        for (buf = 0; buf < length; buf++) {
+                bi = rgd->rd_bits + buf;
+                /* Convert scope of "goal" from rgrp-wide to within
+                   found bit block */
+                if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) {
+                        goal -= bi->bi_start * GFS2_NBBY;
+                        goto do_search;
+                }
+        }
+        buf = 0;
+        goal = 0;
+do_search:
+        best_rs_blocks = max_t(int, atomic_read(&ip->i_res->rs_sizehint),
+                               (RGRP_RSRV_MINBLKS * rgd->rd_length));
+        best_rs_bytes = (best_rs_blocks *
+                         (1 + (RSRV_CONTENTION_FACTOR * rgd->rd_rs_cnt))) /
+                GFS2_NBBY; /* 1 + is for our not-yet-created reservation */
+        best_rs_bytes = ALIGN(best_rs_bytes, sizeof(u64));
+        unclaimed = unclaimed_blocks(rgd);
+        if (best_rs_bytes * GFS2_NBBY > unclaimed)
+                best_rs_bytes = unclaimed >> GFS2_BIT_SIZE;
+        for (x = 0; x <= length; x++) {
+                bi = rgd->rd_bits + buf;
+                if (test_bit(GBF_FULL, &bi->bi_flags))
+                        goto skip;
+                WARN_ON(!buffer_uptodate(bi->bi_bh));
+                if (bi->bi_clone)
+                        buffer = bi->bi_clone + bi->bi_offset;
+                else
+                        buffer = bi->bi_bh->b_data + bi->bi_offset;
+                /* We have to keep the reservations aligned on u64 boundaries
+                   otherwise we could get situations where a byte can't be
+                   used because it's after a reservation, but a free bit still
+                   is within the reservation's area. */
+                ptr = buffer + ALIGN(goal >> GFS2_BIT_SIZE, sizeof(u64));
+                end = (buffer + bi->bi_len);
+                while (ptr < end) {
+                        rsv_bytes = 0;
+                        if ((ptr + best_rs_bytes) <= end)
+                                search_bytes = best_rs_bytes;
+                        else
+                                search_bytes = end - ptr;
+                        BUG_ON(!search_bytes);
+                        nonzero = memchr_inv(ptr, 0, search_bytes);
+                        /* If the lot is all zeroes, reserve the whole size. If
+                           there's enough zeroes to satisfy the request, use
+                           what we can. If there's not enough, keep looking. */
+                        if (nonzero == NULL)
+                                rsv_bytes = search_bytes;
+                        else if ((nonzero - ptr) * GFS2_NBBY >=
+                                 ip->i_res->rs_requested)
+                                rsv_bytes = (nonzero - ptr);
+                        if (rsv_bytes) {
+                                blk = ((ptr - buffer) * GFS2_NBBY);
+                                BUG_ON(blk >= bi->bi_len * GFS2_NBBY);
+                                rs = rs_insert(bi, ip, blk,
+                                               rsv_bytes * GFS2_NBBY);
+                                if (IS_ERR(rs))
+                                        return PTR_ERR(rs);
+                                if (rs)
+                                        return 0;
+                        }
+                        ptr += ALIGN(search_bytes, sizeof(u64));
+                }
+skip:
+                /* Try next bitmap block (wrap back to rgrp header
+                   if at end) */
+                buf++;
+                buf %= length;
+                goal = 0;
+        }
+        return BFITNOENT;
 }
 /**
@@ -1014,24 +1455,26 @@ static int gfs2_blkrsv_get(struct gfs2_inode *ip)
 * @ip: the inode
 *
 * If there's room for the requested blocks to be allocated from the RG:
+ * This will try to get a multi-block reservation first, and if that doesn't
+ * fit, it will take what it can.
 *
 * Returns: 1 on success (it fits), 0 on failure (it doesn't fit)
 */
-static int try_rgrp_fit(const struct gfs2_rgrpd *rgd, const struct gfs2_inode *ip)
+static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
 {
-        const struct gfs2_blkreserv *rs = ip->i_res;
+        struct gfs2_blkreserv *rs = ip->i_res;
        if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR))
                return 0;
-        if (rgd->rd_free_clone >= rs->rs_requested)
+        /* Look for a multi-block reservation. */
+        if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS &&
+            rg_mblk_search(rgd, ip) != BFITNOENT)
+                return 1;
+        if (unclaimed_blocks(rgd) >= rs->rs_requested)
                return 1;
-        return 0;
-}
-static inline u32 gfs2_bi2rgd_blk(struct gfs2_bitmap *bi, u32 blk)
+        return 0;
-{
-        return (bi->bi_start * GFS2_NBBY) + blk;
 }
 /**
@@ -1101,119 +1544,120 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
 }
 /**
- * get_local_rgrp - Choose and lock a rgrp for allocation
+ * gfs2_inplace_reserve - Reserve space in the filesystem
 * @ip: the inode to reserve space for
- * @last_unlinked: the last unlinked block
+ * @requested: the number of blocks to be reserved
- *
- * Try to acquire rgrp in way which avoids contending with others.
 *
 * Returns: errno
 */
-static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
+int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-        struct gfs2_rgrpd *rgd, *begin = NULL;
+        struct gfs2_rgrpd *begin = NULL;
        struct gfs2_blkreserv *rs = ip->i_res;
-        int error, rg_locked, flags = LM_FLAG_TRY;
+        int error = 0, rg_locked, flags = LM_FLAG_TRY;
+        u64 last_unlinked = NO_BLOCK;
        int loops = 0;
-        if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal))
+        if (sdp->sd_args.ar_rgrplvb)
-                rgd = begin = ip->i_rgd;
+                flags |= GL_SKIP;
-        else
+        rs->rs_requested = requested;
-                rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
+        if (gfs2_assert_warn(sdp, requested)) {
+                error = -EINVAL;
-        if (rgd == NULL)
+                goto out;
+        }
+        if (gfs2_rs_active(rs)) {
+                begin = rs->rs_rgd;
+                flags = 0; /* Yoda: Do or do not. There is no try */
+        } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) {
+                rs->rs_rgd = begin = ip->i_rgd;
+        } else {
+                rs->rs_rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
+        }
+        if (rs->rs_rgd == NULL)
                return -EBADSLT;
        while (loops < 3) {
                rg_locked = 0;
-                if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) {
+                if (gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) {
                        rg_locked = 1;
                        error = 0;
+                } else if (!loops && !gfs2_rs_active(rs) &&
+                           rs->rs_rgd->rd_rs_cnt > RGRP_RSRV_MAX_CONTENDERS) {
+                        /* If the rgrp already is maxed out for contenders,
+                           we can eliminate it as a "first pass" without even
+                           requesting the rgrp glock. */
+                        error = GLR_TRYFAILED;
                } else {
-                        error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
+                        error = gfs2_glock_nq_init(rs->rs_rgd->rd_gl,
-                                                   flags, &rs->rs_rgd_gh);
+                                                   LM_ST_EXCLUSIVE, flags,
+                                                   &rs->rs_rgd_gh);
+                        if (!error && sdp->sd_args.ar_rgrplvb) {
+                                error = update_rgrp_lvb(rs->rs_rgd);
+                                if (error) {
+                                        gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
+                                        return error;
+                                }
+                        }
                }
                switch (error) {
                case 0:
-                        if (try_rgrp_fit(rgd, ip)) {
+                        if (gfs2_rs_active(rs)) {
-                                ip->i_rgd = rgd;
+                                if (unclaimed_blocks(rs->rs_rgd) +
+                                    rs->rs_free >= rs->rs_requested) {
+                                        ip->i_rgd = rs->rs_rgd;
+                                        return 0;
+                                }
+                                /* We have a multi-block reservation, but the
+                                   rgrp doesn't have enough free blocks to
+                                   satisfy the request. Free the reservation
+                                   and look for a suitable rgrp. */
+                                gfs2_rs_deltree(rs);
+                        }
+                        if (try_rgrp_fit(rs->rs_rgd, ip)) {
+                                if (sdp->sd_args.ar_rgrplvb)
+                                        gfs2_rgrp_bh_get(rs->rs_rgd);
+                                ip->i_rgd = rs->rs_rgd;
                                return 0;
                        }
-                        if (rgd->rd_flags & GFS2_RDF_CHECK)
+                        if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK) {
-                                try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
+                                if (sdp->sd_args.ar_rgrplvb)
+                                        gfs2_rgrp_bh_get(rs->rs_rgd);
+                                try_rgrp_unlink(rs->rs_rgd, &last_unlinked,
+                                                ip->i_no_addr);
+                        }
                        if (!rg_locked)
                                gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
                        /* fall through */
                case GLR_TRYFAILED:
-                        rgd = gfs2_rgrpd_get_next(rgd);
+                        rs->rs_rgd = gfs2_rgrpd_get_next(rs->rs_rgd);
-                        if (rgd == begin) {
+                        rs->rs_rgd = rs->rs_rgd ? : begin; /* if NULL, wrap */
-                                flags = 0;
+                        if (rs->rs_rgd != begin) /* If we didn't wrap */
-                                loops++;
+                                break;
-                        }
+                        flags &= ~LM_FLAG_TRY;
+                        loops++;
+                        /* Check that fs hasn't grown if writing to rindex */
+                        if (ip == GFS2_I(sdp->sd_rindex) &&
+                            !sdp->sd_rindex_uptodate) {
+                                error = gfs2_ri_update(ip);
+                                if (error)
+                                        goto out;
+                        } else if (loops == 2)
+                                /* Flushing the log may release space */
+                                gfs2_log_flush(sdp, NULL);
                        break;
                default:
-                        return error;
+                        goto out;
                }
        }
+        error = -ENOSPC;
-        return -ENOSPC;
-}
-static void gfs2_blkrsv_put(struct gfs2_inode *ip)
-{
-        BUG_ON(ip->i_res == NULL);
-        kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
-        ip->i_res = NULL;
-}
-/**
- * gfs2_inplace_reserve - Reserve space in the filesystem
- * @ip: the inode to reserve space for
- * @requested: the number of blocks to be reserved
- *
- * Returns: errno
- */
-int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
-{
-        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-        struct gfs2_blkreserv *rs;
-        int error;
-        u64 last_unlinked = NO_BLOCK;
-        int tries = 0;
-        error = gfs2_blkrsv_get(ip);
-        if (error)
-                return error;
-        rs = ip->i_res;
-        rs->rs_requested = requested;
-        if (gfs2_assert_warn(sdp, requested)) {
-                error = -EINVAL;
-                goto out;
-        }
-        do {
-                error = get_local_rgrp(ip, &last_unlinked);
-                if (error != -ENOSPC)
-                        break;
-                /* Check that fs hasn't grown if writing to rindex */
-                if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
-                        error = gfs2_ri_update(ip);
-                        if (error)
-                                break;
-                        continue;
-                }
-                /* Flushing the log may release space */
-                gfs2_log_flush(sdp, NULL);
-        } while (tries++ < 3);
 out:
        if (error)
-                gfs2_blkrsv_put(ip);
+                rs->rs_requested = 0;
        return error;
 }
@@ -1228,9 +1672,15 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
 {
        struct gfs2_blkreserv *rs = ip->i_res;
+        if (!rs)
+                return;
+        if (!rs->rs_free)
+                gfs2_rs_deltree(rs);
        if (rs->rs_rgd_gh.gh_gl)
                gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
-        gfs2_blkrsv_put(ip);
+        rs->rs_requested = 0;
 }
 /**
@@ -1326,7 +1776,27 @@ do_search:
                if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
                        buffer = bi->bi_clone + bi->bi_offset;
-                biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state);
+                while (1) {
+                        struct gfs2_blkreserv *rs;
+                        u32 rgblk;
+                        biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state);
+                        if (biblk == BFITNOENT)
+                                break;
+                        /* Check if this block is reserved() */
+                        rgblk = gfs2_bi2rgd_blk(bi, biblk);
+                        rs = rs_find(rgd, rgblk);
+                        if (rs == NULL)
+                                break;
+                        BUG_ON(rs->rs_bi != bi);
+                        biblk = BFITNOENT;
+                        /* This should jump to the first block after the
+                           reservation. */
+                        goal = rs->rs_biblk + rs->rs_free;
+                        if (goal >= bi->bi_len * GFS2_NBBY)
+                                break;
+                }
                if (biblk != BFITNOENT)
                        break;
@@ -1362,8 +1832,9 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi,
                             u32 blk, bool dinode, unsigned int *n)
 {
        const unsigned int elen = *n;
-        u32 goal;
+        u32 goal, rgblk;
        const u8 *buffer = NULL;
+        struct gfs2_blkreserv *rs;
        *n = 0;
        buffer = bi->bi_bh->b_data + bi->bi_offset;
@@ -1376,6 +1847,10 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi,
                goal++;
                if (goal >= (bi->bi_len * GFS2_NBBY))
                        break;
+                rgblk = gfs2_bi2rgd_blk(bi, goal);
+                rs = rs_find(rgd, rgblk);
+                if (rs) /* Oops, we bumped into someone's reservation */
+                        break;
                if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) !=
                    GFS2_BLKST_FREE)
                        break;
@@ -1451,12 +1926,22 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
 int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
 {
-        const struct gfs2_rgrpd *rgd = gl->gl_object;
+        struct gfs2_rgrpd *rgd = gl->gl_object;
+        struct gfs2_blkreserv *trs;
+        const struct rb_node *n;
        if (rgd == NULL)
                return 0;
-        gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n",
+        gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u\n",
                       (unsigned long long)rgd->rd_addr, rgd->rd_flags,
-                       rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes);
+                       rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
+                       rgd->rd_reserved);
+        spin_lock(&rgd->rd_rsspin);
+        for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
+                trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
+                dump_rs(seq, trs);
+        }
+        spin_unlock(&rgd->rd_rsspin);
        return 0;
 }
@@ -1471,10 +1956,63 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
 }
 /**
+ * claim_reserved_blks - Claim previously reserved blocks
+ * @ip: the inode that's claiming the reservation
+ * @dinode: 1 if this block is a dinode block, otherwise data block
+ * @nblocks: desired extent length
+ *
+ * Lay claim to previously allocated block reservation blocks.
+ * Returns: Starting block number of the blocks claimed.
+ * Sets *nblocks to the actual extent length allocated.
+ */
+static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode,
+                               unsigned int *nblocks)
+{
+        struct gfs2_blkreserv *rs = ip->i_res;
+        struct gfs2_rgrpd *rgd = rs->rs_rgd;
+        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+        struct gfs2_bitmap *bi;
+        /* Capture the start now; the loop below advances the reservation */
+        u64 start_block = gfs2_rs_startblk(rs);
+        const unsigned int elen = *nblocks;
+        /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/
+        gfs2_assert_withdraw(sdp, rgd);
+        /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/
+        bi = rs->rs_bi;
+        gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
+        /* Claim up to elen blocks, stopping early if the reservation runs
+           out; *nblocks ends up as the number actually claimed */
+        for (*nblocks = 0; *nblocks < elen && rs->rs_free; (*nblocks)++) {
+                /* Set the state of the next reserved block. Only the first
+                   block can be a dinode; the rest are plain used blocks. */
+                gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_biblk,
+                            dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
+                rs->rs_biblk++;
+                rs->rs_free--;
+                BUG_ON(!rgd->rd_reserved);
+                rgd->rd_reserved--;
+                dinode = false;
+                trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM);
+        }
+        if (!rs->rs_free) {
+                /* The reservation is used up: drop it from the tree and try
+                   to set up a fresh one in the same rgrp */
+                gfs2_rs_deltree(rs);
+                /* -nblocks because we haven't returned to do the math yet.
+                   I'm doing the math backwards to prevent negative numbers,
+                   but think of it as:
+                   if (unclaimed_blocks(rgd) - *nblocks >= RGRP_RSRV_MINBLKS */
+                if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS + *nblocks)
+                        rg_mblk_search(rgd, ip);
+        }
+        return start_block;
+}
+/**
 * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode
 * @ip: the inode to allocate the block for
 * @bn: Used to return the starting block number
- * @ndata: requested number of blocks/extent length (value/result)
+ * @nblocks: requested number of blocks/extent length (value/result)
 * @dinode: 1 if we're allocating a dinode block, else 0
 * @generation: the generation number of the inode
 *
@@ -1496,23 +2034,37 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
        /* Only happens if there is a bug in gfs2, return something distinctive
         * to ensure that it is noticed.
         */
-        if (ip->i_res == NULL)
+        if (ip->i_res->rs_requested == 0)
                return -ECANCELED;
-        rgd = ip->i_rgd;
+        /* Check if we have a multi-block reservation, and if so, claim the
+           next free block from it. */
-        if (!dinode && rgrp_contains_block(rgd, ip->i_goal))
+        if (gfs2_rs_active(ip->i_res)) {
-                goal = ip->i_goal - rgd->rd_data0;
+                BUG_ON(!ip->i_res->rs_free);
-        else
+                rgd = ip->i_res->rs_rgd;
-                goal = rgd->rd_last_alloc;
+                block = claim_reserved_blks(ip, dinode, nblocks);
+        } else {
-        blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi);
+                rgd = ip->i_rgd;
-        /* Since all blocks are reserved in advance, this shouldn't happen */
+                if (!dinode && rgrp_contains_block(rgd, ip->i_goal))
-        if (blk == BFITNOENT)
+                        goal = ip->i_goal - rgd->rd_data0;
-                goto rgrp_error;
+                else
+                        goal = rgd->rd_last_alloc;
+                blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi);
+                /* Since all blocks are reserved in advance, this shouldn't
+                   happen */
+                if (blk == BFITNOENT) {
+                        printk(KERN_WARNING "BFITNOENT, nblocks=%u\n",
+                               *nblocks);
+                        printk(KERN_WARNING "FULL=%d\n",
+                               test_bit(GBF_FULL, &rgd->rd_bits->bi_flags));
+                        goto rgrp_error;
+                }
-        block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks);
+                block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks);
+        }
        ndata = *nblocks;
        if (dinode)
                ndata--;
@@ -1529,8 +2081,10 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
                        brelse(dibh);
                }
        }
-        if (rgd->rd_free < *nblocks)
+        if (rgd->rd_free < *nblocks) {
+                printk(KERN_WARNING "nblocks=%u\n", *nblocks);
                goto rgrp_error;
+        }
        rgd->rd_free -= *nblocks;
        if (dinode) {
@@ -1542,6 +2096,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
        gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
+        gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
        gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
        if (dinode)
@@ -1588,6 +2143,7 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
        rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
        gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
+        gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
        /* Directories keep their data in the metadata address space */
        if (meta || ip->i_depth)
@@ -1624,6 +2180,8 @@ void gfs2_unlink_di(struct inode *inode)
        trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
        gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
+        gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
+        update_rgrp_lvb_unlinked(rgd, 1);
 }
 static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
@@ -1643,6 +2201,8 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
        gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
+        gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
+        update_rgrp_lvb_unlinked(rgd, -1);
        gfs2_statfs_change(sdp, 0, +1, -1);
 }
@@ -1784,6 +2344,7 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
                for (x = 0; x < rlist->rl_rgrps; x++)
                        gfs2_holder_uninit(&rlist->rl_ghs[x]);
                kfree(rlist->rl_ghs);
+                rlist->rl_ghs = NULL;
        }
 }
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index b4b10f4de25f..ca6e26729b86 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -13,6 +13,14 @@
 #include <linux/slab.h>
 #include <linux/uaccess.h>
+/* Since each block in the file system is represented by two bits in the
+ * bitmap, one 64-bit word in the bitmap will represent 32 blocks.
+ * By reserving 32 blocks at a time, we can optimize / shortcut how we search
+ * through the bitmaps by looking a word at a time.
+ */
+#define RGRP_RSRV_MINBYTES 8
+#define RGRP_RSRV_MINBLKS ((u32)(RGRP_RSRV_MINBYTES * GFS2_NBBY))
 struct gfs2_rgrpd;
 struct gfs2_sbd;
 struct gfs2_holder;
@@ -29,13 +37,7 @@ extern void gfs2_free_clones(struct gfs2_rgrpd *rgd);
 extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh);
 extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh);
-extern struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip);
+extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
-static inline void gfs2_qadata_put(struct gfs2_inode *ip)
-{
-        BUG_ON(ip->i_qadata == NULL);
-        kfree(ip->i_qadata);
-        ip->i_qadata = NULL;
-}
 extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested);
 extern void gfs2_inplace_release(struct gfs2_inode *ip);
@@ -43,6 +45,9 @@ extern void gfs2_inplace_release(struct gfs2_inode *ip);
 extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
                             bool dinode, u64 *generation);
+extern int gfs2_rs_alloc(struct gfs2_inode *ip);
+extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs);
+extern void gfs2_rs_delete(struct gfs2_inode *ip);
 extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta);
 extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
 extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
@@ -68,4 +73,30 @@ extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
                                   const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed);
 extern int gfs2_fitrim(struct file *filp, void __user *argp);
+/*
+ * gfs2_mb_reserved - is a multi-block reservation "inplace" reserved?
+ * @ip: the inode to check
+ *
+ * Returns 1 when the inode has a reservation structure whose rs_requested
+ * is non-zero, 0 otherwise.
+ */
+static inline int gfs2_mb_reserved(struct gfs2_inode *ip)
+{
+        const struct gfs2_blkreserv *res = ip->i_res;
+        return (res != NULL && res->rs_requested != 0) ? 1 : 0;
+}
+/*
+ * gfs2_rs_active - is a multi-block reservation in the rgrp tree?
+ * @rs: the reservation to check (may be NULL)
+ *
+ * Returns 1 when @rs is non-NULL and has a bitmap (rs_bi) attached,
+ * 0 otherwise.
+ */
+static inline int gfs2_rs_active(struct gfs2_blkreserv *rs)
+{
+        return (rs != NULL && rs->rs_bi != NULL) ? 1 : 0;
+}
+/*
+ * gfs2_bi2rgd_blk - offset a bitmap-relative block number by the bitmap start
+ * @bi: the bitmap the block number is relative to
+ * @blk: block number within @bi
+ *
+ * Returns @blk plus bi_start scaled by GFS2_NBBY (presumably giving an
+ * rgrp-relative block number, judging by the name).
+ */
+static inline u32 gfs2_bi2rgd_blk(const struct gfs2_bitmap *bi, u32 blk)
+{
+        const u32 bitmap_base = bi->bi_start * GFS2_NBBY;
+        return bitmap_base + blk;
+}
+/*
+ * gfs2_rs_startblk - block number at which a reservation currently starts
+ * @rs: the reservation; rs_bi and rs_rgd are dereferenced and must be set
+ *
+ * Converts the reservation's position in its bitmap (rs_biblk) with
+ * gfs2_bi2rgd_blk() and adds the resource group's rd_data0.
+ */
+static inline u64 gfs2_rs_startblk(const struct gfs2_blkreserv *rs)
+{
+        const u32 rgd_blk = gfs2_bi2rgd_blk(rs->rs_bi, rs->rs_biblk);
+        return rgd_blk + rs->rs_rgd->rd_data0;
+}
 #endif /* __RGRP_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 713e621c240b..fc3168f47a14 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -78,6 +78,8 @@ enum {
        Opt_quota_quantum,
        Opt_barrier,
        Opt_nobarrier,
+        Opt_rgrplvb,
+        Opt_norgrplvb,
        Opt_error,
 };
@@ -115,6 +117,8 @@ static const match_table_t tokens = {
        {Opt_quota_quantum, "quota_quantum=%d"},
        {Opt_barrier, "barrier"},
        {Opt_nobarrier, "nobarrier"},
+        {Opt_rgrplvb, "rgrplvb"},
+        {Opt_norgrplvb, "norgrplvb"},
        {Opt_error, NULL}
 };
@@ -267,6 +271,12 @@ int gfs2_mount_args(struct gfs2_args *args, char *options)
                case Opt_nobarrier:
                        args->ar_nobarrier = 1;
                        break;
+                case Opt_rgrplvb:
+                        args->ar_rgrplvb = 1;
+                        break;
+                case Opt_norgrplvb:
+                        args->ar_rgrplvb = 0;
+                        break;
                case Opt_error:
                default:
                        printk(KERN_WARNING "GFS2: invalid mount option: %s\n", o);
@@ -838,7 +848,7 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
        int error;
        flush_workqueue(gfs2_delete_workqueue);
-        gfs2_quota_sync(sdp->sd_vfs, 0, 1);
+        gfs2_quota_sync(sdp->sd_vfs, 0);
        gfs2_statfs_sync(sdp->sd_vfs, 0);
        error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE,
@@ -952,6 +962,8 @@ restart:
 static int gfs2_sync_fs(struct super_block *sb, int wait)
 {
        struct gfs2_sbd *sdp = sb->s_fs_info;
+        gfs2_quota_sync(sb, -1);
        if (wait && sdp)
                gfs2_log_flush(sdp, NULL);
        return 0;
@@ -1379,6 +1391,8 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
                seq_printf(s, ",nobarrier");
        if (test_bit(SDF_DEMOTE, &sdp->sd_flags))
                seq_printf(s, ",demote_interface_used");
+        if (args->ar_rgrplvb)
+                seq_printf(s, ",rgrplvb");
        return 0;
 }
@@ -1399,7 +1413,6 @@ static void gfs2_final_release_pages(struct gfs2_inode *ip)
 static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-        struct gfs2_qadata *qa;
        struct gfs2_rgrpd *rgd;
        struct gfs2_holder gh;
        int error;
@@ -1409,13 +1422,13 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
                return -EIO;
        }
-        qa = gfs2_qadata_get(ip);
+        error = gfs2_rindex_update(sdp);
-        if (!qa)
+        if (error)
-                return -ENOMEM;
+                return error;
        error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
        if (error)
-                goto out;
+                return error;
        rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1);
        if (!rgd) {
@@ -1443,8 +1456,6 @@ out_rg_gunlock:
        gfs2_glock_dq_uninit(&gh);
 out_qs:
        gfs2_quota_unhold(ip);
-out:
-        gfs2_qadata_put(ip);
        return error;
 }
@@ -1545,6 +1556,9 @@ out_truncate:
 out_unlock:
        /* Error path for case 1 */
+        if (gfs2_rs_active(ip->i_res))
+                gfs2_rs_deltree(ip->i_res);
        if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags))
                gfs2_glock_dq(&ip->i_iopen_gh);
        gfs2_holder_uninit(&ip->i_iopen_gh);
@@ -1554,6 +1568,7 @@ out_unlock:
 out:
        /* Case 3 starts here */
        truncate_inode_pages(&inode->i_data, 0);
+        gfs2_rs_delete(ip);
        clear_inode(inode);
        gfs2_dir_hash_inval(ip);
        ip->i_gl->gl_object = NULL;
@@ -1576,6 +1591,7 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb)
                ip->i_flags = 0;
                ip->i_gl = NULL;
                ip->i_rgd = NULL;
+                ip->i_res = NULL;
        }
        return &ip->i_inode;
 }
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 9c2592b1d5ff..8056b7b7238e 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -168,7 +168,7 @@ static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf,
        if (simple_strtol(buf, NULL, 0) != 1)
                return -EINVAL;
-        gfs2_quota_sync(sdp->sd_vfs, 0, 1);
+        gfs2_quota_sync(sdp->sd_vfs, 0);
        return len;
 }
@@ -276,7 +276,15 @@ static struct attribute *gfs2_attrs[] = {
        NULL,
 };
+/*
+ * kobject release callback: frees the gfs2_sbd that embeds @kobj as its
+ * sd_kobj member.
+ */
+static void gfs2_sbd_release(struct kobject *kobj)
+{
+        kfree(container_of(kobj, struct gfs2_sbd, sd_kobj));
+}
 static struct kobj_type gfs2_ktype = {
+        .release = gfs2_sbd_release,
        .default_attrs = gfs2_attrs,
        .sysfs_ops     = &gfs2_attr_ops,
 };
@@ -583,6 +591,7 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
        char ro[20];
        char spectator[20];
        char *envp[] = { ro, spectator, NULL };
+        int sysfs_frees_sdp = 0;
        sprintf(ro, "RDONLY=%d", (sb->s_flags & MS_RDONLY) ? 1 : 0);
        sprintf(spectator, "SPECTATOR=%d", sdp->sd_args.ar_spectator ? 1 : 0);
@@ -591,8 +600,10 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
        error = kobject_init_and_add(&sdp->sd_kobj, &gfs2_ktype, NULL,
                                     "%s", sdp->sd_table_name);
        if (error)
-                goto fail;
+                goto fail_reg;
+        sysfs_frees_sdp = 1; /* Freeing sdp is now done by sysfs calling
+                                function gfs2_sbd_release. */
        error = sysfs_create_group(&sdp->sd_kobj, &tune_group);
        if (error)
                goto fail_reg;
@@ -615,9 +626,13 @@ fail_lock_module:
 fail_tune:
        sysfs_remove_group(&sdp->sd_kobj, &tune_group);
 fail_reg:
-        kobject_put(&sdp->sd_kobj);
+        free_percpu(sdp->sd_lkstats);
-fail:
        fs_err(sdp, "error %d adding sysfs files", error);
+        if (sysfs_frees_sdp)
+                kobject_put(&sdp->sd_kobj);
+        else
+                kfree(sdp);
+        sb->s_fs_info = NULL;
        return error;
 }
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index 1b8b81588199..a25c252fe412 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -14,6 +14,7 @@
 #include <linux/ktime.h>
 #include "incore.h"
 #include "glock.h"
+#include "rgrp.h"
 #define dlm_state_name(nn) { DLM_LOCK_##nn, #nn }
 #define glock_trace_name(x) __print_symbolic(x,         \
@@ -31,6 +32,17 @@
                            { GFS2_BLKST_DINODE, "dinode" },    \
                            { GFS2_BLKST_UNLINKED, "unlinked" })
+/* Function codes passed as the "func" argument of the gfs2_rs tracepoint */
+#define TRACE_RS_DELETE  0
+#define TRACE_RS_TREEDEL 1
+#define TRACE_RS_INSERT  2
+#define TRACE_RS_CLAIM   3
+/*
+ * Map a TRACE_RS_* code to the fixed-width tag shown in trace output.
+ * The table is keyed by the named constants rather than by repeated
+ * literals so that the codes and their labels cannot drift apart.
+ */
+#define rs_func_name(x) __print_symbolic(x,     \
+                                         { TRACE_RS_DELETE,  "del " }, \
+                                         { TRACE_RS_TREEDEL, "tdel" }, \
+                                         { TRACE_RS_INSERT,  "ins " }, \
+                                         { TRACE_RS_CLAIM,   "clm " })
 #define show_glock_flags(flags) __print_flags(flags, "",        \
        {(1UL << GLF_LOCK),                     "l" },          \
        {(1UL << GLF_DEMOTE),                   "D" },          \
@@ -470,6 +482,7 @@ TRACE_EVENT(gfs2_block_alloc,
                __field(        u8,     block_state             )
                __field(        u64,    rd_addr                 )
                __field(        u32,    rd_free_clone           )
+                __field(        u32,    rd_reserved             )
        ),
        TP_fast_assign(
@@ -480,16 +493,58 @@ TRACE_EVENT(gfs2_block_alloc,
                __entry->block_state    = block_state;
                __entry->rd_addr        = rgd->rd_addr;
                __entry->rd_free_clone  = rgd->rd_free_clone;
+                __entry->rd_reserved    = rgd->rd_reserved;
        ),
-        TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u",
+        TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u rr:%lu",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long long)__entry->inum,
                  (unsigned long long)__entry->start,
                  (unsigned long)__entry->len,
                  block_state_name(__entry->block_state),
                  (unsigned long long)__entry->rd_addr,
-                  __entry->rd_free_clone)
+                  __entry->rd_free_clone, (unsigned long)__entry->rd_reserved)
+);
+/*
+ * Keep track of multi-block reservations as they are allocated/freed.
+ *
+ * @ip:   inode owning the reservation (may be NULL; inum is then logged as 0)
+ * @rs:   the reservation being traced
+ * @func: one of the TRACE_RS_* codes, printed via rs_func_name()
+ *
+ * Fields taken from the resource group are logged as 0 when the
+ * reservation has no resource group attached.
+ */
+TRACE_EVENT(gfs2_rs,
+        TP_PROTO(const struct gfs2_inode *ip, const struct gfs2_blkreserv *rs,
+                 u8 func),
+        TP_ARGS(ip, rs, func),
+        TP_STRUCT__entry(
+                __field(        dev_t,  dev                     )
+                __field(        u64,    rd_addr                 )
+                __field(        u32,    rd_free_clone           )
+                __field(        u32,    rd_reserved             )
+                __field(        u64,    inum                    )
+                __field(        u64,    start                   )
+                __field(        u32,    free                    )
+                __field(        u8,     func                    )
+        ),
+        TP_fast_assign(
+                __entry->dev            = rs->rs_rgd ? rs->rs_rgd->rd_sbd->sd_vfs->s_dev : 0;
+                __entry->rd_addr        = rs->rs_rgd ? rs->rs_rgd->rd_addr : 0;
+                __entry->rd_free_clone  = rs->rs_rgd ? rs->rs_rgd->rd_free_clone : 0;
+                __entry->rd_reserved    = rs->rs_rgd ? rs->rs_rgd->rd_reserved : 0;
+                __entry->inum           = ip ? ip->i_no_addr : 0;
+                /* gfs2_rs_startblk() dereferences both rs_rgd and rs_bi, so
+                   apply the same NULL guard as the fields above. */
+                __entry->start          = (rs->rs_rgd && rs->rs_bi) ?
+                                          gfs2_rs_startblk(rs) : 0;
+                __entry->free           = rs->rs_free;
+                __entry->func           = func;
+        ),
+        TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s "
+                  "f:%lu",
+                  MAJOR(__entry->dev), MINOR(__entry->dev),
+                  (unsigned long long)__entry->inum,
+                  (unsigned long long)__entry->start,
+                  (unsigned long long)__entry->rd_addr,
+                  (unsigned long)__entry->rd_free_clone,
+                  (unsigned long)__entry->rd_reserved,
+                  rs_func_name(__entry->func), (unsigned long)__entry->free)
 );
 #endif /* _TRACE_GFS2_H */
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
index 125d4572e1c0..41f42cdccbb8 100644
--- a/fs/gfs2/trans.h
+++ b/fs/gfs2/trans.h
@@ -31,7 +31,7 @@ struct gfs2_glock;
 static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip)
 {
        const struct gfs2_blkreserv *rs = ip->i_res;
-        if (rs->rs_requested < ip->i_rgd->rd_length)
+        if (rs && rs->rs_requested < ip->i_rgd->rd_length)
                return rs->rs_requested + 1;
        return ip->i_rgd->rd_length;
 }
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index 3586b0dd6aa7..80535739ac7b 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -79,23 +79,19 @@ int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
                       const char *type, const char *function,
                       char *file, unsigned int line);
-static inline int gfs2_meta_check_i(struct gfs2_sbd *sdp,
+static inline int gfs2_meta_check(struct gfs2_sbd *sdp,
-                                    struct buffer_head *bh,
+                                    struct buffer_head *bh)
-                                    const char *function,
-                                    char *file, unsigned int line)
 {
        struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
        u32 magic = be32_to_cpu(mh->mh_magic);
-        if (unlikely(magic != GFS2_MAGIC))
+        if (unlikely(magic != GFS2_MAGIC)) {
-                return gfs2_meta_check_ii(sdp, bh, "magic number", function,
+                printk(KERN_ERR "GFS2: Magic number missing at %llu\n",
-                                          file, line);
+                       (unsigned long long)bh->b_blocknr);
+                return -EIO;
+        }
        return 0;
 }
-#define gfs2_meta_check(sdp, bh) \
-gfs2_meta_check_i((sdp), (bh), __func__, __FILE__, __LINE__)
 int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
                           u16 type, u16 t,
                           const char *function,
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 927f4df874ae..27a0b4a901f5 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -325,12 +325,11 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
                               struct gfs2_ea_header *ea,
                               struct gfs2_ea_header *prev, int leave)
 {
-        struct gfs2_qadata *qa;
        int error;
-        qa = gfs2_qadata_get(ip);
+        error = gfs2_rindex_update(GFS2_SB(&ip->i_inode));
-        if (!qa)
+        if (error)
-                return -ENOMEM;
+                return error;
        error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
        if (error)
@@ -340,7 +339,6 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
        gfs2_quota_unhold(ip);
 out_alloc:
-        gfs2_qadata_put(ip);
        return error;
 }
@@ -713,17 +711,16 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
                             unsigned int blks,
                             ea_skeleton_call_t skeleton_call, void *private)
 {
-        struct gfs2_qadata *qa;
        struct buffer_head *dibh;
        int error;
-        qa = gfs2_qadata_get(ip);
+        error = gfs2_rindex_update(GFS2_SB(&ip->i_inode));
-        if (!qa)
+        if (error)
-                return -ENOMEM;
+                return error;
        error = gfs2_quota_lock_check(ip);
        if (error)
-                goto out;
+                return error;
        error = gfs2_inplace_reserve(ip, blks);
        if (error)
@@ -753,8 +750,6 @@ out_ipres:
        gfs2_inplace_release(ip);
 out_gunlock_q:
        gfs2_quota_unlock(ip);
-out:
-        gfs2_qadata_put(ip);
        return error;
 }
@@ -1494,16 +1489,15 @@ out_gunlock:
 int gfs2_ea_dealloc(struct gfs2_inode *ip)
 {
-        struct gfs2_qadata *qa;
        int error;
-        qa = gfs2_qadata_get(ip);
+        error = gfs2_rindex_update(GFS2_SB(&ip->i_inode));
-        if (!qa)
+        if (error)
-                return -ENOMEM;
+                return error;
        error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
        if (error)
-                goto out_alloc;
+                return error;
        error = ea_foreach(ip, ea_dealloc_unstuffed, NULL);
        if (error)
@@ -1519,8 +1513,6 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip)
 out_quota:
        gfs2_quota_unhold(ip);
-out_alloc:
-        gfs2_qadata_put(ip);
        return error;
 }
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 62fc14ea4b73..422dde2ec0a1 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -18,7 +18,7 @@
 * hfs_lookup()
 */
 static struct dentry *hfs_lookup(struct inode *dir, struct dentry *dentry,
-                                 struct nameidata *nd)
+                                 unsigned int flags)
 {
        hfs_cat_rec rec;
        struct hfs_find_data fd;
@@ -187,7 +187,7 @@ static int hfs_dir_release(struct inode *inode, struct file *file)
 * the directory and the name (and its length) of the new file.
 */
 static int hfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                      struct nameidata *nd)
+                      bool excl)
 {
        struct inode *inode;
        int res;
diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c
index 2c16316d2917..a67955a0c36f 100644
--- a/fs/hfs/extent.c
+++ b/fs/hfs/extent.c
@@ -432,7 +432,7 @@ out:
                if (inode->i_ino < HFS_FIRSTUSER_CNID)
                        set_bit(HFS_FLG_ALT_MDB_DIRTY, &HFS_SB(sb)->flags);
                set_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags);
-                sb->s_dirt = 1;
+                hfs_mark_mdb_dirty(sb);
        }
        return res;
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 1bf967c6bfdc..8275175acf6e 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -14,6 +14,7 @@
 #include <linux/mutex.h>
 #include <linux/buffer_head.h>
 #include <linux/fs.h>
+#include <linux/workqueue.h>
 #include <asm/byteorder.h>
 #include <asm/uaccess.h>
@@ -137,16 +138,15 @@ struct hfs_sb_info {
        gid_t s_gid;                            /* The gid of all files */
        int session, part;
        struct nls_table *nls_io, *nls_disk;
        struct mutex bitmap_lock;
        unsigned long flags;
        u16 blockoffset;
        int fs_div;
+        struct super_block *sb;
+        int work_queued;                /* non-zero delayed work is queued */
+        struct delayed_work mdb_work;   /* MDB flush delayed work */
+        spinlock_t work_lock;           /* protects mdb_work and work_queued */
 };
 #define HFS_FLG_BITMAP_DIRTY    0
@@ -226,6 +226,9 @@ extern int hfs_compare_dentry(const struct dentry *parent,
 extern void hfs_asc2mac(struct super_block *, struct hfs_name *, struct qstr *);
 extern int hfs_mac2asc(struct super_block *, char *, const struct hfs_name *);
+/* super.c */
+extern void hfs_mark_mdb_dirty(struct super_block *sb);
 extern struct timezone sys_tz;
 /*
@@ -253,7 +256,7 @@ static inline const char *hfs_mdb_name(struct super_block *sb)
 static inline void hfs_bitmap_dirty(struct super_block *sb)
 {
        set_bit(HFS_FLG_BITMAP_DIRTY, &HFS_SB(sb)->flags);
-        sb->s_dirt = 1;
+        hfs_mark_mdb_dirty(sb);
 }
 #define sb_bread512(sb, sec, data) ({                   \
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 761ec06354b4..ee1bc55677f1 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -220,7 +220,7 @@ struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, umode_t mode)
        insert_inode_hash(inode);
        mark_inode_dirty(inode);
        set_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags);
-        sb->s_dirt = 1;
+        hfs_mark_mdb_dirty(sb);
        return inode;
 }
@@ -235,7 +235,7 @@ void hfs_delete_inode(struct inode *inode)
                if (HFS_I(inode)->cat_key.ParID == cpu_to_be32(HFS_ROOT_CNID))
                        HFS_SB(sb)->root_dirs--;
                set_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags);
-                sb->s_dirt = 1;
+                hfs_mark_mdb_dirty(sb);
                return;
        }
        HFS_SB(sb)->file_count--;
@@ -248,7 +248,7 @@ void hfs_delete_inode(struct inode *inode)
                }
        }
        set_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags);
-        sb->s_dirt = 1;
+        hfs_mark_mdb_dirty(sb);
 }
 void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext,
@@ -489,7 +489,7 @@ out:
 }
 static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry,
-                                      struct nameidata *nd)
+                                      unsigned int flags)
 {
        struct inode *inode = NULL;
        hfs_cat_rec rec;
@@ -644,13 +644,7 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end,
        /* sync the superblock to buffers */
        sb = inode->i_sb;
-        if (sb->s_dirt) {
+        flush_delayed_work_sync(&HFS_SB(sb)->mdb_work);
-                lock_super(sb);
-                sb->s_dirt = 0;
-                if (!(sb->s_flags & MS_RDONLY))
-                        hfs_mdb_commit(sb);
-                unlock_super(sb);
-        }
        /* .. finally sync the buffers to disk */
        err = sync_blockdev(sb->s_bdev);
        if (!ret)
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index 1563d5ce5764..5fd51a5833ff 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -260,6 +260,10 @@ void hfs_mdb_commit(struct super_block *sb)
 {
        struct hfs_mdb *mdb = HFS_SB(sb)->mdb;
+        if (sb->s_flags & MS_RDONLY)
+                return;
+        lock_buffer(HFS_SB(sb)->mdb_bh);
        if (test_and_clear_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags)) {
                /* These parameters may have been modified, so write them back */
                mdb->drLsMod = hfs_mtime();
@@ -283,9 +287,13 @@ void hfs_mdb_commit(struct super_block *sb)
                                     &mdb->drXTFlSize, NULL);
                hfs_inode_write_fork(HFS_SB(sb)->cat_tree->inode, mdb->drCTExtRec,
                                     &mdb->drCTFlSize, NULL);
+                lock_buffer(HFS_SB(sb)->alt_mdb_bh);
                memcpy(HFS_SB(sb)->alt_mdb, HFS_SB(sb)->mdb, HFS_SECTOR_SIZE);
                HFS_SB(sb)->alt_mdb->drAtrb |= cpu_to_be16(HFS_SB_ATTRIB_UNMNT);
                HFS_SB(sb)->alt_mdb->drAtrb &= cpu_to_be16(~HFS_SB_ATTRIB_INCNSTNT);
+                unlock_buffer(HFS_SB(sb)->alt_mdb_bh);
                mark_buffer_dirty(HFS_SB(sb)->alt_mdb_bh);
                sync_dirty_buffer(HFS_SB(sb)->alt_mdb_bh);
        }
@@ -308,7 +316,11 @@ void hfs_mdb_commit(struct super_block *sb)
                                break;
                        }
                        len = min((int)sb->s_blocksize - off, size);
+                        lock_buffer(bh);
                        memcpy(bh->b_data + off, ptr, len);
+                        unlock_buffer(bh);
                        mark_buffer_dirty(bh);
                        brelse(bh);
                        block++;
@@ -317,6 +329,7 @@ void hfs_mdb_commit(struct super_block *sb)
                        size -= len;
                }
        }
+        unlock_buffer(HFS_SB(sb)->mdb_bh);
 }
 void hfs_mdb_close(struct super_block *sb)
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 7b4c537d6e13..4eb873e0c07b 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -29,43 +29,9 @@ static struct kmem_cache *hfs_inode_cachep;
 MODULE_LICENSE("GPL");
-/*
- * hfs_write_super()
- *
- * Description:
- *   This function is called by the VFS only. When the filesystem
- *   is mounted r/w it updates the MDB on disk.
- * Input Variable(s):
- *   struct super_block *sb: Pointer to the hfs superblock
- * Output Variable(s):
- *   NONE
- * Returns:
- *   void
- * Preconditions:
- *   'sb' points to a "valid" (struct super_block).
- * Postconditions:
- *   The MDB is marked 'unsuccessfully unmounted' by clearing bit 8 of drAtrb
- *   (hfs_put_super() must set this flag!). Some MDB fields are updated
- *   and the MDB buffer is written to disk by calling hfs_mdb_commit().
- */
-static void hfs_write_super(struct super_block *sb)
-{
-        lock_super(sb);
-        sb->s_dirt = 0;
-        /* sync everything to the buffers */
-        if (!(sb->s_flags & MS_RDONLY))
-                hfs_mdb_commit(sb);
-        unlock_super(sb);
-}
 static int hfs_sync_fs(struct super_block *sb, int wait)
 {
-        lock_super(sb);
        hfs_mdb_commit(sb);
-        sb->s_dirt = 0;
-        unlock_super(sb);
        return 0;
 }
@@ -78,13 +44,44 @@ static int hfs_sync_fs(struct super_block *sb, int wait)
 */
 static void hfs_put_super(struct super_block *sb)
 {
-        if (sb->s_dirt)
+        cancel_delayed_work_sync(&HFS_SB(sb)->mdb_work);
-                hfs_write_super(sb);
        hfs_mdb_close(sb);
        /* release the MDB's resources */
        hfs_mdb_put(sb);
 }
+/*
+ * Delayed-work handler for mdb_work: clears the work_queued flag under
+ * work_lock, then commits the MDB of the owning super block.
+ */
+static void flush_mdb(struct work_struct *work)
+{
+        struct hfs_sb_info *info = container_of(work, struct hfs_sb_info,
+                                                mdb_work.work);
+        spin_lock(&info->work_lock);
+        info->work_queued = 0;
+        spin_unlock(&info->work_lock);
+        hfs_mdb_commit(info->sb);
+}
+/*
+ * hfs_mark_mdb_dirty - schedule a deferred MDB commit for @sb
+ *
+ * Does nothing on a read-only mount. Otherwise, unless a flush is already
+ * queued, queues mdb_work on system_long_wq and records that in
+ * work_queued; both steps happen under work_lock.
+ */
+void hfs_mark_mdb_dirty(struct super_block *sb)
+{
+        struct hfs_sb_info *info = HFS_SB(sb);
+        if (sb->s_flags & MS_RDONLY)
+                return;
+        spin_lock(&info->work_lock);
+        if (info->work_queued == 0) {
+                unsigned long timeout =
+                        msecs_to_jiffies(dirty_writeback_interval * 10);
+                queue_delayed_work(system_long_wq, &info->mdb_work, timeout);
+                info->work_queued = 1;
+        }
+        spin_unlock(&info->work_lock);
+}
 /*
 * hfs_statfs()
 *
@@ -184,7 +181,6 @@ static const struct super_operations hfs_super_operations = {
        .write_inode    = hfs_write_inode,
        .evict_inode    = hfs_evict_inode,
        .put_super      = hfs_put_super,
-        .write_super    = hfs_write_super,
        .sync_fs        = hfs_sync_fs,
        .statfs         = hfs_statfs,
        .remount_fs     = hfs_remount,
@@ -387,7 +383,10 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
        if (!sbi)
                return -ENOMEM;
+        sbi->sb = sb;
        sb->s_fs_info = sbi;
+        spin_lock_init(&sbi->work_lock);
+        INIT_DELAYED_WORK(&sbi->mdb_work, flush_mdb);
        res = -EINVAL;
        if (!parse_options((char *)data, sbi)) {
diff --git a/fs/hfs/sysdep.c b/fs/hfs/sysdep.c
index 19cf291eb91f..91b91fd3a901 100644
--- a/fs/hfs/sysdep.c
+++ b/fs/hfs/sysdep.c
@@ -13,12 +13,12 @@
 /* dentry case-handling: just lowercase everything */
-static int hfs_revalidate_dentry(struct dentry *dentry, struct nameidata *nd)
+static int hfs_revalidate_dentry(struct dentry *dentry, unsigned int flags)
 {
        struct inode *inode;
        int diff;
-        if (nd->flags & LOOKUP_RCU)
+        if (flags & LOOKUP_RCU)
                return -ECHILD;
        inode = dentry->d_inode;
diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c
index 1cad80c789cb..4cfbe2edd296 100644
--- a/fs/hfsplus/bitmap.c
+++ b/fs/hfsplus/bitmap.c
@@ -153,7 +153,7 @@ done:
        kunmap(page);
        *max = offset + (curr - pptr) * 32 + i - start;
        sbi->free_blocks -= *max;
-        sb->s_dirt = 1;
+        hfsplus_mark_mdb_dirty(sb);
        dprint(DBG_BITMAP, "-> %u,%u\n", start, *max);
 out:
        mutex_unlock(&sbi->alloc_mutex);
@@ -228,7 +228,7 @@ out:
        set_page_dirty(page);
        kunmap(page);
        sbi->free_blocks += len;
-        sb->s_dirt = 1;
+        hfsplus_mark_mdb_dirty(sb);
        mutex_unlock(&sbi->alloc_mutex);
        return 0;
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 26b53fb09f68..6b9f921ef2fa 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -25,7 +25,7 @@ static inline void hfsplus_instantiate(struct dentry *dentry,
 /* Find the entry inside dir named dentry->d_name */
 static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry,
-                                     struct nameidata *nd)
+                                     unsigned int flags)
 {
        struct inode *inode = NULL;
        struct hfs_find_data fd;
@@ -316,7 +316,7 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir,
        inode->i_ctime = CURRENT_TIME_SEC;
        mark_inode_dirty(inode);
        sbi->file_count++;
-        dst_dir->i_sb->s_dirt = 1;
+        hfsplus_mark_mdb_dirty(dst_dir->i_sb);
 out:
        mutex_unlock(&sbi->vh_mutex);
        return res;
@@ -465,7 +465,7 @@ out:
 }
 static int hfsplus_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                          struct nameidata *nd)
+                          bool excl)
 {
        return hfsplus_mknod(dir, dentry, mode, 0);
 }
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 4e75ac646fea..558dbb463a4e 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -153,8 +153,11 @@ struct hfsplus_sb_info {
        gid_t gid;
        int part, session;
        unsigned long flags;
+        int work_queued;               /* non-zero delayed work is queued */
+        struct delayed_work sync_work; /* FS sync delayed work */
+        spinlock_t work_lock;          /* protects sync_work and work_queued */
 };
 #define HFSPLUS_SB_WRITEBACKUP  0
@@ -428,7 +431,7 @@ int hfsplus_show_options(struct seq_file *, struct dentry *);
 /* super.c */
 struct inode *hfsplus_iget(struct super_block *, unsigned long);
-int hfsplus_sync_fs(struct super_block *sb, int wait);
+void hfsplus_mark_mdb_dirty(struct super_block *sb);
 /* tables.c */
 extern u16 hfsplus_case_fold_table[];
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 82b69ee4dacc..3d8b4a675ba0 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -168,7 +168,7 @@ const struct dentry_operations hfsplus_dentry_operations = {
 };
 static struct dentry *hfsplus_file_lookup(struct inode *dir,
-                struct dentry *dentry, struct nameidata *nd)
+                struct dentry *dentry, unsigned int flags)
 {
        struct hfs_find_data fd;
        struct super_block *sb = dir->i_sb;
@@ -431,7 +431,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode)
                sbi->file_count++;
        insert_inode_hash(inode);
        mark_inode_dirty(inode);
-        sb->s_dirt = 1;
+        hfsplus_mark_mdb_dirty(sb);
        return inode;
 }
@@ -442,7 +442,7 @@ void hfsplus_delete_inode(struct inode *inode)
        if (S_ISDIR(inode->i_mode)) {
                HFSPLUS_SB(sb)->folder_count--;
-                sb->s_dirt = 1;
+                hfsplus_mark_mdb_dirty(sb);
                return;
        }
        HFSPLUS_SB(sb)->file_count--;
@@ -455,7 +455,7 @@ void hfsplus_delete_inode(struct inode *inode)
                inode->i_size = 0;
                hfsplus_file_truncate(inode);
        }
-        sb->s_dirt = 1;
+        hfsplus_mark_mdb_dirty(sb);
 }
 void hfsplus_inode_read_fork(struct inode *inode, struct hfsplus_fork_raw *fork)
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index a9bca4b8768b..473332098013 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -124,7 +124,7 @@ static int hfsplus_system_write_inode(struct inode *inode)
        if (fork->total_size != cpu_to_be64(inode->i_size)) {
                set_bit(HFSPLUS_SB_WRITEBACKUP, &sbi->flags);
-                inode->i_sb->s_dirt = 1;
+                hfsplus_mark_mdb_dirty(inode->i_sb);
        }
        hfsplus_inode_write_fork(inode, fork);
        if (tree)
@@ -161,7 +161,7 @@ static void hfsplus_evict_inode(struct inode *inode)
        }
 }
-int hfsplus_sync_fs(struct super_block *sb, int wait)
+static int hfsplus_sync_fs(struct super_block *sb, int wait)
 {
        struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
        struct hfsplus_vh *vhdr = sbi->s_vhdr;
@@ -171,9 +171,7 @@ int hfsplus_sync_fs(struct super_block *sb, int wait)
        if (!wait)
                return 0;
-        dprint(DBG_SUPER, "hfsplus_write_super\n");
+        dprint(DBG_SUPER, "hfsplus_sync_fs\n");
-        sb->s_dirt = 0;
        /*
         * Explicitly write out the special metadata inodes.
@@ -226,12 +224,34 @@ out:
        return error;
 }
-static void hfsplus_write_super(struct super_block *sb)
+static void delayed_sync_fs(struct work_struct *work)
 {
-        if (!(sb->s_flags & MS_RDONLY))
+        struct hfsplus_sb_info *sbi;
-                hfsplus_sync_fs(sb, 1);
-        else
+        sbi = container_of(work, struct hfsplus_sb_info, sync_work.work);
-                sb->s_dirt = 0;
+        spin_lock(&sbi->work_lock);
+        sbi->work_queued = 0;
+        spin_unlock(&sbi->work_lock);
+        hfsplus_sync_fs(sbi->alloc_file->i_sb, 1);
+}
+/*
+ * hfsplus_mark_mdb_dirty() - schedule a deferred file system sync
+ * @sb: super block whose volume metadata has been modified
+ *
+ * Does nothing on a read-only mount.  Otherwise queues sync_work on
+ * system_long_wq unless work_queued says a sync is already pending;
+ * the flag is tested and set under work_lock.
+ *
+ * NOTE(review): the delay is dirty_writeback_interval * 10 fed to
+ * msecs_to_jiffies(), which presumably converts centiseconds to
+ * milliseconds - confirm against the definition of that variable.
+ */
+void hfsplus_mark_mdb_dirty(struct super_block *sb)
+{
+        struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
+        if (sb->s_flags & MS_RDONLY)
+                return;
+        spin_lock(&sbi->work_lock);
+        if (sbi->work_queued) {
+                /* a sync is already pending, nothing more to do */
+                spin_unlock(&sbi->work_lock);
+                return;
+        }
+        queue_delayed_work(system_long_wq, &sbi->sync_work,
+                           msecs_to_jiffies(dirty_writeback_interval * 10));
+        sbi->work_queued = 1;
+        spin_unlock(&sbi->work_lock);
 }
 static void hfsplus_put_super(struct super_block *sb)
@@ -240,8 +260,7 @@ static void hfsplus_put_super(struct super_block *sb)
        dprint(DBG_SUPER, "hfsplus_put_super\n");
-        if (!sb->s_fs_info)
+        cancel_delayed_work_sync(&sbi->sync_work);
-                return;
        if (!(sb->s_flags & MS_RDONLY) && sbi->s_vhdr) {
                struct hfsplus_vh *vhdr = sbi->s_vhdr;
@@ -328,7 +347,6 @@ static const struct super_operations hfsplus_sops = {
        .write_inode    = hfsplus_write_inode,
        .evict_inode    = hfsplus_evict_inode,
        .put_super      = hfsplus_put_super,
-        .write_super    = hfsplus_write_super,
        .sync_fs        = hfsplus_sync_fs,
        .statfs         = hfsplus_statfs,
        .remount_fs     = hfsplus_remount,
@@ -355,6 +373,8 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
        sb->s_fs_info = sbi;
        mutex_init(&sbi->alloc_mutex);
        mutex_init(&sbi->vh_mutex);
+        spin_lock_init(&sbi->work_lock);
+        INIT_DELAYED_WORK(&sbi->sync_work, delayed_sync_fs);
        hfsplus_fill_defaults(sbi);
        err = -EINVAL;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 2afa5bbccf9b..124146543aa7 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -553,7 +553,7 @@ static int read_name(struct inode *ino, char *name)
 }
 int hostfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                  struct nameidata *nd)
+                  bool excl)
 {
        struct inode *inode;
        char *name;
@@ -595,7 +595,7 @@ int hostfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 }
 struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry,
-                             struct nameidata *nd)
+                             unsigned int flags)
 {
        struct inode *inode;
        char *name;
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index b8472f803f4e..78e12b2e0ea2 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -189,7 +189,7 @@ out:
 *            to tell read_inode to read fnode or not.
 */
-struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
        const unsigned char *name = dentry->d_name.name;
        unsigned len = dentry->d_name.len;
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index c07ef1f1ced6..ac1ead194db5 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -220,7 +220,7 @@ extern const struct dentry_operations hpfs_dentry_operations;
 /* dir.c */
-struct dentry *hpfs_lookup(struct inode *, struct dentry *, struct nameidata *);
+struct dentry *hpfs_lookup(struct inode *, struct dentry *, unsigned int);
 extern const struct file_operations hpfs_dir_ops;
 /* dnode.c */
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 9083ef8af58c..bc9082482f68 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -115,7 +115,7 @@ bail:
        return err;
 }
-static int hpfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd)
+static int hpfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
 {
        const unsigned char *name = dentry->d_name.name;
        unsigned len = dentry->d_name.len;
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index d4f93b52cec5..c1dffe47fde2 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -138,7 +138,7 @@ static int file_removed(struct dentry *dentry, const char *file)
 }
 static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry,
-                                   struct nameidata *nd)
+                                   unsigned int flags)
 {
        struct dentry *proc_dentry, *parent;
        struct qstr *name = &dentry->d_name;
@@ -420,8 +420,7 @@ static int hppfs_open(struct inode *inode, struct file *file)
 {
        const struct cred *cred = file->f_cred;
        struct hppfs_private *data;
-        struct vfsmount *proc_mnt;
+        struct path path;
-        struct dentry *proc_dentry;
        char *host_file;
        int err, fd, type, filter;
@@ -434,12 +433,11 @@ static int hppfs_open(struct inode *inode, struct file *file)
        if (host_file == NULL)
                goto out_free2;
-        proc_dentry = HPPFS_I(inode)->proc_dentry;
+        path.mnt = inode->i_sb->s_fs_info;
-        proc_mnt = inode->i_sb->s_fs_info;
+        path.dentry = HPPFS_I(inode)->proc_dentry;
        /* XXX This isn't closed anywhere */
-        data->proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt),
+        data->proc_file = dentry_open(&path, file_mode(file->f_mode), cred);
-                                      file_mode(file->f_mode), cred);
        err = PTR_ERR(data->proc_file);
        if (IS_ERR(data->proc_file))
                goto out_free1;
@@ -484,8 +482,7 @@ static int hppfs_dir_open(struct inode *inode, struct file *file)
 {
        const struct cred *cred = file->f_cred;
        struct hppfs_private *data;
-        struct vfsmount *proc_mnt;
+        struct path path;
-        struct dentry *proc_dentry;
        int err;
        err = -ENOMEM;
@@ -493,10 +490,9 @@ static int hppfs_dir_open(struct inode *inode, struct file *file)
        if (data == NULL)
                goto out;
-        proc_dentry = HPPFS_I(inode)->proc_dentry;
+        path.mnt = inode->i_sb->s_fs_info;
-        proc_mnt = inode->i_sb->s_fs_info;
+        path.dentry = HPPFS_I(inode)->proc_dentry;
-        data->proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt),
+        data->proc_file = dentry_open(&path, file_mode(file->f_mode), cred);
-                                      file_mode(file->f_mode), cred);
        err = PTR_ERR(data->proc_file);
        if (IS_ERR(data->proc_file))
                goto out_free;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index cc9281b6c628..e13e9bdb0bf5 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -565,7 +565,7 @@ static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mod
        return retval;
 }
-static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd)
+static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
 {
        return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0);
 }
diff --git a/fs/inode.c b/fs/inode.c
index c99163b1b310..775cbabd4fa5 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -182,7 +182,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
        }
        inode->i_private = NULL;
        inode->i_mapping = mapping;
-        INIT_LIST_HEAD(&inode->i_dentry);       /* buggered by rcu freeing */
+        INIT_HLIST_HEAD(&inode->i_dentry);      /* buggered by rcu freeing */
 #ifdef CONFIG_FS_POSIX_ACL
        inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
 #endif
diff --git a/fs/internal.h b/fs/internal.h
index 18bc216ea09d..a6fd56c68b11 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -42,6 +42,11 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait)
 extern void __init chrdev_init(void);
 /*
+ * namei.c
+ */
+extern int __inode_permission(struct inode *, int);
+/*
 * namespace.c
 */
 extern int copy_mount_options(const void __user *, unsigned long *);
@@ -50,8 +55,6 @@ extern int copy_mount_string(const void __user *, char **);
 extern struct vfsmount *lookup_mnt(struct path *);
 extern int finish_automount(struct vfsmount *, struct path *);
-extern void mnt_make_longterm(struct vfsmount *);
-extern void mnt_make_shortterm(struct vfsmount *);
 extern int sb_prepare_remount_readonly(struct super_block *);
 extern void __init mnt_init(void);
@@ -84,9 +87,6 @@ extern struct super_block *user_get_super(dev_t);
 /*
 * open.c
 */
-struct nameidata;
-extern struct file *nameidata_to_filp(struct nameidata *);
-extern void release_open_intent(struct nameidata *);
 struct open_flags {
        int open_flag;
        umode_t mode;
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index aa4356d09eee..1d3804492aa7 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -134,6 +134,7 @@ isofs_export_encode_fh(struct inode *inode,
        len = 3;
        fh32[0] = ei->i_iget5_block;
        fh16[2] = (__u16)ei->i_iget5_offset;  /* fh16 [sic] */
+        fh16[3] = 0;  /* avoid leaking uninitialized data */
        fh32[2] = inode->i_generation;
        if (parent) {
                struct iso_inode_info *eparent;
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index 0e73f63d9274..3620ad1ea9bc 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -114,7 +114,7 @@ extern int isofs_name_translate(struct iso_directory_record *, char *, struct in
 int get_joliet_filename(struct iso_directory_record *, unsigned char *, struct inode *);
 int get_acorn_filename(struct iso_directory_record *, char *, struct inode *);
-extern struct dentry *isofs_lookup(struct inode *, struct dentry *, struct nameidata *);
+extern struct dentry *isofs_lookup(struct inode *, struct dentry *, unsigned int flags);
 extern struct buffer_head *isofs_bread(struct inode *, sector_t);
 extern int isofs_get_blocks(struct inode *, sector_t, struct buffer_head **, unsigned long);
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 1e2946f2a69e..c167028844ed 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -163,7 +163,7 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry,
        return 0;
 }
-struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
        int found;
        unsigned long uninitialized_var(block);
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index 008bf062fd26..a748fe21465a 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -265,8 +265,11 @@ int journal_recover(journal_t *journal)
        if (!err)
                err = err2;
        /* Flush disk caches to get replayed data on the permanent storage */
-        if (journal->j_flags & JFS_BARRIER)
+        if (journal->j_flags & JFS_BARRIER) {
-                blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
+                err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
+                if (!err)
+                        err = err2;
+        }
        return err;
 }
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index b56018896d5e..ad7774d32095 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -25,9 +25,9 @@
 static int jffs2_readdir (struct file *, void *, filldir_t);
 static int jffs2_create (struct inode *,struct dentry *,umode_t,
-                         struct nameidata *);
+                         bool);
 static struct dentry *jffs2_lookup (struct inode *,struct dentry *,
-                                    struct nameidata *);
+                                    unsigned int);
 static int jffs2_link (struct dentry *,struct inode *,struct dentry *);
 static int jffs2_unlink (struct inode *,struct dentry *);
 static int jffs2_symlink (struct inode *,struct dentry *,const char *);
@@ -74,7 +74,7 @@ const struct inode_operations jffs2_dir_inode_operations =
   nice and simple
 */
 static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target,
-                                   struct nameidata *nd)
+                                   unsigned int flags)
 {
        struct jffs2_inode_info *dir_f;
        struct jffs2_full_dirent *fd = NULL, *fd_list;
@@ -175,7 +175,7 @@ static int jffs2_readdir(struct file *filp, void *dirent, filldir_t filldir)
 static int jffs2_create(struct inode *dir_i, struct dentry *dentry,
-                        umode_t mode, struct nameidata *nd)
+                        umode_t mode, bool excl)
 {
        struct jffs2_raw_inode *ri;
        struct jffs2_inode_info *f, *dir_f;
@@ -226,8 +226,8 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry,
                  __func__, inode->i_ino, inode->i_mode, inode->i_nlink,
                  f->inocache->pino_nlink, inode->i_mapping->nrpages);
-        d_instantiate(dentry, inode);
        unlock_new_inode(inode);
+        d_instantiate(dentry, inode);
        return 0;
 fail:
@@ -446,8 +446,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
        mutex_unlock(&dir_f->sem);
        jffs2_complete_reservation(c);
-        d_instantiate(dentry, inode);
        unlock_new_inode(inode);
+        d_instantiate(dentry, inode);
        return 0;
 fail:
@@ -591,8 +591,8 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, umode_t mode
        mutex_unlock(&dir_f->sem);
        jffs2_complete_reservation(c);
-        d_instantiate(dentry, inode);
        unlock_new_inode(inode);
+        d_instantiate(dentry, inode);
        return 0;
 fail:
@@ -766,8 +766,8 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, umode_t mode
        mutex_unlock(&dir_f->sem);
        jffs2_complete_reservation(c);
-        d_instantiate(dentry, inode);
        unlock_new_inode(inode);
+        d_instantiate(dentry, inode);
        return 0;
 fail:
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 07c91ca6017d..3b91a7ad6086 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -73,7 +73,7 @@ static inline void free_ea_wmap(struct inode *inode)
 *
 */
 static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode,
-                struct nameidata *nd)
+                bool excl)
 {
        int rc = 0;
        tid_t tid;              /* transaction id */
@@ -176,8 +176,8 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode,
                unlock_new_inode(ip);
                iput(ip);
        } else {
-                d_instantiate(dentry, ip);
                unlock_new_inode(ip);
+                d_instantiate(dentry, ip);
        }
      out2:
@@ -309,8 +309,8 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
                unlock_new_inode(ip);
                iput(ip);
        } else {
-                d_instantiate(dentry, ip);
                unlock_new_inode(ip);
+                d_instantiate(dentry, ip);
        }
      out2:
@@ -1043,8 +1043,8 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
                unlock_new_inode(ip);
                iput(ip);
        } else {
-                d_instantiate(dentry, ip);
                unlock_new_inode(ip);
+                d_instantiate(dentry, ip);
        }
      out2:
@@ -1424,8 +1424,8 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
                unlock_new_inode(ip);
                iput(ip);
        } else {
-                d_instantiate(dentry, ip);
                unlock_new_inode(ip);
+                d_instantiate(dentry, ip);
        }
      out1:
@@ -1436,7 +1436,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
        return rc;
 }
-static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struct nameidata *nd)
+static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, unsigned int flags)
 {
        struct btstack btstack;
        ino_t inum;
@@ -1570,7 +1570,7 @@ out:
        return result;
 }
-static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int jfs_ci_revalidate(struct dentry *dentry, unsigned int flags)
 {
        /*
         * This is not negative dentry. Always valid.
@@ -1589,7 +1589,7 @@ static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd)
         * This may be nfsd (or something), anyway, we can't see the
         * intent of this. So, since this can be for creation, drop it.
         */
-        if (!nd)
+        if (!flags)
                return 0;
        /*
@@ -1597,7 +1597,7 @@ static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd)
         * case sensitive name which is specified by user if this is
         * for creation.
         */
-        if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
+        if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
                return 0;
        return 1;
 }
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 4a82950f412f..c55c7452d285 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -601,6 +601,11 @@ static int jfs_sync_fs(struct super_block *sb, int wait)
        /* log == NULL indicates read-only mount */
        if (log) {
+                /*
+                 * Write quota structures to quota file, sync_blockdev() will
+                 * write them to disk later
+                 */
+                dquot_writeback_dquots(sb, -1);
                jfs_flush_journal(log, wait);
                jfs_syncpt(log, 0);
        }
diff --git a/fs/libfs.c b/fs/libfs.c
index f86ec27a4230..a74cb1725ac6 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -53,7 +53,7 @@ static int simple_delete_dentry(const struct dentry *dentry)
 * Lookup the data. This is trivial - if the dentry didn't already
 * exist, we know it is negative.  Set d_op to delete negative dentries.
 */
-struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
        static const struct dentry_operations simple_dentry_operations = {
                .d_delete = simple_delete_dentry,
@@ -222,15 +222,15 @@ struct dentry *mount_pseudo(struct file_system_type *fs_type, char *name,
        const struct super_operations *ops,
        const struct dentry_operations *dops, unsigned long magic)
 {
-        struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
+        struct super_block *s;
        struct dentry *dentry;
        struct inode *root;
        struct qstr d_name = QSTR_INIT(name, strlen(name));
+        s = sget(fs_type, NULL, set_anon_super, MS_NOUSER, NULL);
        if (IS_ERR(s))
                return ERR_CAST(s);
-        s->s_flags = MS_NOUSER;
        s->s_maxbytes = MAX_LFS_FILESIZE;
        s->s_blocksize = PAGE_SIZE;
        s->s_blocksize_bits = PAGE_SHIFT;
diff --git a/fs/locks.c b/fs/locks.c
index fce6238d52c1..82c353304f9e 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -308,7 +308,7 @@ static int flock_make_lock(struct file *filp, struct file_lock **lock,
        return 0;
 }
-static int assign_type(struct file_lock *fl, int type)
+static int assign_type(struct file_lock *fl, long type)
 {
        switch (type) {
        case F_RDLCK:
@@ -445,7 +445,7 @@ static const struct lock_manager_operations lease_manager_ops = {
 /*
 * Initialize a lease, use the default lock manager operations
 */
-static int lease_init(struct file *filp, int type, struct file_lock *fl)
+static int lease_init(struct file *filp, long type, struct file_lock *fl)
 {
        if (assign_type(fl, type) != 0)
                return -EINVAL;
@@ -463,7 +463,7 @@ static int lease_init(struct file *filp, int type, struct file_lock *fl)
 }
 /* Allocate a file_lock initialised to this type of lease */
-static struct file_lock *lease_alloc(struct file *filp, int type)
+static struct file_lock *lease_alloc(struct file *filp, long type)
 {
        struct file_lock *fl = locks_alloc_lock();
        int error = -ENOMEM;
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index bea5d1b9954b..26e4a941532f 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -349,7 +349,7 @@ static void logfs_set_name(struct logfs_disk_dentry *dd, struct qstr *name)
 }
 static struct dentry *logfs_lookup(struct inode *dir, struct dentry *dentry,
-                struct nameidata *nd)
+                unsigned int flags)
 {
        struct page *page;
        struct logfs_disk_dentry *dd;
@@ -502,7 +502,7 @@ static int logfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 }
 static int logfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                struct nameidata *nd)
+                bool excl)
 {
        struct inode *inode;
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index 97bca623d893..345c24b8a6f8 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -519,7 +519,7 @@ static struct dentry *logfs_get_sb_device(struct logfs_super *super,
        log_super("LogFS: Start mount %x\n", mount_count++);
        err = -EINVAL;
-        sb = sget(type, logfs_sb_test, logfs_sb_set, super);
+        sb = sget(type, logfs_sb_test, logfs_sb_set, flags | MS_NOATIME, super);
        if (IS_ERR(sb)) {
                super->s_devops->put_device(super);
                kfree(super);
@@ -542,7 +542,6 @@ static struct dentry *logfs_get_sb_device(struct logfs_super *super,
        sb->s_maxbytes  = (1ull << 43) - 1;
        sb->s_max_links = LOGFS_LINK_MAX;
        sb->s_op        = &logfs_super_operations;
-        sb->s_flags     = flags | MS_NOATIME;
        err = logfs_read_sb(sb, sb->s_flags & MS_RDONLY);
        if (err)
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 2d0ee1786305..0db73d9dd668 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -18,7 +18,7 @@ static int add_nondir(struct dentry *dentry, struct inode *inode)
        return err;
 }
-static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
+static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, unsigned int flags)
 {
        struct inode * inode = NULL;
        ino_t ino;
@@ -55,7 +55,7 @@ static int minix_mknod(struct inode * dir, struct dentry *dentry, umode_t mode,
 }
 static int minix_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                struct nameidata *nd)
+                bool excl)
 {
        return minix_mknod(dir, dentry, mode, 0);
 }
diff --git a/fs/mount.h b/fs/mount.h
index 4ef36d93e5a2..4f291f9de641 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -22,7 +22,6 @@ struct mount {
        struct vfsmount mnt;
 #ifdef CONFIG_SMP
        struct mnt_pcp __percpu *mnt_pcp;
-        atomic_t mnt_longterm;          /* how many of the refs are longterm */
 #else
        int mnt_count;
        int mnt_writers;
@@ -49,6 +48,8 @@ struct mount {
        int mnt_ghosts;
 };
+#define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */
 static inline struct mount *real_mount(struct vfsmount *mnt)
 {
        return container_of(mnt, struct mount, mnt);
@@ -59,6 +60,12 @@ static inline int mnt_has_parent(struct mount *mnt)
        return mnt != mnt->mnt_parent;
 }
+/*
+ * is_mounted() - is @mnt attached to a real mount namespace?
+ * @mnt: vfsmount to test
+ *
+ * real_mount() is plain container_of() arithmetic and never yields NULL
+ * or an ERR_PTR for a valid @mnt, so the test has to be made on the
+ * mount's namespace pointer rather than on the mount itself.  That
+ * pointer is MNT_NS_INTERNAL (an ERR_PTR) for internal mounts and is
+ * expected to be NULL for detached ones.
+ *
+ * Returns non-zero only when the namespace pointer is a real namespace.
+ */
+static inline int is_mounted(struct vfsmount *mnt)
+{
+        /* neither detached nor internal? */
+        return !IS_ERR_OR_NULL(real_mount(mnt)->mnt_ns);
+}
 extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *, int);
 static inline void get_mnt_ns(struct mnt_namespace *ns)
@@ -67,10 +74,12 @@ static inline void get_mnt_ns(struct mnt_namespace *ns)
 }
 struct proc_mounts {
-        struct seq_file m; /* must be the first element */
+        struct seq_file m;
        struct mnt_namespace *ns;
        struct path root;
        int (*show)(struct seq_file *, struct vfsmount *);
 };
+#define proc_mounts(p) (container_of((p), struct proc_mounts, m))
 extern const struct seq_operations mounts_op;
diff --git a/fs/namei.c b/fs/namei.c
index 7d694194024a..2ccc35c4dc24 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -315,31 +315,22 @@ static inline int do_inode_permission(struct inode *inode, int mask)
 }
 /**
- * inode_permission  -  check for access rights to a given inode
+ * __inode_permission - Check for access rights to a given inode
- * @inode:      inode to check permission on
+ * @inode: Inode to check permission on
- * @mask:       right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC, ...)
+ * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
 *
- * Used to check for read/write/execute permissions on an inode.
+ * Check for read/write/execute permissions on an inode.
- * We use "fsuid" for this, letting us set arbitrary permissions
- * for filesystem access without changing the "normal" uids which
- * are used for other things.
 *
 * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask.
+ *
+ * This does not check for a read-only file system.  You probably want
+ * inode_permission().
 */
-int inode_permission(struct inode *inode, int mask)
+int __inode_permission(struct inode *inode, int mask)
 {
        int retval;
        if (unlikely(mask & MAY_WRITE)) {
-                umode_t mode = inode->i_mode;
-                /*
-                 * Nobody gets write access to a read-only fs.
-                 */
-                if (IS_RDONLY(inode) &&
-                    (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
-                        return -EROFS;
                /*
                 * Nobody gets write access to an immutable file.
                 */
@@ -359,6 +350,47 @@ int inode_permission(struct inode *inode, int mask)
 }
 /**
+ * sb_permission - Check superblock-level permissions
+ * @sb: Superblock of inode to check permission on
+ * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
+ *
+ * Separate out file-system wide checks from inode-specific permission checks.
+ */
+static int sb_permission(struct super_block *sb, struct inode *inode, int mask)
+{
+        if (unlikely(mask & MAY_WRITE)) {
+                umode_t mode = inode->i_mode;
+                /* Nobody gets write access to a read-only fs. */
+                if ((sb->s_flags & MS_RDONLY) &&
+                    (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
+                        return -EROFS;
+        }
+        return 0;
+}
+/**
+ * inode_permission - Check for access rights to a given inode
+ * @inode: Inode to check permission on
+ * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
+ *
+ * Check for read/write/execute permissions on an inode.  We use fs[ug]id for
+ * this, letting us set arbitrary permissions for filesystem access without
+ * changing the "normal" UIDs which are used for other things.
+ *
+ * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask.
+ */
+int inode_permission(struct inode *inode, int mask)
+{
+        int retval;
+        retval = sb_permission(inode->i_sb, inode, mask);
+        if (retval)
+                return retval;
+        return __inode_permission(inode, mask);
+}
+/**
 * path_get - get a reference to a path
 * @path: path to get the reference to
 *
@@ -395,6 +427,18 @@ EXPORT_SYMBOL(path_put);
 * to restart the path walk from the beginning in ref-walk mode.
 */
+static inline void lock_rcu_walk(void)
+{
+        br_read_lock(&vfsmount_lock);
+        rcu_read_lock();
+}
+static inline void unlock_rcu_walk(void)
+{
+        rcu_read_unlock();
+        br_read_unlock(&vfsmount_lock);
+}
 /**
 * unlazy_walk - try to switch to ref-walk mode.
 * @nd: nameidata pathwalk data
@@ -448,8 +492,7 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
        }
        mntget(nd->path.mnt);
-        rcu_read_unlock();
+        unlock_rcu_walk();
-        br_read_unlock(&vfsmount_lock);
        nd->flags &= ~LOOKUP_RCU;
        return 0;
@@ -463,25 +506,9 @@ err_root:
        return -ECHILD;
 }
-/**
+static inline int d_revalidate(struct dentry *dentry, unsigned int flags)
- * release_open_intent - free up open intent resources
- * @nd: pointer to nameidata
- */
-void release_open_intent(struct nameidata *nd)
 {
-        struct file *file = nd->intent.open.file;
+        return dentry->d_op->d_revalidate(dentry, flags);
-        if (file && !IS_ERR(file)) {
-                if (file->f_path.dentry == NULL)
-                        put_filp(file);
-                else
-                        fput(file);
-        }
-}
-static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd)
-{
-        return dentry->d_op->d_revalidate(dentry, nd);
 }
 /**
@@ -506,15 +533,13 @@ static int complete_walk(struct nameidata *nd)
                spin_lock(&dentry->d_lock);
                if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) {
                        spin_unlock(&dentry->d_lock);
-                        rcu_read_unlock();
+                        unlock_rcu_walk();
-                        br_read_unlock(&vfsmount_lock);
                        return -ECHILD;
                }
                BUG_ON(nd->inode != dentry->d_inode);
                spin_unlock(&dentry->d_lock);
                mntget(nd->path.mnt);
-                rcu_read_unlock();
+                unlock_rcu_walk();
-                br_read_unlock(&vfsmount_lock);
        }
        if (likely(!(nd->flags & LOOKUP_JUMPED)))
@@ -527,7 +552,7 @@ static int complete_walk(struct nameidata *nd)
                return 0;
        /* Note: we do not d_invalidate() */
-        status = d_revalidate(dentry, nd);
+        status = d_revalidate(dentry, nd->flags);
        if (status > 0)
                return 0;
@@ -602,10 +627,25 @@ static inline void path_to_nameidata(const struct path *path,
        nd->path.dentry = path->dentry;
 }
+/*
+ * Helper to directly jump to a known parsed path from ->follow_link;
+ * the caller must have taken a reference to path beforehand.
+ */
+void nd_jump_link(struct nameidata *nd, struct path *path)
+{
+        path_put(&nd->path);
+        nd->path = *path;
+        nd->inode = nd->path.dentry->d_inode;
+        nd->flags |= LOOKUP_JUMPED;
+        BUG_ON(nd->inode->i_op->follow_link);
+}
 static inline void put_link(struct nameidata *nd, struct path *link, void *cookie)
 {
        struct inode *inode = link->dentry->d_inode;
-        if (!IS_ERR(cookie) && inode->i_op->put_link)
+        if (inode->i_op->put_link)
                inode->i_op->put_link(link->dentry, nd, cookie);
        path_put(link);
 }
@@ -613,19 +653,19 @@ static inline void put_link(struct nameidata *nd, struct path *link, void *cooki
 static __always_inline int
 follow_link(struct path *link, struct nameidata *nd, void **p)
 {
-        int error;
        struct dentry *dentry = link->dentry;
+        int error;
+        char *s;
        BUG_ON(nd->flags & LOOKUP_RCU);
        if (link->mnt == nd->path.mnt)
                mntget(link->mnt);
-        if (unlikely(current->total_link_count >= 40)) {
+        error = -ELOOP;
-                *p = ERR_PTR(-ELOOP); /* no ->put_link(), please */
+        if (unlikely(current->total_link_count >= 40))
-                path_put(&nd->path);
+                goto out_put_nd_path;
-                return -ELOOP;
-        }
        cond_resched();
        current->total_link_count++;
@@ -633,30 +673,28 @@ follow_link(struct path *link, struct nameidata *nd, void **p)
        nd_set_link(nd, NULL);
        error = security_inode_follow_link(link->dentry, nd);
-        if (error) {
+        if (error)
-                *p = ERR_PTR(error); /* no ->put_link(), please */
+                goto out_put_nd_path;
-                path_put(&nd->path);
-                return error;
-        }
        nd->last_type = LAST_BIND;
        *p = dentry->d_inode->i_op->follow_link(dentry, nd);
        error = PTR_ERR(*p);
-        if (!IS_ERR(*p)) {
+        if (IS_ERR(*p))
-                char *s = nd_get_link(nd);
+                goto out_put_nd_path;
-                error = 0;
-                if (s)
+        error = 0;
-                        error = __vfs_follow_link(nd, s);
+        s = nd_get_link(nd);
-                else if (nd->last_type == LAST_BIND) {
+        if (s) {
-                        nd->flags |= LOOKUP_JUMPED;
+                error = __vfs_follow_link(nd, s);
-                        nd->inode = nd->path.dentry->d_inode;
+                if (unlikely(error))
-                        if (nd->inode->i_op->follow_link) {
+                        put_link(nd, link, *p);
-                                /* stepped on a _really_ weird one */
-                                path_put(&nd->path);
-                                error = -ELOOP;
-                        }
-                }
        }
+        return error;
+out_put_nd_path:
+        path_put(&nd->path);
+        path_put(link);
        return error;
 }
@@ -675,6 +713,16 @@ static int follow_up_rcu(struct path *path)
        return 1;
 }
+/*
+ * follow_up - Find the mountpoint of path's vfsmount
+ *
+ * Given a path, find the mountpoint of its source file system.
+ * Replace @path with the path of the mountpoint in the parent mount.
+ * Up is towards /.
+ *
+ * Return 1 if we went up a level and 0 if we were already at the
+ * root.
+ */
 int follow_up(struct path *path)
 {
        struct mount *mnt = real_mount(path->mnt);
@@ -683,7 +731,7 @@ int follow_up(struct path *path)
        br_read_lock(&vfsmount_lock);
        parent = mnt->mnt_parent;
-        if (&parent->mnt == path->mnt) {
+        if (parent == mnt) {
                br_read_unlock(&vfsmount_lock);
                return 0;
        }
@@ -946,8 +994,7 @@ failed:
        nd->flags &= ~LOOKUP_RCU;
        if (!(nd->flags & LOOKUP_ROOT))
                nd->root.mnt = NULL;
-        rcu_read_unlock();
+        unlock_rcu_walk();
-        br_read_unlock(&vfsmount_lock);
        return -ECHILD;
 }
@@ -1048,7 +1095,7 @@ static void follow_dotdot(struct nameidata *nd)
 * dir->d_inode->i_mutex must be held
 */
 static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
-                                    struct nameidata *nd, bool *need_lookup)
+                                    unsigned int flags, bool *need_lookup)
 {
        struct dentry *dentry;
        int error;
@@ -1059,7 +1106,7 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
                if (d_need_lookup(dentry)) {
                        *need_lookup = true;
                } else if (dentry->d_flags & DCACHE_OP_REVALIDATE) {
-                        error = d_revalidate(dentry, nd);
+                        error = d_revalidate(dentry, flags);
                        if (unlikely(error <= 0)) {
                                if (error < 0) {
                                        dput(dentry);
@@ -1089,7 +1136,7 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
 * dir->d_inode->i_mutex must be held
 */
 static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry,
-                                  struct nameidata *nd)
+                                  unsigned int flags)
 {
        struct dentry *old;
@@ -1099,7 +1146,7 @@ static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry,
                return ERR_PTR(-ENOENT);
        }
-        old = dir->i_op->lookup(dir, dentry, nd);
+        old = dir->i_op->lookup(dir, dentry, flags);
        if (unlikely(old)) {
                dput(dentry);
                dentry = old;
@@ -1108,16 +1155,16 @@ static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry,
 }
 static struct dentry *__lookup_hash(struct qstr *name,
-                struct dentry *base, struct nameidata *nd)
+                struct dentry *base, unsigned int flags)
 {
        bool need_lookup;
        struct dentry *dentry;
-        dentry = lookup_dcache(name, base, nd, &need_lookup);
+        dentry = lookup_dcache(name, base, flags, &need_lookup);
        if (!need_lookup)
                return dentry;
-        return lookup_real(base->d_inode, dentry, nd);
+        return lookup_real(base->d_inode, dentry, flags);
 }
 /*
@@ -1167,7 +1214,7 @@ static int lookup_fast(struct nameidata *nd, struct qstr *name,
                if (unlikely(d_need_lookup(dentry)))
                        goto unlazy;
                if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
-                        status = d_revalidate(dentry, nd);
+                        status = d_revalidate(dentry, nd->flags);
                        if (unlikely(status <= 0)) {
                                if (status != -ECHILD)
                                        need_reval = 0;
@@ -1197,7 +1244,7 @@ unlazy:
        }
        if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval)
-                status = d_revalidate(dentry, nd);
+                status = d_revalidate(dentry, nd->flags);
        if (unlikely(status <= 0)) {
                if (status < 0) {
                        dput(dentry);
@@ -1236,7 +1283,7 @@ static int lookup_slow(struct nameidata *nd, struct qstr *name,
        BUG_ON(nd->inode != parent->d_inode);
        mutex_lock(&parent->d_inode->i_mutex);
-        dentry = __lookup_hash(name, parent, nd);
+        dentry = __lookup_hash(name, parent, nd->flags);
        mutex_unlock(&parent->d_inode->i_mutex);
        if (IS_ERR(dentry))
                return PTR_ERR(dentry);
@@ -1284,8 +1331,7 @@ static void terminate_walk(struct nameidata *nd)
                nd->flags &= ~LOOKUP_RCU;
                if (!(nd->flags & LOOKUP_ROOT))
                        nd->root.mnt = NULL;
-                rcu_read_unlock();
+                unlock_rcu_walk();
-                br_read_unlock(&vfsmount_lock);
        }
 }
@@ -1383,9 +1429,10 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd)
                void *cookie;
                res = follow_link(&link, nd, &cookie);
-                if (!res)
+                if (res)
-                        res = walk_component(nd, path, &nd->last,
+                        break;
-                                             nd->last_type, LOOKUP_FOLLOW);
+                res = walk_component(nd, path, &nd->last,
+                                     nd->last_type, LOOKUP_FOLLOW);
                put_link(nd, &link, cookie);
        } while (res > 0);
@@ -1651,8 +1698,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
                nd->path = nd->root;
                nd->inode = inode;
                if (flags & LOOKUP_RCU) {
-                        br_read_lock(&vfsmount_lock);
+                        lock_rcu_walk();
-                        rcu_read_lock();
                        nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
                } else {
                        path_get(&nd->path);
@@ -1664,8 +1710,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
        if (*name=='/') {
                if (flags & LOOKUP_RCU) {
-                        br_read_lock(&vfsmount_lock);
+                        lock_rcu_walk();
-                        rcu_read_lock();
                        set_root_rcu(nd);
                } else {
                        set_root(nd);
@@ -1677,8 +1722,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
                        struct fs_struct *fs = current->fs;
                        unsigned seq;
-                        br_read_lock(&vfsmount_lock);
+                        lock_rcu_walk();
-                        rcu_read_lock();
                        do {
                                seq = read_seqcount_begin(&fs->seq);
@@ -1713,8 +1757,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
                        if (fput_needed)
                                *fp = file;
                        nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
-                        br_read_lock(&vfsmount_lock);
+                        lock_rcu_walk();
-                        rcu_read_lock();
                } else {
                        path_get(&file->f_path);
                        fput_light(file, fput_needed);
@@ -1777,8 +1820,9 @@ static int path_lookupat(int dfd, const char *name,
                        struct path link = path;
                        nd->flags |= LOOKUP_PARENT;
                        err = follow_link(&link, nd, &cookie);
-                        if (!err)
+                        if (err)
-                                err = lookup_last(nd, &path);
+                                break;
+                        err = lookup_last(nd, &path);
                        put_link(nd, &link, cookie);
                }
        }
@@ -1821,9 +1865,27 @@ static int do_path_lookup(int dfd, const char *name,
        return retval;
 }
-int kern_path_parent(const char *name, struct nameidata *nd)
+/* does lookup, returns the object with parent locked */
+struct dentry *kern_path_locked(const char *name, struct path *path)
 {
-        return do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, nd);
+        struct nameidata nd;
+        struct dentry *d;
+        int err = do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, &nd);
+        if (err)
+                return ERR_PTR(err);
+        if (nd.last_type != LAST_NORM) {
+                path_put(&nd.path);
+                return ERR_PTR(-EINVAL);
+        }
+        mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+        d = __lookup_hash(&nd.last, nd.path.dentry, 0);
+        if (IS_ERR(d)) {
+                mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+                path_put(&nd.path);
+                return d;
+        }
+        *path = nd.path;
+        return d;
 }
 int kern_path(const char *name, unsigned int flags, struct path *path)
@@ -1866,7 +1928,7 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
 */
 static struct dentry *lookup_hash(struct nameidata *nd)
 {
-        return __lookup_hash(&nd->last, nd->path.dentry, nd);
+        return __lookup_hash(&nd->last, nd->path.dentry, nd->flags);
 }
 /**
@@ -1913,7 +1975,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
        if (err)
                return ERR_PTR(err);
-        return __lookup_hash(&this, base, NULL);
+        return __lookup_hash(&this, base, 0);
 }
 int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
@@ -2086,10 +2148,9 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
 }
 int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                struct nameidata *nd)
+                bool want_excl)
 {
        int error = may_create(dir, dentry);
        if (error)
                return error;
@@ -2100,7 +2161,7 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
        error = security_inode_create(dir, dentry, mode);
        if (error)
                return error;
-        error = dir->i_op->create(dir, dentry, mode, nd);
+        error = dir->i_op->create(dir, dentry, mode, want_excl);
        if (!error)
                fsnotify_create(dir, dentry);
        return error;
@@ -2187,21 +2248,275 @@ static inline int open_to_namei_flags(int flag)
        return flag;
 }
+static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode)
+{
+        int error = security_path_mknod(dir, dentry, mode, 0);
+        if (error)
+                return error;
+        error = inode_permission(dir->dentry->d_inode, MAY_WRITE | MAY_EXEC);
+        if (error)
+                return error;
+        return security_inode_create(dir->dentry->d_inode, dentry, mode);
+}
 /*
- * Handle the last step of open()
+ * Attempt to atomically look up, create and open a file from a negative
+ * dentry.
+ *
+ * Returns 0 if successful.  The file will have been created and attached to
+ * @file by the filesystem calling finish_open().
+ *
+ * Returns 1 if the file was looked up only or didn't need creating.  The
+ * caller will need to perform the open themselves.  @path will have been
+ * updated to point to the new dentry.  This may be negative.
+ *
+ * Returns an error code otherwise.
+ */
+static int atomic_open(struct nameidata *nd, struct dentry *dentry,
+                        struct path *path, struct file *file,
+                        const struct open_flags *op,
+                        bool *want_write, bool need_lookup,
+                        int *opened)
+{
+        struct inode *dir =  nd->path.dentry->d_inode;
+        unsigned open_flag = open_to_namei_flags(op->open_flag);
+        umode_t mode;
+        int error;
+        int acc_mode;
+        int create_error = 0;
+        struct dentry *const DENTRY_NOT_SET = (void *) -1UL;
+        BUG_ON(dentry->d_inode);
+        /* Don't create child dentry for a dead directory. */
+        if (unlikely(IS_DEADDIR(dir))) {
+                error = -ENOENT;
+                goto out;
+        }
+        mode = op->mode & S_IALLUGO;
+        if ((open_flag & O_CREAT) && !IS_POSIXACL(dir))
+                mode &= ~current_umask();
+        if (open_flag & O_EXCL) {
+                open_flag &= ~O_TRUNC;
+                *opened |= FILE_CREATED;
+        }
+        /*
+         * Checking write permission is tricky, because we don't know if we are
+         * going to actually need it: O_CREAT opens should work as long as the
+         * file exists.  But checking existence breaks atomicity.  The trick is
+         * to check access and if not granted clear O_CREAT from the flags.
+         *
+         * Another problem is returning the "right" error value (e.g. for an
+         * O_EXCL open we want to return EEXIST not EROFS).
+         */
+        if ((open_flag & (O_CREAT | O_TRUNC)) ||
+            (open_flag & O_ACCMODE) != O_RDONLY) {
+                error = mnt_want_write(nd->path.mnt);
+                if (!error) {
+                        *want_write = true;
+                } else if (!(open_flag & O_CREAT)) {
+                        /*
+                         * No O_CREAT -> atomicity not a requirement -> fall
+                         * back to lookup + open
+                         */
+                        goto no_open;
+                } else if (open_flag & (O_EXCL | O_TRUNC)) {
+                        /* Fall back and fail with the right error */
+                        create_error = error;
+                        goto no_open;
+                } else {
+                        /* No side effects, safe to clear O_CREAT */
+                        create_error = error;
+                        open_flag &= ~O_CREAT;
+                }
+        }
+        if (open_flag & O_CREAT) {
+                error = may_o_create(&nd->path, dentry, op->mode);
+                if (error) {
+                        create_error = error;
+                        if (open_flag & O_EXCL)
+                                goto no_open;
+                        open_flag &= ~O_CREAT;
+                }
+        }
+        if (nd->flags & LOOKUP_DIRECTORY)
+                open_flag |= O_DIRECTORY;
+        file->f_path.dentry = DENTRY_NOT_SET;
+        file->f_path.mnt = nd->path.mnt;
+        error = dir->i_op->atomic_open(dir, dentry, file, open_flag, mode,
+                                      opened);
+        if (error < 0) {
+                if (create_error && error == -ENOENT)
+                        error = create_error;
+                goto out;
+        }
+        acc_mode = op->acc_mode;
+        if (*opened & FILE_CREATED) {
+                fsnotify_create(dir, dentry);
+                acc_mode = MAY_OPEN;
+        }
+        if (error) {    /* returned 1, that is */
+                if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) {
+                        error = -EIO;
+                        goto out;
+                }
+                if (file->f_path.dentry) {
+                        dput(dentry);
+                        dentry = file->f_path.dentry;
+                }
+                goto looked_up;
+        }
+        /*
+         * We didn't have the inode before the open, so check open permission
+         * here.
+         */
+        error = may_open(&file->f_path, acc_mode, open_flag);
+        if (error)
+                fput(file);
+out:
+        dput(dentry);
+        return error;
+no_open:
+        if (need_lookup) {
+                dentry = lookup_real(dir, dentry, nd->flags);
+                if (IS_ERR(dentry))
+                        return PTR_ERR(dentry);
+                if (create_error) {
+                        int open_flag = op->open_flag;
+                        error = create_error;
+                        if ((open_flag & O_EXCL)) {
+                                if (!dentry->d_inode)
+                                        goto out;
+                        } else if (!dentry->d_inode) {
+                                goto out;
+                        } else if ((open_flag & O_TRUNC) &&
+                                   S_ISREG(dentry->d_inode->i_mode)) {
+                                goto out;
+                        }
+                        /* will fail later, go on to get the right error */
+                }
+        }
+looked_up:
+        path->dentry = dentry;
+        path->mnt = nd->path.mnt;
+        return 1;
+}
+/*
+ * Look up and maybe create and open the last component.
+ *
+ * Must be called with i_mutex held on parent.
+ *
+ * Returns 0 if the file was successfully atomically created (if necessary) and
+ * opened.  In this case the file will be returned attached to @file.
+ *
+ * Returns 1 if the file was not completely opened at this time, though lookups
+ * and creations will have been performed and the dentry returned in @path will
+ * be positive upon return if O_CREAT was specified.  If O_CREAT wasn't
+ * specified then a negative dentry may be returned.
+ *
+ * An error code is returned otherwise.
+ *
+ * FILE_CREATED will be set in @*opened if the dentry was created and will be
+ * cleared otherwise prior to returning.
 */
-static struct file *do_last(struct nameidata *nd, struct path *path,
+static int lookup_open(struct nameidata *nd, struct path *path,
-                            const struct open_flags *op, const char *pathname)
+                        struct file *file,
+                        const struct open_flags *op,
+                        bool *want_write, int *opened)
 {
        struct dentry *dir = nd->path.dentry;
+        struct inode *dir_inode = dir->d_inode;
        struct dentry *dentry;
+        int error;
+        bool need_lookup;
+        *opened &= ~FILE_CREATED;
+        dentry = lookup_dcache(&nd->last, dir, nd->flags, &need_lookup);
+        if (IS_ERR(dentry))
+                return PTR_ERR(dentry);
+        /* Cached positive dentry: will open in f_op->open */
+        if (!need_lookup && dentry->d_inode)
+                goto out_no_open;
+        if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) {
+                return atomic_open(nd, dentry, path, file, op, want_write,
+                                   need_lookup, opened);
+        }
+        if (need_lookup) {
+                BUG_ON(dentry->d_inode);
+                dentry = lookup_real(dir_inode, dentry, nd->flags);
+                if (IS_ERR(dentry))
+                        return PTR_ERR(dentry);
+        }
+        /* Negative dentry, just create the file */
+        if (!dentry->d_inode && (op->open_flag & O_CREAT)) {
+                umode_t mode = op->mode;
+                if (!IS_POSIXACL(dir->d_inode))
+                        mode &= ~current_umask();
+                /*
+                 * This write is needed to ensure that a
+                 * rw->ro transition does not occur between
+                 * the time when the file is created and when
+                 * a permanent write count is taken through
+                 * the 'struct file' in finish_open().
+                 */
+                error = mnt_want_write(nd->path.mnt);
+                if (error)
+                        goto out_dput;
+                *want_write = true;
+                *opened |= FILE_CREATED;
+                error = security_path_mknod(&nd->path, dentry, mode, 0);
+                if (error)
+                        goto out_dput;
+                error = vfs_create(dir->d_inode, dentry, mode,
+                                   nd->flags & LOOKUP_EXCL);
+                if (error)
+                        goto out_dput;
+        }
+out_no_open:
+        path->dentry = dentry;
+        path->mnt = nd->path.mnt;
+        return 1;
+out_dput:
+        dput(dentry);
+        return error;
+}
+/*
+ * Handle the last step of open()
+ */
+static int do_last(struct nameidata *nd, struct path *path,
+                   struct file *file, const struct open_flags *op,
+                   int *opened, const char *pathname)
+{
+        struct dentry *dir = nd->path.dentry;
        int open_flag = op->open_flag;
-        int will_truncate = open_flag & O_TRUNC;
+        bool will_truncate = (open_flag & O_TRUNC) != 0;
-        int want_write = 0;
+        bool want_write = false;
        int acc_mode = op->acc_mode;
-        struct file *filp;
        struct inode *inode;
-        int symlink_ok = 0;
+        bool symlink_ok = false;
        struct path save_parent = { .dentry = NULL, .mnt = NULL };
        bool retried = false;
        int error;
@@ -2214,112 +2529,99 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
        case LAST_DOT:
                error = handle_dots(nd, nd->last_type);
                if (error)
-                        return ERR_PTR(error);
+                        return error;
                /* fallthrough */
        case LAST_ROOT:
                error = complete_walk(nd);
                if (error)
-                        return ERR_PTR(error);
+                        return error;
                audit_inode(pathname, nd->path.dentry);
                if (open_flag & O_CREAT) {
                        error = -EISDIR;
-                        goto exit;
+                        goto out;
                }
-                goto ok;
+                goto finish_open;
        case LAST_BIND:
                error = complete_walk(nd);
                if (error)
-                        return ERR_PTR(error);
+                        return error;
                audit_inode(pathname, dir);
-                goto ok;
+                goto finish_open;
        }
        if (!(open_flag & O_CREAT)) {
                if (nd->last.name[nd->last.len])
                        nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
                if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW))
-                        symlink_ok = 1;
+                        symlink_ok = true;
                /* we _can_ be in RCU mode here */
                error = lookup_fast(nd, &nd->last, path, &inode);
-                if (unlikely(error)) {
+                if (likely(!error))
-                        if (error < 0)
+                        goto finish_lookup;
-                                goto exit;
-                        error = lookup_slow(nd, &nd->last, path);
+                if (error < 0)
-                        if (error < 0)
+                        goto out;
-                                goto exit;
-                        inode = path->dentry->d_inode;
+                BUG_ON(nd->inode != dir->d_inode);
-                }
+        } else {
-                goto finish_lookup;
+                /* create side of things */
-        }
+                /*
+                 * This will *only* deal with leaving RCU mode - LOOKUP_JUMPED
-        /* create side of things */
+                 * has been cleared when we got to the last component we are
-        /*
+                 * about to look up
-         * This will *only* deal with leaving RCU mode - LOOKUP_JUMPED has been
+                 */
-         * cleared when we got to the last component we are about to look up
+                error = complete_walk(nd);
-         */
+                if (error)
-        error = complete_walk(nd);
+                        return error;
-        if (error)
-                return ERR_PTR(error);
-        audit_inode(pathname, dir);
+                audit_inode(pathname, dir);
-        error = -EISDIR;
+                error = -EISDIR;
-        /* trailing slashes? */
+                /* trailing slashes? */
-        if (nd->last.name[nd->last.len])
+                if (nd->last.name[nd->last.len])
-                goto exit;
+                        goto out;
+        }
 retry_lookup:
        mutex_lock(&dir->d_inode->i_mutex);
+        error = lookup_open(nd, path, file, op, &want_write, opened);
+        mutex_unlock(&dir->d_inode->i_mutex);
-        dentry = lookup_hash(nd);
+        if (error <= 0) {
-        error = PTR_ERR(dentry);
+                if (error)
-        if (IS_ERR(dentry)) {
+                        goto out;
-                mutex_unlock(&dir->d_inode->i_mutex);
-                goto exit;
-        }
-        path->dentry = dentry;
+                if ((*opened & FILE_CREATED) ||
-        path->mnt = nd->path.mnt;
+                    !S_ISREG(file->f_path.dentry->d_inode->i_mode))
+                        will_truncate = false;
-        /* Negative dentry, just create the file */
+                audit_inode(pathname, file->f_path.dentry);
-        if (!dentry->d_inode) {
+                goto opened;
-                umode_t mode = op->mode;
+        }
-                if (!IS_POSIXACL(dir->d_inode))
-                        mode &= ~current_umask();
+        if (*opened & FILE_CREATED) {
-                /*
-                 * This write is needed to ensure that a
-                 * rw->ro transition does not occur between
-                 * the time when the file is created and when
-                 * a permanent write count is taken through
-                 * the 'struct file' in nameidata_to_filp().
-                 */
-                error = mnt_want_write(nd->path.mnt);
-                if (error)
-                        goto exit_mutex_unlock;
-                want_write = 1;
                /* Don't check for write permission, don't truncate */
                open_flag &= ~O_TRUNC;
-                will_truncate = 0;
+                will_truncate = false;
                acc_mode = MAY_OPEN;
-                error = security_path_mknod(&nd->path, dentry, mode, 0);
+                path_to_nameidata(path, nd);
-                if (error)
+                goto finish_open_created;
-                        goto exit_mutex_unlock;
-                error = vfs_create(dir->d_inode, dentry, mode, nd);
-                if (error)
-                        goto exit_mutex_unlock;
-                mutex_unlock(&dir->d_inode->i_mutex);
-                dput(nd->path.dentry);
-                nd->path.dentry = dentry;
-                goto common;
        }
        /*
         * It already exists.
         */
-        mutex_unlock(&dir->d_inode->i_mutex);
        audit_inode(pathname, path->dentry);
+        /*
+         * If atomic_open() acquired write access it is dropped now due to
+         * possible mount and symlink following (this might be optimized away if
+         * necessary...)
+         */
+        if (want_write) {
+                mnt_drop_write(nd->path.mnt);
+                want_write = false;
+        }
        error = -EEXIST;
        if (open_flag & O_EXCL)
                goto exit_dput;
@@ -2338,18 +2640,18 @@ finish_lookup:
        error = -ENOENT;
        if (!inode) {
                path_to_nameidata(path, nd);
-                goto exit;
+                goto out;
        }
        if (should_follow_link(inode, !symlink_ok)) {
                if (nd->flags & LOOKUP_RCU) {
                        if (unlikely(unlazy_walk(nd, path->dentry))) {
                                error = -ECHILD;
-                                goto exit;
+                                goto out;
                        }
                }
                BUG_ON(inode != path->dentry->d_inode);
-                return NULL;
+                return 1;
        }
        if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path->mnt) {
@@ -2365,119 +2667,122 @@ finish_lookup:
        error = complete_walk(nd);
        if (error) {
                path_put(&save_parent);
-                return ERR_PTR(error);
+                return error;
        }
        error = -EISDIR;
        if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode))
-                goto exit;
+                goto out;
        error = -ENOTDIR;
        if ((nd->flags & LOOKUP_DIRECTORY) && !nd->inode->i_op->lookup)
-                goto exit;
+                goto out;
        audit_inode(pathname, nd->path.dentry);
-ok:
+finish_open:
        if (!S_ISREG(nd->inode->i_mode))
-                will_truncate = 0;
+                will_truncate = false;
        if (will_truncate) {
                error = mnt_want_write(nd->path.mnt);
                if (error)
-                        goto exit;
+                        goto out;
-                want_write = 1;
+                want_write = true;
        }
-common:
+finish_open_created:
        error = may_open(&nd->path, acc_mode, open_flag);
        if (error)
-                goto exit;
+                goto out;
-        filp = nameidata_to_filp(nd);
+        file->f_path.mnt = nd->path.mnt;
-        if (filp == ERR_PTR(-EOPENSTALE) && save_parent.dentry && !retried) {
+        error = finish_open(file, nd->path.dentry, NULL, opened);
-                BUG_ON(save_parent.dentry != dir);
+        if (error) {
-                path_put(&nd->path);
+                if (error == -EOPENSTALE)
-                nd->path = save_parent;
+                        goto stale_open;
-                nd->inode = dir->d_inode;
+                goto out;
-                save_parent.mnt = NULL;
-                save_parent.dentry = NULL;
-                if (want_write) {
-                        mnt_drop_write(nd->path.mnt);
-                        want_write = 0;
-                }
-                retried = true;
-                goto retry_lookup;
-        }
-        if (!IS_ERR(filp)) {
-                error = ima_file_check(filp, op->acc_mode);
-                if (error) {
-                        fput(filp);
-                        filp = ERR_PTR(error);
-                }
        }
-        if (!IS_ERR(filp)) {
+opened:
-                if (will_truncate) {
+        error = open_check_o_direct(file);
-                        error = handle_truncate(filp);
+        if (error)
-                        if (error) {
+                goto exit_fput;
-                                fput(filp);
+        error = ima_file_check(file, op->acc_mode);
-                                filp = ERR_PTR(error);
+        if (error)
-                        }
+                goto exit_fput;
-                }
+        if (will_truncate) {
+                error = handle_truncate(file);
+                if (error)
+                        goto exit_fput;
        }
 out:
        if (want_write)
                mnt_drop_write(nd->path.mnt);
        path_put(&save_parent);
        terminate_walk(nd);
-        return filp;
+        return error;
-exit_mutex_unlock:
-        mutex_unlock(&dir->d_inode->i_mutex);
 exit_dput:
        path_put_conditional(path, nd);
-exit:
-        filp = ERR_PTR(error);
        goto out;
+exit_fput:
+        fput(file);
+        goto out;
+stale_open:
+        /* If no saved parent or already retried then can't retry */
+        if (!save_parent.dentry || retried)
+                goto out;
+        BUG_ON(save_parent.dentry != dir);
+        path_put(&nd->path);
+        nd->path = save_parent;
+        nd->inode = dir->d_inode;
+        save_parent.mnt = NULL;
+        save_parent.dentry = NULL;
+        if (want_write) {
+                mnt_drop_write(nd->path.mnt);
+                want_write = false;
+        }
+        retried = true;
+        goto retry_lookup;
 }
 static struct file *path_openat(int dfd, const char *pathname,
                struct nameidata *nd, const struct open_flags *op, int flags)
 {
        struct file *base = NULL;
-        struct file *filp;
+        struct file *file;
        struct path path;
+        int opened = 0;
        int error;
-        filp = get_empty_filp();
+        file = get_empty_filp();
-        if (!filp)
+        if (!file)
                return ERR_PTR(-ENFILE);
-        filp->f_flags = op->open_flag;
+        file->f_flags = op->open_flag;
-        nd->intent.open.file = filp;
-        nd->intent.open.flags = open_to_namei_flags(op->open_flag);
-        nd->intent.open.create_mode = op->mode;
        error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base);
        if (unlikely(error))
-                goto out_filp;
+                goto out;
        current->total_link_count = 0;
        error = link_path_walk(pathname, nd);
        if (unlikely(error))
-                goto out_filp;
+                goto out;
-        filp = do_last(nd, &path, op, pathname);
+        error = do_last(nd, &path, file, op, &opened, pathname);
-        while (unlikely(!filp)) { /* trailing symlink */
+        while (unlikely(error > 0)) { /* trailing symlink */
                struct path link = path;
                void *cookie;
                if (!(nd->flags & LOOKUP_FOLLOW)) {
                        path_put_conditional(&path, nd);
                        path_put(&nd->path);
-                        filp = ERR_PTR(-ELOOP);
+                        error = -ELOOP;
                        break;
                }
                nd->flags |= LOOKUP_PARENT;
                nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
                error = follow_link(&link, nd, &cookie);
                if (unlikely(error))
-                        filp = ERR_PTR(error);
+                        break;
-                else
+                error = do_last(nd, &path, file, op, &opened, pathname);
-                        filp = do_last(nd, &path, op, pathname);
                put_link(nd, &link, cookie);
        }
 out:
@@ -2485,18 +2790,20 @@ out:
                path_put(&nd->root);
        if (base)
                fput(base);
-        release_open_intent(nd);
+        if (!(opened & FILE_OPENED)) {
-        if (filp == ERR_PTR(-EOPENSTALE)) {
+                BUG_ON(!error);
-                if (flags & LOOKUP_RCU)
+                put_filp(file);
-                        filp = ERR_PTR(-ECHILD);
-                else
-                        filp = ERR_PTR(-ESTALE);
        }
-        return filp;
+        if (unlikely(error)) {
+                if (error == -EOPENSTALE) {
-out_filp:
+                        if (flags & LOOKUP_RCU)
-        filp = ERR_PTR(error);
+                                error = -ECHILD;
-        goto out;
+                        else
+                                error = -ESTALE;
+                }
+                file = ERR_PTR(error);
+        }
+        return file;
 }
 struct file *do_filp_open(int dfd, const char *pathname,
@@ -2551,7 +2858,6 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path
                goto out;
        nd.flags &= ~LOOKUP_PARENT;
        nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL;
-        nd.intent.open.flags = O_EXCL;
        /*
         * Do the final lookup.
@@ -2670,7 +2976,7 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
                goto out_drop_write;
        switch (mode & S_IFMT) {
                case 0: case S_IFREG:
-                        error = vfs_create(path.dentry->d_inode,dentry,mode,NULL);
+                        error = vfs_create(path.dentry->d_inode,dentry,mode,true);
                        break;
                case S_IFCHR: case S_IFBLK:
                        error = vfs_mknod(path.dentry->d_inode,dentry,mode,
diff --git a/fs/namespace.c b/fs/namespace.c
index 1e4a5fe3d7b7..c53d3381b0d0 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -515,8 +515,20 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
 }
 /*
- * lookup_mnt increments the ref count before returning
+ * lookup_mnt - Return the first child mount mounted at path
- * the vfsmount struct.
+ *
+ * "First" means first mounted chronologically.  If you create the
+ * following mounts:
+ *
+ * mount /dev/sda1 /mnt
+ * mount /dev/sda2 /mnt
+ * mount /dev/sda3 /mnt
+ *
+ * Then lookup_mnt() on the base /mnt dentry in the root mount will
+ * return successively the root dentry and vfsmount of /dev/sda1, then
+ * /dev/sda2, then /dev/sda3, then NULL.
+ *
+ * lookup_mnt takes a reference to the found vfsmount.
 */
 struct vfsmount *lookup_mnt(struct path *path)
 {
@@ -621,21 +633,6 @@ static void attach_mnt(struct mount *mnt, struct path *path)
        list_add_tail(&mnt->mnt_child, &real_mount(path->mnt)->mnt_mounts);
 }
-static inline void __mnt_make_longterm(struct mount *mnt)
-{
-#ifdef CONFIG_SMP
-        atomic_inc(&mnt->mnt_longterm);
-#endif
-}
-/* needs vfsmount lock for write */
-static inline void __mnt_make_shortterm(struct mount *mnt)
-{
-#ifdef CONFIG_SMP
-        atomic_dec(&mnt->mnt_longterm);
-#endif
-}
 /*
 * vfsmount lock must be held for write
 */
@@ -649,10 +646,8 @@ static void commit_tree(struct mount *mnt)
        BUG_ON(parent == mnt);
        list_add_tail(&head, &mnt->mnt_list);
-        list_for_each_entry(m, &head, mnt_list) {
+        list_for_each_entry(m, &head, mnt_list)
                m->mnt_ns = n;
-                __mnt_make_longterm(m);
-        }
        list_splice(&head, n->list.prev);
@@ -725,56 +720,60 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
                                        int flag)
 {
        struct super_block *sb = old->mnt.mnt_sb;
-        struct mount *mnt = alloc_vfsmnt(old->mnt_devname);
+        struct mount *mnt;
+        int err;
-        if (mnt) {
+        mnt = alloc_vfsmnt(old->mnt_devname);
-                if (flag & (CL_SLAVE | CL_PRIVATE))
+        if (!mnt)
-                        mnt->mnt_group_id = 0; /* not a peer of original */
+                return ERR_PTR(-ENOMEM);
-                else
-                        mnt->mnt_group_id = old->mnt_group_id;
-                if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) {
-                        int err = mnt_alloc_group_id(mnt);
-                        if (err)
-                                goto out_free;
-                }
-                mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD;
+        if (flag & (CL_SLAVE | CL_PRIVATE))
-                atomic_inc(&sb->s_active);
+                mnt->mnt_group_id = 0; /* not a peer of original */
-                mnt->mnt.mnt_sb = sb;
+        else
-                mnt->mnt.mnt_root = dget(root);
+                mnt->mnt_group_id = old->mnt_group_id;
-                mnt->mnt_mountpoint = mnt->mnt.mnt_root;
-                mnt->mnt_parent = mnt;
-                br_write_lock(&vfsmount_lock);
-                list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
-                br_write_unlock(&vfsmount_lock);
-                if (flag & CL_SLAVE) {
+        if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) {
-                        list_add(&mnt->mnt_slave, &old->mnt_slave_list);
+                err = mnt_alloc_group_id(mnt);
-                        mnt->mnt_master = old;
+                if (err)
-                        CLEAR_MNT_SHARED(mnt);
+                        goto out_free;
-                } else if (!(flag & CL_PRIVATE)) {
-                        if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old))
-                                list_add(&mnt->mnt_share, &old->mnt_share);
-                        if (IS_MNT_SLAVE(old))
-                                list_add(&mnt->mnt_slave, &old->mnt_slave);
-                        mnt->mnt_master = old->mnt_master;
-                }
-                if (flag & CL_MAKE_SHARED)
-                        set_mnt_shared(mnt);
-                /* stick the duplicate mount on the same expiry list
-                 * as the original if that was on one */
-                if (flag & CL_EXPIRE) {
-                        if (!list_empty(&old->mnt_expire))
-                                list_add(&mnt->mnt_expire, &old->mnt_expire);
-                }
        }
+        mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD;
+        atomic_inc(&sb->s_active);
+        mnt->mnt.mnt_sb = sb;
+        mnt->mnt.mnt_root = dget(root);
+        mnt->mnt_mountpoint = mnt->mnt.mnt_root;
+        mnt->mnt_parent = mnt;
+        br_write_lock(&vfsmount_lock);
+        list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
+        br_write_unlock(&vfsmount_lock);
+        if (flag & CL_SLAVE) {
+                list_add(&mnt->mnt_slave, &old->mnt_slave_list);
+                mnt->mnt_master = old;
+                CLEAR_MNT_SHARED(mnt);
+        } else if (!(flag & CL_PRIVATE)) {
+                if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old))
+                        list_add(&mnt->mnt_share, &old->mnt_share);
+                if (IS_MNT_SLAVE(old))
+                        list_add(&mnt->mnt_slave, &old->mnt_slave);
+                mnt->mnt_master = old->mnt_master;
+        }
+        if (flag & CL_MAKE_SHARED)
+                set_mnt_shared(mnt);
+        /* stick the duplicate mount on the same expiry list
+         * as the original if that was on one */
+        if (flag & CL_EXPIRE) {
+                if (!list_empty(&old->mnt_expire))
+                        list_add(&mnt->mnt_expire, &old->mnt_expire);
+        }
        return mnt;
 out_free:
        free_vfsmnt(mnt);
-        return NULL;
+        return ERR_PTR(err);
 }
 static inline void mntfree(struct mount *mnt)
@@ -804,7 +803,8 @@ static void mntput_no_expire(struct mount *mnt)
 put_again:
 #ifdef CONFIG_SMP
        br_read_lock(&vfsmount_lock);
-        if (likely(atomic_read(&mnt->mnt_longterm))) {
+        if (likely(mnt->mnt_ns)) {
+                /* shouldn't be the last one */
                mnt_add_count(mnt, -1);
                br_read_unlock(&vfsmount_lock);
                return;
@@ -939,7 +939,7 @@ EXPORT_SYMBOL(replace_mount_options);
 /* iterator; we want it to have access to namespace_sem, thus here... */
 static void *m_start(struct seq_file *m, loff_t *pos)
 {
-        struct proc_mounts *p = container_of(m, struct proc_mounts, m);
+        struct proc_mounts *p = proc_mounts(m);
        down_read(&namespace_sem);
        return seq_list_start(&p->ns->list, *pos);
@@ -947,7 +947,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
 static void *m_next(struct seq_file *m, void *v, loff_t *pos)
 {
-        struct proc_mounts *p = container_of(m, struct proc_mounts, m);
+        struct proc_mounts *p = proc_mounts(m);
        return seq_list_next(v, &p->ns->list, pos);
 }
@@ -959,7 +959,7 @@ static void m_stop(struct seq_file *m, void *v)
 static int m_show(struct seq_file *m, void *v)
 {
-        struct proc_mounts *p = container_of(m, struct proc_mounts, m);
+        struct proc_mounts *p = proc_mounts(m);
        struct mount *r = list_entry(v, struct mount, mnt_list);
        return p->show(m, &r->mnt);
 }
@@ -1074,8 +1074,6 @@ void umount_tree(struct mount *mnt, int propagate, struct list_head *kill)
                list_del_init(&p->mnt_expire);
                list_del_init(&p->mnt_list);
                __touch_mnt_namespace(p->mnt_ns);
-                if (p->mnt_ns)
-                        __mnt_make_shortterm(p);
                p->mnt_ns = NULL;
                list_del_init(&p->mnt_child);
                if (mnt_has_parent(p)) {
@@ -1260,11 +1258,12 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
        struct path path;
        if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt))
-                return NULL;
+                return ERR_PTR(-EINVAL);
        res = q = clone_mnt(mnt, dentry, flag);
-        if (!q)
+        if (IS_ERR(q))
-                goto Enomem;
+                return q;
        q->mnt_mountpoint = mnt->mnt_mountpoint;
        p = mnt;
@@ -1286,8 +1285,8 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
                        path.mnt = &q->mnt;
                        path.dentry = p->mnt_mountpoint;
                        q = clone_mnt(p, p->mnt.mnt_root, flag);
-                        if (!q)
+                        if (IS_ERR(q))
-                                goto Enomem;
+                                goto out;
                        br_write_lock(&vfsmount_lock);
                        list_add_tail(&q->mnt_list, &res->mnt_list);
                        attach_mnt(q, &path);
@@ -1295,7 +1294,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
                }
        }
        return res;
-Enomem:
+out:
        if (res) {
                LIST_HEAD(umount_list);
                br_write_lock(&vfsmount_lock);
@@ -1303,9 +1302,11 @@ Enomem:
                br_write_unlock(&vfsmount_lock);
                release_mounts(&umount_list);
        }
-        return NULL;
+        return q;
 }
+/* Caller should check returned pointer for errors */
 struct vfsmount *collect_mounts(struct path *path)
 {
        struct mount *tree;
@@ -1313,7 +1314,9 @@ struct vfsmount *collect_mounts(struct path *path)
        tree = copy_tree(real_mount(path->mnt), path->dentry,
                         CL_COPY_ALL | CL_PRIVATE);
        up_write(&namespace_sem);
-        return tree ? &tree->mnt : NULL;
+        if (IS_ERR(tree))
+                return NULL;
+        return &tree->mnt;
 }
 void drop_collected_mounts(struct vfsmount *mnt)
@@ -1608,14 +1611,15 @@ static int do_loopback(struct path *path, char *old_name,
        if (!check_mnt(real_mount(path->mnt)) || !check_mnt(old))
                goto out2;
-        err = -ENOMEM;
        if (recurse)
                mnt = copy_tree(old, old_path.dentry, 0);
        else
                mnt = clone_mnt(old, old_path.dentry, 0);
-        if (!mnt)
+        if (IS_ERR(mnt)) {
-                goto out2;
+                err = PTR_ERR(mnt);
+                goto out;
+        }
        err = graft_tree(mnt, path);
        if (err) {
@@ -2209,23 +2213,6 @@ static struct mnt_namespace *alloc_mnt_ns(void)
        return new_ns;
 }
-void mnt_make_longterm(struct vfsmount *mnt)
-{
-        __mnt_make_longterm(real_mount(mnt));
-}
-void mnt_make_shortterm(struct vfsmount *m)
-{
-#ifdef CONFIG_SMP
-        struct mount *mnt = real_mount(m);
-        if (atomic_add_unless(&mnt->mnt_longterm, -1, 1))
-                return;
-        br_write_lock(&vfsmount_lock);
-        atomic_dec(&mnt->mnt_longterm);
-        br_write_unlock(&vfsmount_lock);
-#endif
-}
 /*
 * Allocate a new namespace structure and populate it with contents
 * copied from the namespace of the passed in task structure.
@@ -2246,10 +2233,10 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
        down_write(&namespace_sem);
        /* First pass: copy the tree topology */
        new = copy_tree(old, old->mnt.mnt_root, CL_COPY_ALL | CL_EXPIRE);
-        if (!new) {
+        if (IS_ERR(new)) {
                up_write(&namespace_sem);
                kfree(new_ns);
-                return ERR_PTR(-ENOMEM);
+                return ERR_CAST(new);
        }
        new_ns->root = new;
        br_write_lock(&vfsmount_lock);
@@ -2265,18 +2252,13 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
        q = new;
        while (p) {
                q->mnt_ns = new_ns;
-                __mnt_make_longterm(q);
                if (fs) {
                        if (&p->mnt == fs->root.mnt) {
                                fs->root.mnt = mntget(&q->mnt);
-                                __mnt_make_longterm(q);
-                                mnt_make_shortterm(&p->mnt);
                                rootmnt = &p->mnt;
                        }
                        if (&p->mnt == fs->pwd.mnt) {
                                fs->pwd.mnt = mntget(&q->mnt);
-                                __mnt_make_longterm(q);
-                                mnt_make_shortterm(&p->mnt);
                                pwdmnt = &p->mnt;
                        }
                }
@@ -2320,7 +2302,6 @@ static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
        if (!IS_ERR(new_ns)) {
                struct mount *mnt = real_mount(m);
                mnt->mnt_ns = new_ns;
-                __mnt_make_longterm(mnt);
                new_ns->root = mnt;
                list_add(&new_ns->list, &mnt->mnt_list);
        } else {
@@ -2615,7 +2596,7 @@ struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
                 * it is a longterm mount, don't release mnt until
                 * we unmount before file sys is unregistered
                */
-                mnt_make_longterm(mnt);
+                real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL;
        }
        return mnt;
 }
@@ -2625,7 +2606,9 @@ void kern_unmount(struct vfsmount *mnt)
 {
        /* release long term mount so mount point can be released */
        if (!IS_ERR_OR_NULL(mnt)) {
-                mnt_make_shortterm(mnt);
+                br_write_lock(&vfsmount_lock);
+                real_mount(mnt)->mnt_ns = NULL;
+                br_write_unlock(&vfsmount_lock);
                mntput(mnt);
        }
 }
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index aeed93a6bde0..4117e7b377bb 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -30,8 +30,8 @@ static void ncp_do_readdir(struct file *, void *, filldir_t,
 static int ncp_readdir(struct file *, void *, filldir_t);
-static int ncp_create(struct inode *, struct dentry *, umode_t, struct nameidata *);
+static int ncp_create(struct inode *, struct dentry *, umode_t, bool);
-static struct dentry *ncp_lookup(struct inode *, struct dentry *, struct nameidata *);
+static struct dentry *ncp_lookup(struct inode *, struct dentry *, unsigned int);
 static int ncp_unlink(struct inode *, struct dentry *);
 static int ncp_mkdir(struct inode *, struct dentry *, umode_t);
 static int ncp_rmdir(struct inode *, struct dentry *);
@@ -72,7 +72,7 @@ const struct inode_operations ncp_dir_inode_operations =
 /*
 * Dentry operations routines
 */
-static int ncp_lookup_validate(struct dentry *, struct nameidata *);
+static int ncp_lookup_validate(struct dentry *, unsigned int);
 static int ncp_hash_dentry(const struct dentry *, const struct inode *,
                struct qstr *);
 static int ncp_compare_dentry(const struct dentry *, const struct inode *,
@@ -290,7 +290,7 @@ leave_me:;
 static int
-ncp_lookup_validate(struct dentry *dentry, struct nameidata *nd)
+ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
 {
        struct ncp_server *server;
        struct dentry *parent;
@@ -302,7 +302,7 @@ ncp_lookup_validate(struct dentry *dentry, struct nameidata *nd)
        if (dentry == dentry->d_sb->s_root)
                return 1;
-        if (nd->flags & LOOKUP_RCU)
+        if (flags & LOOKUP_RCU)
                return -ECHILD;
        parent = dget_parent(dentry);
@@ -836,7 +836,7 @@ out:
        return result;
 }
-static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
        struct ncp_server *server = NCP_SERVER(dir);
        struct inode *inode = NULL;
@@ -980,7 +980,7 @@ out:
 }
 static int ncp_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                struct nameidata *nd)
+                bool excl)
 {
        return ncp_create_new(dir, dentry, mode, 0, 0);
 }
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index f430057ff3b3..a6b1c7fb8232 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -46,8 +46,8 @@
 static int nfs_opendir(struct inode *, struct file *);
 static int nfs_closedir(struct inode *, struct file *);
 static int nfs_readdir(struct file *, void *, filldir_t);
-static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *);
+static struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int);
-static int nfs_create(struct inode *, struct dentry *, umode_t, struct nameidata *);
+static int nfs_create(struct inode *, struct dentry *, umode_t, bool);
 static int nfs_mkdir(struct inode *, struct dentry *, umode_t);
 static int nfs_rmdir(struct inode *, struct dentry *);
 static int nfs_unlink(struct inode *, struct dentry *);
@@ -111,11 +111,13 @@ const struct inode_operations nfs3_dir_inode_operations = {
 #ifdef CONFIG_NFS_V4
-static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *);
+static int nfs_atomic_open(struct inode *, struct dentry *,
-static int nfs_open_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd);
+                           struct file *, unsigned, umode_t,
+                           int *);
 const struct inode_operations nfs4_dir_inode_operations = {
-        .create         = nfs_open_create,
+        .create         = nfs_create,
-        .lookup         = nfs_atomic_lookup,
+        .lookup         = nfs_lookup,
+        .atomic_open    = nfs_atomic_open,
        .link           = nfs_link,
        .unlink         = nfs_unlink,
        .symlink        = nfs_symlink,
@@ -1029,27 +1031,14 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
 }
 /*
- * Return the intent data that applies to this particular path component
- *
- * Note that the current set of intents only apply to the very last
- * component of the path and none of them is set before that last
- * component.
- */
-static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd,
-                                                unsigned int mask)
-{
-        return nd->flags & mask;
-}
-/*
 * Use intent information to check whether or not we're going to do
 * an O_EXCL create using this path component.
 */
-static int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
+static int nfs_is_exclusive_create(struct inode *dir, unsigned int flags)
 {
        if (NFS_PROTO(dir)->version == 2)
                return 0;
-        return nd && nfs_lookup_check_intent(nd, LOOKUP_EXCL);
+        return flags & LOOKUP_EXCL;
 }
 /*
@@ -1061,25 +1050,20 @@ static int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
 *
 */
 static inline
-int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd)
+int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags)
 {
        struct nfs_server *server = NFS_SERVER(inode);
        if (IS_AUTOMOUNT(inode))
                return 0;
-        if (nd != NULL) {
+        /* VFS wants an on-the-wire revalidation */
-                /* VFS wants an on-the-wire revalidation */
+        if (flags & LOOKUP_REVAL)
-                if (nd->flags & LOOKUP_REVAL)
+                goto out_force;
-                        goto out_force;
+        /* This is an open(2) */
-                /* This is an open(2) */
+        if ((flags & LOOKUP_OPEN) && !(server->flags & NFS_MOUNT_NOCTO) &&
-                if (nfs_lookup_check_intent(nd, LOOKUP_OPEN) != 0 &&
+            (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)))
-                                !(server->flags & NFS_MOUNT_NOCTO) &&
+                goto out_force;
-                                (S_ISREG(inode->i_mode) ||
+        return 0;
-                                 S_ISDIR(inode->i_mode)))
-                        goto out_force;
-                return 0;
-        }
-        return nfs_revalidate_inode(server, inode);
 out_force:
        return __nfs_revalidate_inode(server, inode);
 }
@@ -1093,10 +1077,10 @@ out_force:
 */
 static inline
 int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
-                       struct nameidata *nd)
+                       unsigned int flags)
 {
        /* Don't revalidate a negative dentry if we're creating a new file */
-        if (nd != NULL && nfs_lookup_check_intent(nd, LOOKUP_CREATE) != 0)
+        if (flags & LOOKUP_CREATE)
                return 0;
        if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG)
                return 1;
@@ -1114,7 +1098,7 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
 * If the parent directory is seen to have changed, we throw out the
 * cached dentry and do a new lookup.
 */
-static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
 {
        struct inode *dir;
        struct inode *inode;
@@ -1123,7 +1107,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
        struct nfs_fattr *fattr = NULL;
        int error;
-        if (nd->flags & LOOKUP_RCU)
+        if (flags & LOOKUP_RCU)
                return -ECHILD;
        parent = dget_parent(dentry);
@@ -1132,7 +1116,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
        inode = dentry->d_inode;
        if (!inode) {
-                if (nfs_neg_need_reval(dir, dentry, nd))
+                if (nfs_neg_need_reval(dir, dentry, flags))
                        goto out_bad;
                goto out_valid_noent;
        }
@@ -1148,8 +1132,8 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
                goto out_set_verifier;
        /* Force a full look up iff the parent directory has changed */
-        if (!nfs_is_exclusive_create(dir, nd) && nfs_check_verifier(dir, dentry)) {
+        if (!nfs_is_exclusive_create(dir, flags) && nfs_check_verifier(dir, dentry)) {
-                if (nfs_lookup_verify_inode(inode, nd))
+                if (nfs_lookup_verify_inode(inode, flags))
                        goto out_zap_parent;
                goto out_valid;
        }
@@ -1286,7 +1270,7 @@ const struct dentry_operations nfs_dentry_operations = {
        .d_release      = nfs_d_release,
 };
-static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
+static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
 {
        struct dentry *res;
        struct dentry *parent;
@@ -1307,7 +1291,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
         * If we're doing an exclusive create, optimize away the lookup
         * but don't hash the dentry.
         */
-        if (nfs_is_exclusive_create(dir, nd)) {
+        if (nfs_is_exclusive_create(dir, flags)) {
                d_instantiate(dentry, NULL);
                res = NULL;
                goto out;
@@ -1354,7 +1338,7 @@ out:
 }
 #ifdef CONFIG_NFS_V4
-static int nfs4_lookup_revalidate(struct dentry *, struct nameidata *);
+static int nfs4_lookup_revalidate(struct dentry *, unsigned int);
 const struct dentry_operations nfs4_dentry_operations = {
        .d_revalidate   = nfs4_lookup_revalidate,
@@ -1364,24 +1348,6 @@ const struct dentry_operations nfs4_dentry_operations = {
        .d_release      = nfs_d_release,
 };
-/*
- * Use intent information to determine whether we need to substitute
- * the NFSv4-style stateful OPEN for the LOOKUP call
- */
-static int is_atomic_open(struct nameidata *nd)
-{
-        if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_OPEN) == 0)
-                return 0;
-        /* NFS does not (yet) have a stateful open for directories */
-        if (nd->flags & LOOKUP_DIRECTORY)
-                return 0;
-        /* Are we trying to write to a read only partition? */
-        if (__mnt_is_readonly(nd->path.mnt) &&
-            (nd->intent.open.flags & (O_CREAT|O_TRUNC|O_ACCMODE)))
-                return 0;
-        return 1;
-}
 static fmode_t flags_to_mode(int flags)
 {
        fmode_t res = (__force fmode_t)flags & FMODE_EXEC;
@@ -1403,136 +1369,143 @@ static int do_open(struct inode *inode, struct file *filp)
        return 0;
 }
-static int nfs_intent_set_file(struct nameidata *nd, struct nfs_open_context *ctx)
+static int nfs_finish_open(struct nfs_open_context *ctx,
+                           struct dentry *dentry,
+                           struct file *file, unsigned open_flags,
+                           int *opened)
 {
-        struct file *filp;
+        int err;
-        int ret = 0;
+        if (ctx->dentry != dentry) {
+                dput(ctx->dentry);
+                ctx->dentry = dget(dentry);
+        }
        /* If the open_intent is for execute, we have an extra check to make */
        if (ctx->mode & FMODE_EXEC) {
-                ret = nfs_may_open(ctx->dentry->d_inode,
+                err = nfs_may_open(dentry->d_inode, ctx->cred, open_flags);
-                                ctx->cred,
+                if (err < 0)
-                                nd->intent.open.flags);
-                if (ret < 0)
                        goto out;
        }
-        filp = lookup_instantiate_filp(nd, ctx->dentry, do_open);
-        if (IS_ERR(filp))
+        err = finish_open(file, dentry, do_open, opened);
-                ret = PTR_ERR(filp);
+        if (err)
-        else
+                goto out;
-                nfs_file_set_open_context(filp, ctx);
+        nfs_file_set_open_context(file, ctx);
 out:
        put_nfs_open_context(ctx);
-        return ret;
+        return err;
 }
-static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+static int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
+                            struct file *file, unsigned open_flags,
+                            umode_t mode, int *opened)
 {
        struct nfs_open_context *ctx;
-        struct iattr attr;
+        struct dentry *res;
-        struct dentry *res = NULL;
+        struct iattr attr = { .ia_valid = ATTR_OPEN };
        struct inode *inode;
-        int open_flags;
        int err;
-        dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n",
+        /* Expect a negative dentry */
+        BUG_ON(dentry->d_inode);
+        dfprintk(VFS, "NFS: atomic_open(%s/%ld), %s\n",
                        dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
-        /* Check that we are indeed trying to open this file */
+        /* NFS only supports OPEN on regular files */
-        if (!is_atomic_open(nd))
+        if ((open_flags & O_DIRECTORY)) {
+                if (!d_unhashed(dentry)) {
+                        /*
+                         * Hashed negative dentry with O_DIRECTORY: dentry was
+                         * revalidated and is fine, no need to perform lookup
+                         * again
+                         */
+                        return -ENOENT;
+                }
                goto no_open;
-        if (dentry->d_name.len > NFS_SERVER(dir)->namelen) {
-                res = ERR_PTR(-ENAMETOOLONG);
-                goto out;
-        }
-        /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash
-         * the dentry. */
-        if (nd->flags & LOOKUP_EXCL) {
-                d_instantiate(dentry, NULL);
-                goto out;
        }
-        open_flags = nd->intent.open.flags;
+        if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
-        attr.ia_valid = ATTR_OPEN;
+                return -ENAMETOOLONG;
-        ctx = create_nfs_open_context(dentry, open_flags);
-        res = ERR_CAST(ctx);
-        if (IS_ERR(ctx))
-                goto out;
-        if (nd->flags & LOOKUP_CREATE) {
+        if (open_flags & O_CREAT) {
-                attr.ia_mode = nd->intent.open.create_mode;
                attr.ia_valid |= ATTR_MODE;
-                attr.ia_mode &= ~current_umask();
+                attr.ia_mode = mode & ~current_umask();
-        } else
+        }
-                open_flags &= ~(O_EXCL | O_CREAT);
        if (open_flags & O_TRUNC) {
                attr.ia_valid |= ATTR_SIZE;
                attr.ia_size = 0;
        }
-        /* Open the file on the server */
+        ctx = create_nfs_open_context(dentry, open_flags);
+        err = PTR_ERR(ctx);
+        if (IS_ERR(ctx))
+                goto out;
        nfs_block_sillyrename(dentry->d_parent);
        inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr);
+        d_drop(dentry);
        if (IS_ERR(inode)) {
                nfs_unblock_sillyrename(dentry->d_parent);
                put_nfs_open_context(ctx);
-                switch (PTR_ERR(inode)) {
+                err = PTR_ERR(inode);
-                        /* Make a negative dentry */
+                switch (err) {
-                        case -ENOENT:
+                case -ENOENT:
-                                d_add(dentry, NULL);
+                        d_add(dentry, NULL);
-                                res = NULL;
+                        break;
-                                goto out;
+                case -EISDIR:
-                        /* This turned out not to be a regular file */
+                case -ENOTDIR:
-                        case -EISDIR:
+                        goto no_open;
-                        case -ENOTDIR:
+                case -ELOOP:
+                        if (!(open_flags & O_NOFOLLOW))
                                goto no_open;
-                        case -ELOOP:
+                        break;
-                                if (!(nd->intent.open.flags & O_NOFOLLOW))
-                                        goto no_open;
                        /* case -EINVAL: */
-                        default:
+                default:
-                                res = ERR_CAST(inode);
+                        break;
-                                goto out;
                }
+                goto out;
        }
        res = d_add_unique(dentry, inode);
-        nfs_unblock_sillyrename(dentry->d_parent);
+        if (res != NULL)
-        if (res != NULL) {
-                dput(ctx->dentry);
-                ctx->dentry = dget(res);
                dentry = res;
-        }
-        err = nfs_intent_set_file(nd, ctx);
+        nfs_unblock_sillyrename(dentry->d_parent);
-        if (err < 0) {
-                if (res != NULL)
-                        dput(res);
-                return ERR_PTR(err);
-        }
-out:
        nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
-        return res;
+        err = nfs_finish_open(ctx, dentry, file, open_flags, opened);
+        dput(res);
+out:
+        return err;
 no_open:
-        return nfs_lookup(dir, dentry, nd);
+        res = nfs_lookup(dir, dentry, 0);
+        err = PTR_ERR(res);
+        if (IS_ERR(res))
+                goto out;
+        return finish_no_open(file, res);
 }
-static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
 {
        struct dentry *parent = NULL;
        struct inode *inode;
        struct inode *dir;
-        int openflags, ret = 0;
+        int ret = 0;
-        if (nd->flags & LOOKUP_RCU)
+        if (flags & LOOKUP_RCU)
                return -ECHILD;
-        inode = dentry->d_inode;
+        if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY))
-        if (!is_atomic_open(nd) || d_mountpoint(dentry))
+                goto no_open;
+        if (d_mountpoint(dentry))
                goto no_open;
+        inode = dentry->d_inode;
        parent = dget_parent(dentry);
        dir = parent->d_inode;
@@ -1540,7 +1513,7 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
         * optimize away revalidation of negative dentries.
         */
        if (inode == NULL) {
-                if (!nfs_neg_need_reval(dir, dentry, nd))
+                if (!nfs_neg_need_reval(dir, dentry, flags))
                        ret = 1;
                goto out;
        }
@@ -1548,9 +1521,8 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
        /* NFS only supports OPEN on regular files */
        if (!S_ISREG(inode->i_mode))
                goto no_open_dput;
-        openflags = nd->intent.open.flags;
        /* We cannot do exclusive creation on a positive dentry */
-        if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
+        if (flags & LOOKUP_EXCL)
                goto no_open_dput;
        /* Let f_op->open() actually open (and revalidate) the file */
@@ -1563,48 +1535,7 @@ out:
 no_open_dput:
        dput(parent);
 no_open:
-        return nfs_lookup_revalidate(dentry, nd);
+        return nfs_lookup_revalidate(dentry, flags);
-}
-static int nfs_open_create(struct inode *dir, struct dentry *dentry,
-                umode_t mode, struct nameidata *nd)
-{
-        struct nfs_open_context *ctx = NULL;
-        struct iattr attr;
-        int error;
-        int open_flags = O_CREAT|O_EXCL;
-        dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
-                        dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
-        attr.ia_mode = mode;
-        attr.ia_valid = ATTR_MODE;
-        if (nd)
-                open_flags = nd->intent.open.flags;
-        ctx = create_nfs_open_context(dentry, open_flags);
-        error = PTR_ERR(ctx);
-        if (IS_ERR(ctx))
-                goto out_err_drop;
-        error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, ctx);
-        if (error != 0)
-                goto out_put_ctx;
-        if (nd) {
-                error = nfs_intent_set_file(nd, ctx);
-                if (error < 0)
-                        goto out_err;
-        } else {
-                put_nfs_open_context(ctx);
-        }
-        return 0;
-out_put_ctx:
-        put_nfs_open_context(ctx);
-out_err_drop:
-        d_drop(dentry);
-out_err:
-        return error;
 }
 #endif /* CONFIG_NFSV4 */
@@ -1658,11 +1589,11 @@ out_error:
 * reply path made it appear to have failed.
 */
 static int nfs_create(struct inode *dir, struct dentry *dentry,
-                umode_t mode, struct nameidata *nd)
+                umode_t mode, bool excl)
 {
        struct iattr attr;
+        int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT;
        int error;
-        int open_flags = O_CREAT|O_EXCL;
        dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
                        dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
@@ -1670,10 +1601,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry,
        attr.ia_mode = mode;
        attr.ia_valid = ATTR_MODE;
-        if (nd)
+        error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
-                open_flags = nd->intent.open.flags;
-        error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, NULL);
        if (error != 0)
                goto out_err;
        return 0;
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 8abfb19bd3aa..a67990f90bd7 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -62,7 +62,7 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
                 */
                spin_lock(&sb->s_root->d_inode->i_lock);
                spin_lock(&sb->s_root->d_lock);
-                list_del_init(&sb->s_root->d_alias);
+                hlist_del_init(&sb->s_root->d_alias);
                spin_unlock(&sb->s_root->d_lock);
                spin_unlock(&sb->s_root->d_inode->i_lock);
        }
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 2292a0fd2bff..3187e24e8f78 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -314,7 +314,7 @@ static void nfs3_free_createdata(struct nfs3_createdata *data)
 */
 static int
 nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
-                 int flags, struct nfs_open_context *ctx)
+                 int flags)
 {
        struct nfs3_createdata *data;
        umode_t mode = sattr->ia_mode;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 15fc7e4664ed..c157b2089b47 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2806,37 +2806,22 @@ static int nfs4_proc_readlink(struct inode *inode, struct page *page,
 }
 /*
- * Got race?
+ * This is just for mknod.  open(O_CREAT) will always do ->open_context().
- * We will need to arrange for the VFS layer to provide an atomic open.
- * Until then, this create/open method is prone to inefficiency and race
- * conditions due to the lookup, create, and open VFS calls from sys_open()
- * placed on the wire.
- *
- * Given the above sorry state of affairs, I'm simply sending an OPEN.
- * The file will be opened again in the subsequent VFS open call
- * (nfs4_proc_file_open).
- *
- * The open for read will just hang around to be used by any process that
- * opens the file O_RDONLY. This will all be resolved with the VFS changes.
 */
 static int
 nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
-                 int flags, struct nfs_open_context *ctx)
+                 int flags)
 {
-        struct dentry *de = dentry;
+        struct nfs_open_context *ctx;
        struct nfs4_state *state;
-        struct rpc_cred *cred = NULL;
-        fmode_t fmode = 0;
        int status = 0;
-        if (ctx != NULL) {
+        ctx = alloc_nfs_open_context(dentry, FMODE_READ);
-                cred = ctx->cred;
+        if (IS_ERR(ctx))
-                de = ctx->dentry;
+                return PTR_ERR(ctx);
-                fmode = ctx->mode;
-        }
        sattr->ia_mode &= ~current_umask();
-        state = nfs4_do_open(dir, de, fmode, flags, sattr, cred, NULL);
+        state = nfs4_do_open(dir, dentry, ctx->mode, flags, sattr, ctx->cred, NULL);
        d_drop(dentry);
        if (IS_ERR(state)) {
                status = PTR_ERR(state);
@@ -2844,11 +2829,9 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
        }
        d_add(dentry, igrab(state->inode));
        nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
-        if (ctx != NULL)
+        ctx->state = state;
-                ctx->state = state;
-        else
-                nfs4_close_sync(state, fmode);
 out:
+        put_nfs_open_context(ctx);
        return status;
 }
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index b47277baebab..f50d3e8d6f22 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -454,7 +454,10 @@ int objio_read_pagelist(struct nfs_read_data *rdata)
        objios->ios->done = _read_done;
        dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
                rdata->args.offset, rdata->args.count);
-        return ore_read(objios->ios);
+        ret = ore_read(objios->ios);
+        if (unlikely(ret))
+                objio_free_result(&objios->oir);
+        return ret;
 }
 /*
@@ -486,8 +489,16 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
        struct nfs_write_data *wdata = objios->oir.rpcdata;
        struct address_space *mapping = wdata->header->inode->i_mapping;
        pgoff_t index = offset / PAGE_SIZE;
-        struct page *page = find_get_page(mapping, index);
+        struct page *page;
+        loff_t i_size = i_size_read(wdata->header->inode);
+        if (offset >= i_size) {
+                *uptodate = true;
+                dprintk("%s: g_zero_page index=0x%lx\n", __func__, index);
+                return ZERO_PAGE(0);
+        }
+        page = find_get_page(mapping, index);
        if (!page) {
                page = find_or_create_page(mapping, index, GFP_NOFS);
                if (unlikely(!page)) {
@@ -507,8 +518,10 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
 static void __r4w_put_page(void *priv, struct page *page)
 {
-        dprintk("%s: index=0x%lx\n", __func__, page->index);
+        dprintk("%s: index=0x%lx\n", __func__,
-        page_cache_release(page);
+                (page == ZERO_PAGE(0)) ? -1UL : page->index);
+        if (ZERO_PAGE(0) != page)
+                page_cache_release(page);
        return;
 }
@@ -539,8 +552,10 @@ int objio_write_pagelist(struct nfs_write_data *wdata, int how)
        dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
                wdata->args.offset, wdata->args.count);
        ret = ore_write(objios->ios);
-        if (unlikely(ret))
+        if (unlikely(ret)) {
+                objio_free_result(&objios->oir);
                return ret;
+        }
        if (objios->sync)
                _write_done(objios->ios, objios);
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 617c7419a08e..4433806e116f 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -259,7 +259,7 @@ static void nfs_free_createdata(const struct nfs_createdata *data)
 static int
 nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
-                int flags, struct nfs_open_context *ctx)
+                int flags)
 {
        struct nfs_createdata *data;
        struct rpc_message msg = {
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 06228192f64e..8b2a2977b720 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2419,7 +2419,7 @@ static struct dentry *nfs_fs_mount_common(struct file_system_type *fs_type,
                sb_mntdata.mntflags |= MS_SYNCHRONOUS;
        /* Get a superblock - note that we may end up sharing one that already exists */
-        s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata);
+        s = sget(fs_type, compare_super, nfs_set_super, flags, &sb_mntdata);
        if (IS_ERR(s)) {
                mntroot = ERR_CAST(s);
                goto out_err_nosb;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index c8bd9c3be7f7..4700a0a929d7 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -745,7 +745,7 @@ __be32
 nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
                        int may_flags, struct file **filp)
 {
-        struct dentry   *dentry;
+        struct path     path;
        struct inode    *inode;
        int             flags = O_RDONLY|O_LARGEFILE;
        __be32          err;
@@ -762,8 +762,9 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
        if (err)
                goto out;
-        dentry = fhp->fh_dentry;
+        path.mnt = fhp->fh_export->ex_path.mnt;
-        inode = dentry->d_inode;
+        path.dentry = fhp->fh_dentry;
+        inode = path.dentry->d_inode;
        /* Disallow write access to files with the append-only bit set
         * or any access when mandatory locking enabled
@@ -792,8 +793,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
                else
                        flags = O_WRONLY|O_LARGEFILE;
        }
-        *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt),
+        *filp = dentry_open(&path, flags, current_cred());
-                            flags, current_cred());
        if (IS_ERR(*filp))
                host_err = PTR_ERR(*filp);
        else {
@@ -1329,7 +1329,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
        err = 0;
        switch (type) {
        case S_IFREG:
-                host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
+                host_err = vfs_create(dirp, dchild, iap->ia_mode, true);
                if (!host_err)
                        nfsd_check_ignore_resizing(iap);
                break;
@@ -1492,7 +1492,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
                goto out;
        }
-        host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
+        host_err = vfs_create(dirp, dchild, iap->ia_mode, true);
        if (host_err < 0) {
                fh_drop_write(fhp);
                goto out_nfserr;
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index b72847988b78..1d0c0b84c5a3 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -63,7 +63,7 @@ static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode)
 */
 static struct dentry *
-nilfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+nilfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
        struct inode *inode;
        ino_t ino;
@@ -85,7 +85,7 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 * with d_instantiate().
 */
 static int nilfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                        struct nameidata *nd)
+                        bool excl)
 {
        struct inode *inode;
        struct nilfs_transaction_info ti;
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 1099a76cee59..d57c42f974ea 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1288,7 +1288,8 @@ nilfs_mount(struct file_system_type *fs_type, int flags,
                err = -EBUSY;
                goto failed;
        }
-        s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, sd.bdev);
+        s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, flags,
+                 sd.bdev);
        mutex_unlock(&sd.bdev->bd_fsfreeze_mutex);
        if (IS_ERR(s)) {
                err = PTR_ERR(s);
@@ -1301,7 +1302,6 @@ nilfs_mount(struct file_system_type *fs_type, int flags,
                s_new = true;
                /* New superblock instance created */
-                s->s_flags = flags;
                s->s_mode = mode;
                strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id));
                sb_set_blocksize(s, block_size(sd.bdev));
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 3568c8a8b138..d43803669739 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -61,8 +61,6 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
 static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event)
 {
        int client_fd;
-        struct dentry *dentry;
-        struct vfsmount *mnt;
        struct file *new_file;
        pr_debug("%s: group=%p event=%p\n", __func__, group, event);
@@ -81,12 +79,10 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event)
         * we need a new file handle for the userspace program so it can read even if it was
         * originally opened O_WRONLY.
         */
-        dentry = dget(event->path.dentry);
-        mnt = mntget(event->path.mnt);
        /* it's possible this event was an overflow event.  in that case dentry and mnt
         * are NULL;  That's fine, just don't call dentry open */
-        if (dentry && mnt)
+        if (event->path.dentry && event->path.mnt)
-                new_file = dentry_open(dentry, mnt,
+                new_file = dentry_open(&event->path,
                                       group->fanotify_data.f_flags | FMODE_NONOTIFY,
                                       current_cred());
        else
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index b39c5c161adb..6baadb5a8430 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -52,6 +52,7 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
 void __fsnotify_update_child_dentry_flags(struct inode *inode)
 {
        struct dentry *alias;
+        struct hlist_node *p;
        int watched;
        if (!S_ISDIR(inode->i_mode))
@@ -63,7 +64,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
        spin_lock(&inode->i_lock);
        /* run all of the dentries associated with this inode.  Since this is a
         * directory, there damn well better only be one item on this list */
-        list_for_each_entry(alias, &inode->i_dentry, d_alias) {
+        hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) {
                struct dentry *child;
                /* run all of the children of the original inode and fix their
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index 358273e59ade..436f36037e09 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -101,7 +101,7 @@
 * Locking: Caller must hold i_mutex on the directory.
 */
 static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent,
-                struct nameidata *nd)
+                unsigned int flags)
 {
        ntfs_volume *vol = NTFS_SB(dir_ino->i_sb);
        struct inode *dent_inode;
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index e5ba34818332..8db4b58b2e4b 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -49,14 +49,13 @@ void ocfs2_dentry_attach_gen(struct dentry *dentry)
 }
-static int ocfs2_dentry_revalidate(struct dentry *dentry,
+static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags)
-                                   struct nameidata *nd)
 {
        struct inode *inode;
        int ret = 0;    /* if all else fails, just return false */
        struct ocfs2_super *osb;
-        if (nd && nd->flags & LOOKUP_RCU)
+        if (flags & LOOKUP_RCU)
                return -ECHILD;
        inode = dentry->d_inode;
@@ -170,13 +169,11 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode,
                                      u64 parent_blkno,
                                      int skip_unhashed)
 {
-        struct list_head *p;
+        struct hlist_node *p;
-        struct dentry *dentry = NULL;
+        struct dentry *dentry;
        spin_lock(&inode->i_lock);
-        list_for_each(p, &inode->i_dentry) {
+        hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) {
-                dentry = list_entry(p, struct dentry, d_alias);
                spin_lock(&dentry->d_lock);
                if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) {
                        trace_ocfs2_find_local_alias(dentry->d_name.len,
@@ -184,16 +181,13 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode,
                        dget_dlock(dentry);
                        spin_unlock(&dentry->d_lock);
-                        break;
+                        spin_unlock(&inode->i_lock);
+                        return dentry;
                }
                spin_unlock(&dentry->d_lock);
-                dentry = NULL;
        }
        spin_unlock(&inode->i_lock);
+        return NULL;
-        return dentry;
 }
 DEFINE_SPINLOCK(dentry_attach_lock);
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index e31d6ae013ab..83b6f98e0665 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -526,7 +526,7 @@ bail:
 static int dlmfs_create(struct inode *dir,
                        struct dentry *dentry,
                        umode_t mode,
-                        struct nameidata *nd)
+                        bool excl)
 {
        int status = 0;
        struct inode *inode;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 9f39c640cddf..f1fd0741162b 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -98,7 +98,7 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
 #define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64)))
 static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
-                                   struct nameidata *nd)
+                                   unsigned int flags)
 {
        int status;
        u64 blkno;
@@ -618,7 +618,7 @@ static int ocfs2_mkdir(struct inode *dir,
 static int ocfs2_create(struct inode *dir,
                        struct dentry *dentry,
                        umode_t mode,
-                        struct nameidata *nd)
+                        bool excl)
 {
        int ret;
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index f00576ec320f..fb5b3ff79dc6 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -285,13 +285,13 @@ static int omfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 }
 static int omfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                struct nameidata *nd)
+                bool excl)
 {
        return omfs_add_node(dir, dentry, mode | S_IFREG);
 }
 static struct dentry *omfs_lookup(struct inode *dir, struct dentry *dentry,
-                                  struct nameidata *nd)
+                                  unsigned int flags)
 {
        struct buffer_head *bh;
        struct inode *inode = NULL;
diff --git a/fs/open.c b/fs/open.c
index 1540632d8387..1e914b397e12 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -537,25 +537,6 @@ static int chown_common(struct path *path, uid_t user, gid_t group)
        return error;
 }
-SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group)
-{
-        struct path path;
-        int error;
-        error = user_path(filename, &path);
-        if (error)
-                goto out;
-        error = mnt_want_write(path.mnt);
-        if (error)
-                goto out_release;
-        error = chown_common(&path, user, group);
-        mnt_drop_write(path.mnt);
-out_release:
-        path_put(&path);
-out:
-        return error;
-}
 SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
                gid_t, group, int, flag)
 {
@@ -583,23 +564,15 @@ out:
        return error;
 }
-SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group)
+SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group)
 {
-        struct path path;
+        return sys_fchownat(AT_FDCWD, filename, user, group, 0);
-        int error;
+}
-        error = user_lpath(filename, &path);
+SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group)
-        if (error)
+{
-                goto out;
+        return sys_fchownat(AT_FDCWD, filename, user, group,
-        error = mnt_want_write(path.mnt);
+                            AT_SYMLINK_NOFOLLOW);
-        if (error)
-                goto out_release;
-        error = chown_common(&path, user, group);
-        mnt_drop_write(path.mnt);
-out_release:
-        path_put(&path);
-out:
-        return error;
 }
 SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
@@ -667,10 +640,9 @@ int open_check_o_direct(struct file *f)
        return 0;
 }
-static struct file *do_dentry_open(struct dentry *dentry, struct vfsmount *mnt,
+static int do_dentry_open(struct file *f,
-                                   struct file *f,
+                          int (*open)(struct inode *, struct file *),
-                                   int (*open)(struct inode *, struct file *),
+                          const struct cred *cred)
-                                   const struct cred *cred)
 {
        static const struct file_operations empty_fops = {};
        struct inode *inode;
@@ -682,9 +654,9 @@ static struct file *do_dentry_open(struct dentry *dentry, struct vfsmount *mnt,
        if (unlikely(f->f_flags & O_PATH))
                f->f_mode = FMODE_PATH;
-        inode = dentry->d_inode;
+        inode = f->f_path.dentry->d_inode;
        if (f->f_mode & FMODE_WRITE) {
-                error = __get_file_write_access(inode, mnt);
+                error = __get_file_write_access(inode, f->f_path.mnt);
                if (error)
                        goto cleanup_file;
                if (!special_file(inode->i_mode))
@@ -692,14 +664,12 @@ static struct file *do_dentry_open(struct dentry *dentry, struct vfsmount *mnt,
        }
        f->f_mapping = inode->i_mapping;
-        f->f_path.dentry = dentry;
-        f->f_path.mnt = mnt;
        f->f_pos = 0;
        file_sb_list_add(f, inode->i_sb);
        if (unlikely(f->f_mode & FMODE_PATH)) {
                f->f_op = &empty_fops;
-                return f;
+                return 0;
        }
        f->f_op = fops_get(inode->i_fop);
@@ -726,10 +696,11 @@ static struct file *do_dentry_open(struct dentry *dentry, struct vfsmount *mnt,
        file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
-        return f;
+        return 0;
 cleanup_all:
        fops_put(f->f_op);
+        file_sb_list_del(f);
        if (f->f_mode & FMODE_WRITE) {
                put_write_access(inode);
                if (!special_file(inode->i_mode)) {
@@ -740,124 +711,62 @@ cleanup_all:
                         * here, so just reset the state.
                         */
                        file_reset_write(f);
-                        mnt_drop_write(mnt);
+                        mnt_drop_write(f->f_path.mnt);
                }
        }
-        file_sb_list_del(f);
-        f->f_path.dentry = NULL;
-        f->f_path.mnt = NULL;
 cleanup_file:
-        dput(dentry);
+        path_put(&f->f_path);
-        mntput(mnt);
+        f->f_path.mnt = NULL;
-        return ERR_PTR(error);
+        f->f_path.dentry = NULL;
-}
+        return error;
-static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
-                                struct file *f,
-                                int (*open)(struct inode *, struct file *),
-                                const struct cred *cred)
-{
-        struct file *res = do_dentry_open(dentry, mnt, f, open, cred);
-        if (!IS_ERR(res)) {
-                int error = open_check_o_direct(f);
-                if (error) {
-                        fput(res);
-                        res = ERR_PTR(error);
-                }
-        } else {
-                put_filp(f);
-        }
-        return res;
 }
 /**
- * lookup_instantiate_filp - instantiates the open intent filp
+ * finish_open - finish opening a file
- * @nd: pointer to nameidata
+ * @file: file being opened (handed on to do_dentry_open())
 * @dentry: pointer to dentry
 * @open: open callback
+ * @opened: open-state flags; FILE_OPENED must be clear on entry and is set
+ *          here when the open succeeds
 *
- * Helper for filesystems that want to use lookup open intents and pass back
+ * This can be used to finish opening a file passed to i_op->atomic_open().
- * a fully instantiated struct file to the caller.
+ *
- * This function is meant to be called from within a filesystem's
- * lookup method.
- * Beware of calling it for non-regular files! Those ->open methods might block
- * (e.g. in fifo_open), leaving you with parent locked (and in case of fifo,
- * leading to a deadlock, as nobody can open that fifo anymore, because
- * another process to open fifo will block on locked parent when doing lookup).
- * Note that in case of error, nd->intent.open.file is destroyed, but the
- * path information remains valid.
 * If the open callback is set to NULL, then the standard f_op->open()
 * filesystem callback is substituted.
+ *
+ * Returns the result of do_dentry_open(): zero on success, nonzero on failure.
 */
-struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
+int finish_open(struct file *file, struct dentry *dentry,
-                int (*open)(struct inode *, struct file *))
+                int (*open)(struct inode *, struct file *),
+                int *opened)
 {
-        const struct cred *cred = current_cred();
+        int error;
+        BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
-        if (IS_ERR(nd->intent.open.file))
+        mntget(file->f_path.mnt);
-                goto out;
+        file->f_path.dentry = dget(dentry);
-        if (IS_ERR(dentry))
-                goto out_err;
+        error = do_dentry_open(file, open, current_cred());
-        nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt),
+        if (!error)
-                                             nd->intent.open.file,
+                *opened |= FILE_OPENED;
-                                             open, cred);
-out:
+        return error;
-        return nd->intent.open.file;
-out_err:
-        release_open_intent(nd);
-        nd->intent.open.file = ERR_CAST(dentry);
-        goto out;
 }
-EXPORT_SYMBOL_GPL(lookup_instantiate_filp);
+EXPORT_SYMBOL(finish_open);
 /**
- * nameidata_to_filp - convert a nameidata to an open filp.
+ * finish_no_open - finish ->atomic_open() without opening the file
- * @nd: pointer to nameidata
+ *
- * @flags: open flags
+ * @file: file whose f_path.dentry is set
+ * @dentry: dentry or NULL (as returned from ->lookup())
 *
- * Note that this function destroys the original nameidata
+ * This can be used to set the result of a successful lookup in ->atomic_open().
+ * Stores @dentry in file->f_path.dentry and returns 1.
+ * NOTE(review): presumably ->atomic_open() should pass this return value on
+ * (the function returns int, so it cannot "return NULL") - confirm with callers.
 */
-struct file *nameidata_to_filp(struct nameidata *nd)
+int finish_no_open(struct file *file, struct dentry *dentry)
 {
-        const struct cred *cred = current_cred();
+        file->f_path.dentry = dentry;
-        struct file *filp;
+        return 1;
-        /* Pick up the filp from the open intent */
-        filp = nd->intent.open.file;
-        /* Has the filesystem initialised the file for us? */
-        if (filp->f_path.dentry != NULL) {
-                nd->intent.open.file = NULL;
-        } else {
-                struct file *res;
-                path_get(&nd->path);
-                res = do_dentry_open(nd->path.dentry, nd->path.mnt,
-                                     filp, NULL, cred);
-                if (!IS_ERR(res)) {
-                        int error;
-                        nd->intent.open.file = NULL;
-                        BUG_ON(res != filp);
-                        error = open_check_o_direct(filp);
-                        if (error) {
-                                fput(filp);
-                                filp = ERR_PTR(error);
-                        }
-                } else {
-                        /* Allow nd->intent.open.file to be recycled */
-                        filp = res;
-                }
-        }
-        return filp;
 }
+EXPORT_SYMBOL(finish_no_open);
-/*
+struct file *dentry_open(const struct path *path, int flags,
- * dentry_open() will have done dput(dentry) and mntput(mnt) if it returns an
- * error.
- */
-struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
                         const struct cred *cred)
 {
        int error;
@@ -866,18 +775,28 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
        validate_creds(cred);
        /* We must always pass in a valid mount pointer. */
-        BUG_ON(!mnt);
+        BUG_ON(!path->mnt);
        error = -ENFILE;
        f = get_empty_filp();
-        if (f == NULL) {
+        if (f == NULL)
-                dput(dentry);
-                mntput(mnt);
                return ERR_PTR(error);
-        }
        f->f_flags = flags;
-        return __dentry_open(dentry, mnt, f, NULL, cred);
+        f->f_path = *path;
+        path_get(&f->f_path);
+        error = do_dentry_open(f, NULL, cred);
+        if (!error) {
+                error = open_check_o_direct(f);
+                if (error) {
+                        fput(f);
+                        f = ERR_PTR(error);
+                }
+        } else { 
+                put_filp(f);
+                f = ERR_PTR(error);
+        }
+        return f;
 }
 EXPORT_SYMBOL(dentry_open);
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index bc49c975d501..4a3477949bca 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -170,13 +170,13 @@ static const struct file_operations openprom_operations = {
        .llseek         = generic_file_llseek,
 };
-static struct dentry *openpromfs_lookup(struct inode *, struct dentry *, struct nameidata *);
+static struct dentry *openpromfs_lookup(struct inode *, struct dentry *, unsigned int);
 static const struct inode_operations openprom_inode_operations = {
        .lookup         = openpromfs_lookup,
 };
-static struct dentry *openpromfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+static struct dentry *openpromfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
        struct op_inode_info *ent_oi, *oi = OP_I(dir);
        struct device_node *dp, *child;
diff --git a/fs/pnode.c b/fs/pnode.c
index bed378db0758..3e000a51ac0d 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -237,8 +237,9 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry,
                source =  get_source(m, prev_dest_mnt, prev_src_mnt, &type);
-                if (!(child = copy_tree(source, source->mnt.mnt_root, type))) {
+                child = copy_tree(source, source->mnt.mnt_root, type);
-                        ret = -ENOMEM;
+                if (IS_ERR(child)) {
+                        ret = PTR_ERR(child);
                        list_splice(tree_list, tmp_list.prev);
                        goto out;
                }
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 437195f204e1..2772208338f8 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1427,16 +1427,19 @@ static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
 static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
        struct inode *inode = dentry->d_inode;
+        struct path path;
        int error = -EACCES;
-        /* We don't need a base pointer in the /proc filesystem */
-        path_put(&nd->path);
        /* Are we allowed to snoop on the tasks file descriptors? */
        if (!proc_fd_access_allowed(inode))
                goto out;
-        error = PROC_I(inode)->op.proc_get_link(dentry, &nd->path);
+        error = PROC_I(inode)->op.proc_get_link(dentry, &path);
+        if (error)
+                goto out;
+        nd_jump_link(nd, &path);
+        return NULL;
 out:
        return ERR_PTR(error);
 }
@@ -1601,13 +1604,13 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 * made this apply to all per process world readable and executable
 * directories.
 */
-int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
+int pid_revalidate(struct dentry *dentry, unsigned int flags)
 {
        struct inode *inode;
        struct task_struct *task;
        const struct cred *cred;
-        if (nd && nd->flags & LOOKUP_RCU)
+        if (flags & LOOKUP_RCU)
                return -ECHILD;
        inode = dentry->d_inode;
@@ -1781,7 +1784,7 @@ static int proc_fd_link(struct dentry *dentry, struct path *path)
        return proc_fd_info(dentry->d_inode, path, NULL);
 }
-static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
 {
        struct inode *inode;
        struct task_struct *task;
@@ -1789,7 +1792,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
        struct files_struct *files;
        const struct cred *cred;
-        if (nd && nd->flags & LOOKUP_RCU)
+        if (flags & LOOKUP_RCU)
                return -ECHILD;
        inode = dentry->d_inode;
@@ -1868,7 +1871,7 @@ static struct dentry *proc_fd_instantiate(struct inode *dir,
        d_set_d_op(dentry, &tid_fd_dentry_operations);
        d_add(dentry, inode);
        /* Close the race of the process dying before we return the dentry */
-        if (tid_fd_revalidate(dentry, NULL))
+        if (tid_fd_revalidate(dentry, 0))
                error = NULL;
 out:
@@ -1956,7 +1959,7 @@ out_no_task:
 }
 static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry,
-                                    struct nameidata *nd)
+                                    unsigned int flags)
 {
        return proc_lookupfd_common(dir, dentry, proc_fd_instantiate);
 }
@@ -2003,7 +2006,7 @@ static int dname_to_vma_addr(struct dentry *dentry,
        return 0;
 }
-static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
 {
        unsigned long vm_start, vm_end;
        bool exact_vma_exists = false;
@@ -2013,7 +2016,7 @@ static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd)
        struct inode *inode;
        int status = 0;
-        if (nd && nd->flags & LOOKUP_RCU)
+        if (flags & LOOKUP_RCU)
                return -ECHILD;
        if (!capable(CAP_SYS_ADMIN)) {
@@ -2145,7 +2148,7 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
 }
 static struct dentry *proc_map_files_lookup(struct inode *dir,
-                struct dentry *dentry, struct nameidata *nd)
+                struct dentry *dentry, unsigned int flags)
 {
        unsigned long vm_start, vm_end;
        struct vm_area_struct *vma;
@@ -2371,7 +2374,7 @@ static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
        d_set_d_op(dentry, &tid_fd_dentry_operations);
        d_add(dentry, inode);
        /* Close the race of the process dying before we return the dentry */
-        if (tid_fd_revalidate(dentry, NULL))
+        if (tid_fd_revalidate(dentry, 0))
                error = NULL;
 out:
@@ -2380,7 +2383,7 @@ static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
 static struct dentry *proc_lookupfdinfo(struct inode *dir,
                                        struct dentry *dentry,
-                                        struct nameidata *nd)
+                                        unsigned int flags)
 {
        return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate);
 }
@@ -2430,7 +2433,7 @@ static struct dentry *proc_pident_instantiate(struct inode *dir,
        d_set_d_op(dentry, &pid_dentry_operations);
        d_add(dentry, inode);
        /* Close the race of the process dying before we return the dentry */
-        if (pid_revalidate(dentry, NULL))
+        if (pid_revalidate(dentry, 0))
                error = NULL;
 out:
        return error;
@@ -2630,7 +2633,7 @@ static const struct file_operations proc_attr_dir_operations = {
 };
 static struct dentry *proc_attr_dir_lookup(struct inode *dir,
-                                struct dentry *dentry, struct nameidata *nd)
+                                struct dentry *dentry, unsigned int flags)
 {
        return proc_pident_lookup(dir, dentry,
                                  attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
@@ -3114,7 +3117,8 @@ static const struct file_operations proc_tgid_base_operations = {
        .llseek         = default_llseek,
 };
-static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
+static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
+{
        return proc_pident_lookup(dir, dentry,
                                  tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
 }
@@ -3237,13 +3241,13 @@ static struct dentry *proc_pid_instantiate(struct inode *dir,
        d_add(dentry, inode);
        /* Close the race of the process dying before we return the dentry */
-        if (pid_revalidate(dentry, NULL))
+        if (pid_revalidate(dentry, 0))
                error = NULL;
 out:
        return error;
 }
-struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
+struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
 {
        struct dentry *result;
        struct task_struct *task;
@@ -3470,7 +3474,8 @@ static int proc_tid_base_readdir(struct file * filp,
                                   tid_base_stuff,ARRAY_SIZE(tid_base_stuff));
 }
-static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
+static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
+{
        return proc_pident_lookup(dir, dentry,
                                  tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
 }
@@ -3508,13 +3513,13 @@ static struct dentry *proc_task_instantiate(struct inode *dir,
        d_add(dentry, inode);
        /* Close the race of the process dying before we return the dentry */
-        if (pid_revalidate(dentry, NULL))
+        if (pid_revalidate(dentry, 0))
                error = NULL;
 out:
        return error;
 }
-static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
+static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
 {
        struct dentry *result = ERR_PTR(-ENOENT);
        struct task_struct *task;
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 2edf34f2eb61..b3647fe6a608 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -446,7 +446,7 @@ out_unlock:
 }
 struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry,
-                struct nameidata *nd)
+                unsigned int flags)
 {
        return proc_lookup_de(PDE(dir), dir, dentry);
 }
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index eca4aca5b6e2..e1167a1c9126 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -106,7 +106,7 @@ void pde_users_dec(struct proc_dir_entry *pde);
 extern spinlock_t proc_subdir_lock;
-struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *);
+struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int);
 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
 unsigned long task_vsize(struct mm_struct *);
 unsigned long task_statm(struct mm_struct *,
@@ -132,7 +132,7 @@ int proc_remount(struct super_block *sb, int *flags, char *data);
 * of the /proc/<pid> subdirectories.
 */
 int proc_readdir(struct file *, void *, filldir_t);
-struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *);
+struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int);
@@ -142,7 +142,7 @@ typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
 int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
        const char *name, int len,
        instantiate_t instantiate, struct task_struct *task, const void *ptr);
-int pid_revalidate(struct dentry *dentry, struct nameidata *nd);
+int pid_revalidate(struct dentry *dentry, unsigned int flags);
 struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task);
 extern const struct dentry_operations pid_dentry_operations;
 int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 0d9e23a39e49..b178ed733c36 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -56,7 +56,7 @@ static struct dentry *proc_ns_instantiate(struct inode *dir,
        d_set_d_op(dentry, &pid_dentry_operations);
        d_add(dentry, inode);
        /* Close the race of the process dying before we return the dentry */
-        if (pid_revalidate(dentry, NULL))
+        if (pid_revalidate(dentry, 0))
                error = NULL;
 out:
        return error;
@@ -140,7 +140,7 @@ const struct file_operations proc_ns_dir_operations = {
 };
 static struct dentry *proc_ns_dir_lookup(struct inode *dir,
-                                struct dentry *dentry, struct nameidata *nd)
+                                struct dentry *dentry, unsigned int flags)
 {
        struct dentry *error;
        struct task_struct *task = get_proc_task(dir);
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index 927cbd115e53..df7dd08d4391 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -101,6 +101,11 @@ void proc_device_tree_update_prop(struct proc_dir_entry *pde,
 {
        struct proc_dir_entry *ent;
+        if (!oldprop) {
+                proc_device_tree_add_prop(pde, newprop);
+                return;
+        }
        for (ent = pde->subdir; ent != NULL; ent = ent->next)
                if (ent->data == oldprop)
                        break;
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 06e1cc17caf6..fe72cd073dea 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -119,7 +119,7 @@ static struct net *get_proc_task_net(struct inode *dir)
 }
 static struct dentry *proc_tgid_net_lookup(struct inode *dir,
-                struct dentry *dentry, struct nameidata *nd)
+                struct dentry *dentry, unsigned int flags)
 {
        struct dentry *de;
        struct net *net;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 3476bca8f7af..dfafeb2b05a0 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -433,7 +433,7 @@ static struct ctl_table_header *grab_header(struct inode *inode)
 }
 static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
-                                        struct nameidata *nd)
+                                        unsigned int flags)
 {
        struct ctl_table_header *head = grab_header(dir);
        struct ctl_table_header *h = NULL;
@@ -794,9 +794,9 @@ static const struct inode_operations proc_sys_dir_operations = {
        .getattr        = proc_sys_getattr,
 };
-static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int proc_sys_revalidate(struct dentry *dentry, unsigned int flags)
 {
-        if (nd->flags & LOOKUP_RCU)
+        if (flags & LOOKUP_RCU)
                return -ECHILD;
        return !PROC_I(dentry->d_inode)->sysctl->unregistering;
 }
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 7c30fce037c0..9a2d9fd7cadd 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -111,7 +111,7 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
                options = data;
        }
-        sb = sget(fs_type, proc_test_super, proc_set_super, ns);
+        sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns);
        if (IS_ERR(sb))
                return ERR_CAST(sb);
@@ -121,7 +121,6 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
        }
        if (!sb->s_root) {
-                sb->s_flags = flags;
                err = proc_fill_super(sb);
                if (err) {
                        deactivate_locked_super(sb);
@@ -200,13 +199,12 @@ static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct
        return 0;
 }
-static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd)
+static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, unsigned int flags)
 {
-        if (!proc_lookup(dir, dentry, nd)) {
+        if (!proc_lookup(dir, dentry, flags))
                return NULL;
-        }
        
-        return proc_pid_lookup(dir, dentry, nd);
+        return proc_pid_lookup(dir, dentry, flags);
 }
 static int proc_root_readdir(struct file * filp,
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 5e289a7cbad1..5fe34c355e85 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -17,7 +17,7 @@
 static unsigned mounts_poll(struct file *file, poll_table *wait)
 {
-        struct proc_mounts *p = file->private_data;
+        struct proc_mounts *p = proc_mounts(file->private_data);
        struct mnt_namespace *ns = p->ns;
        unsigned res = POLLIN | POLLRDNORM;
@@ -121,7 +121,7 @@ out:
 static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt)
 {
-        struct proc_mounts *p = m->private;
+        struct proc_mounts *p = proc_mounts(m);
        struct mount *r = real_mount(mnt);
        struct super_block *sb = mnt->mnt_sb;
        struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
@@ -268,7 +268,6 @@ static int mounts_open_common(struct inode *inode, struct file *file,
        if (ret)
                goto err_free;
-        p->m.private = p;
        p->ns = ns;
        p->root = root;
        p->m.poll_event = ns->event;
@@ -288,7 +287,7 @@ static int mounts_open_common(struct inode *inode, struct file *file,
 static int mounts_release(struct inode *inode, struct file *file)
 {
-        struct proc_mounts *p = file->private_data;
+        struct proc_mounts *p = proc_mounts(file->private_data);
        path_put(&p->root);
        put_mnt_ns(p->ns);
        return seq_release(inode, file);
diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c
index a512c0b30e8e..d024505ba007 100644
--- a/fs/qnx4/namei.c
+++ b/fs/qnx4/namei.c
@@ -95,7 +95,7 @@ static struct buffer_head *qnx4_find_entry(int len, struct inode *dir,
        return NULL;
 }
-struct dentry * qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+struct dentry * qnx4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
        int ino;
        struct qnx4_inode_entry *de;
diff --git a/fs/qnx4/qnx4.h b/fs/qnx4/qnx4.h
index 244d4620189b..34e2d329c97e 100644
--- a/fs/qnx4/qnx4.h
+++ b/fs/qnx4/qnx4.h
@@ -23,7 +23,7 @@ struct qnx4_inode_info {
 };
 extern struct inode *qnx4_iget(struct super_block *, unsigned long);
-extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd);
+extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags);
 extern unsigned long qnx4_count_free_blocks(struct super_block *sb);
 extern unsigned long qnx4_block_map(struct inode *inode, long iblock);
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index e44012dc5645..2049c814bda4 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -622,7 +622,6 @@ static struct inode *qnx6_alloc_inode(struct super_block *sb)
 static void qnx6_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(qnx6_inode_cachep, QNX6_I(inode));
 }
diff --git a/fs/qnx6/namei.c b/fs/qnx6/namei.c
index 8a97289e04ad..0561326a94f5 100644
--- a/fs/qnx6/namei.c
+++ b/fs/qnx6/namei.c
@@ -13,7 +13,7 @@
 #include "qnx6.h"
 struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry,
-                                struct nameidata *nd)
+                                unsigned int flags)
 {
        unsigned ino;
        struct page *page;
diff --git a/fs/qnx6/qnx6.h b/fs/qnx6/qnx6.h
index 6c5e02a0b6a8..b00fcc960d37 100644
--- a/fs/qnx6/qnx6.h
+++ b/fs/qnx6/qnx6.h
@@ -45,7 +45,7 @@ struct qnx6_inode_info {
 extern struct inode *qnx6_iget(struct super_block *sb, unsigned ino);
 extern struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry,
-                                        struct nameidata *nd);
+                                        unsigned int flags);
 #ifdef CONFIG_QNX6FS_DEBUG
 extern void qnx6_superblock_debug(struct qnx6_super_block *,
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 10cbe841cb7e..36a29b753c79 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -78,7 +78,7 @@
 #include <linux/quotaops.h>
 #include "../internal.h" /* ugh */
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 /*
 * There are three quota SMP locks. dq_list_lock protects all lists with quotas
@@ -595,12 +595,14 @@ out:
 }
 EXPORT_SYMBOL(dquot_scan_active);
-int dquot_quota_sync(struct super_block *sb, int type, int wait)
+/* Write all dquots to quota files; return the first write error, or 0 */
+int dquot_writeback_dquots(struct super_block *sb, int type)
 {
        struct list_head *dirty;
        struct dquot *dquot;
        struct quota_info *dqopt = sb_dqopt(sb);
        int cnt;
+        int err, ret = 0;
        mutex_lock(&dqopt->dqonoff_mutex);
        for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -624,7 +626,9 @@ int dquot_quota_sync(struct super_block *sb, int type, int wait)
                        atomic_inc(&dquot->dq_count);
                        spin_unlock(&dq_list_lock);
                        dqstats_inc(DQST_LOOKUPS);
-                        sb->dq_op->write_dquot(dquot);
+                        err = sb->dq_op->write_dquot(dquot);
+                        if (!ret && err)
+                                ret = err; /* remember only the first failure */
                        dqput(dquot);
                        spin_lock(&dq_list_lock);
                }
@@ -638,7 +642,21 @@ int dquot_quota_sync(struct super_block *sb, int type, int wait)
        dqstats_inc(DQST_SYNCS);
        mutex_unlock(&dqopt->dqonoff_mutex);
-        if (!wait || (dqopt->flags & DQUOT_QUOTA_SYS_FILE))
+        return ret;
+}
+EXPORT_SYMBOL(dquot_writeback_dquots);
+/* Write all dquot structures to disk and make them visible from userspace */
+int dquot_quota_sync(struct super_block *sb, int type)
+{
+        struct quota_info *dqopt = sb_dqopt(sb);
+        int cnt;
+        int ret;
+        ret = dquot_writeback_dquots(sb, type);
+        if (ret)
+                return ret;
+        if (dqopt->flags & DQUOT_QUOTA_SYS_FILE)
                return 0;
        /* This is not very clever (and fast) but currently I don't know about
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 9a391204ca27..6f155788cbc6 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -9,7 +9,7 @@
 #include <linux/namei.h>
 #include <linux/slab.h>
 #include <asm/current.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/kernel.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
@@ -47,7 +47,7 @@ static int check_quotactl_permission(struct super_block *sb, int type, int cmd,
 static void quota_sync_one(struct super_block *sb, void *arg)
 {
        if (sb->s_qcop && sb->s_qcop->quota_sync)
-                sb->s_qcop->quota_sync(sb, *(int *)arg, 1);
+                sb->s_qcop->quota_sync(sb, *(int *)arg);
 }
 static int quota_sync_all(int type)
@@ -270,7 +270,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
        case Q_SYNC:
                if (!sb->s_qcop->quota_sync)
                        return -ENOSYS;
-                return sb->s_qcop->quota_sync(sb, type, 1);
+                return sb->s_qcop->quota_sync(sb, type);
        case Q_XQUOTAON:
        case Q_XQUOTAOFF:
        case Q_XQUOTARM:
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index a1fdabe21dec..eab8c09d3801 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -114,7 +114,7 @@ static int ramfs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
        return retval;
 }
-static int ramfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd)
+static int ramfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
 {
        return ramfs_mknod(dir, dentry, mode | S_IFREG, 0);
 }
diff --git a/fs/read_write.c b/fs/read_write.c
index c20614f86c01..1adfb691e4f1 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -55,10 +55,11 @@ static loff_t lseek_execute(struct file *file, struct inode *inode,
 * @file:       file structure to seek on
 * @offset:     file offset to seek to
 * @origin:     type of seek
- * @size:       max size of file system
+ * @size:       max size of this file in file system
+ * @eof:        offset used for SEEK_END position
 *
 * This is a variant of generic_file_llseek that allows passing in a custom
- * file size.
+ * maximum file size and a custom EOF position, for e.g. hashed directories
 *
 * Synchronization:
 * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms)
@@ -67,13 +68,13 @@ static loff_t lseek_execute(struct file *file, struct inode *inode,
 */
 loff_t
 generic_file_llseek_size(struct file *file, loff_t offset, int origin,
-                loff_t maxsize)
+                loff_t maxsize, loff_t eof)
 {
        struct inode *inode = file->f_mapping->host;
        switch (origin) {
        case SEEK_END:
-                offset += i_size_read(inode);
+                offset += eof;
                break;
        case SEEK_CUR:
                /*
@@ -99,7 +100,7 @@ generic_file_llseek_size(struct file *file, loff_t offset, int origin,
                 * In the generic case the entire file is data, so as long as
                 * offset isn't at the end of the file then the offset is data.
                 */
-                if (offset >= i_size_read(inode))
+                if (offset >= eof)
                        return -ENXIO;
                break;
        case SEEK_HOLE:
@@ -107,9 +108,9 @@ generic_file_llseek_size(struct file *file, loff_t offset, int origin,
                 * There is a virtual hole at the end of the file, so as long as
                 * offset isn't i_size or larger, return i_size.
                 */
-                if (offset >= i_size_read(inode))
+                if (offset >= eof)
                        return -ENXIO;
-                offset = i_size_read(inode);
+                offset = eof;
                break;
        }
@@ -132,7 +133,8 @@ loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
        struct inode *inode = file->f_mapping->host;
        return generic_file_llseek_size(file, offset, origin,
-                                        inode->i_sb->s_maxbytes);
+                                        inode->i_sb->s_maxbytes,
+                                        i_size_read(inode));
 }
 EXPORT_SYMBOL(generic_file_llseek);
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 84e8a69cee9d..8567fb847601 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -322,7 +322,7 @@ static int reiserfs_find_entry(struct inode *dir, const char *name, int namelen,
 }
 static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
-                                      struct nameidata *nd)
+                                      unsigned int flags)
 {
        int retval;
        int lock_depth;
@@ -573,7 +573,7 @@ static int new_inode_init(struct inode *inode, struct inode *dir, umode_t mode)
 }
 static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                           struct nameidata *nd)
+                           bool excl)
 {
        int retval;
        struct inode *inode;
@@ -634,8 +634,8 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mod
        reiserfs_update_inode_transaction(inode);
        reiserfs_update_inode_transaction(dir);
-        d_instantiate(dentry, inode);
        unlock_new_inode(inode);
+        d_instantiate(dentry, inode);
        retval = journal_end(&th, dir->i_sb, jbegin_count);
      out_failed:
@@ -712,8 +712,8 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode
                goto out_failed;
        }
-        d_instantiate(dentry, inode);
        unlock_new_inode(inode);
+        d_instantiate(dentry, inode);
        retval = journal_end(&th, dir->i_sb, jbegin_count);
      out_failed:
@@ -800,8 +800,8 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
        // the above add_entry did not update dir's stat data
        reiserfs_update_sd(&th, dir);
-        d_instantiate(dentry, inode);
        unlock_new_inode(inode);
+        d_instantiate(dentry, inode);
        retval = journal_end(&th, dir->i_sb, jbegin_count);
 out_failed:
        reiserfs_write_unlock_once(dir->i_sb, lock_depth);
@@ -1096,8 +1096,8 @@ static int reiserfs_symlink(struct inode *parent_dir,
                goto out_failed;
        }
-        d_instantiate(dentry, inode);
        unlock_new_inode(inode);
+        d_instantiate(dentry, inode);
        retval = journal_end(&th, parent_dir->i_sb, jbegin_count);
      out_failed:
        reiserfs_write_unlock(parent_dir->i_sb);
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 2c1ade692cc8..e60e87035bb3 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -403,7 +403,7 @@ static void *r_start(struct seq_file *m, loff_t * pos)
        if (l)
                return NULL;
-        if (IS_ERR(sget(&reiserfs_fs_type, test_sb, set_sb, s)))
+        if (IS_ERR(sget(&reiserfs_fs_type, test_sb, set_sb, 0, s)))
                return NULL;
        up_write(&s->s_umount);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 651ce767b55d..7a37dabf5a96 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -68,6 +68,11 @@ static int reiserfs_sync_fs(struct super_block *s, int wait)
 {
        struct reiserfs_transaction_handle th;
+        /*
+         * Writeback quota in non-journalled quota case - journalled quota has
+         * no dirty dquots
+         */
+        dquot_writeback_dquots(s, -1);
        reiserfs_write_lock(s);
        if (!journal_begin(&th, s, 1))
                if (!journal_end_sync(&th, s, 1))
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 46fc1c20a6b1..d319963aeb11 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -62,7 +62,7 @@
 static int xattr_create(struct inode *dir, struct dentry *dentry, int mode)
 {
        BUG_ON(!mutex_is_locked(&dir->i_mutex));
-        return dir->i_op->create(dir, dentry, mode, NULL);
+        return dir->i_op->create(dir, dentry, mode, true);
 }
 #endif
@@ -942,7 +942,7 @@ int reiserfs_permission(struct inode *inode, int mask)
        return generic_permission(inode, mask);
 }
-static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int xattr_hide_revalidate(struct dentry *dentry, unsigned int flags)
 {
        return -EPERM;
 }
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index e64f6b5f7ae5..77c5f2173983 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -210,7 +210,7 @@ out:
 * look up an entry in a directory
 */
 static struct dentry *romfs_lookup(struct inode *dir, struct dentry *dentry,
-                                   struct nameidata *nd)
+                                   unsigned int flags)
 {
        unsigned long offset, maxoff;
        struct inode *inode;
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 0cbd0494b79e..14cf9de1dbe1 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -385,15 +385,12 @@ int seq_escape(struct seq_file *m, const char *s, const char *esc)
 }
 EXPORT_SYMBOL(seq_escape);
-int seq_printf(struct seq_file *m, const char *f, ...)
+int seq_vprintf(struct seq_file *m, const char *f, va_list args)
 {
-        va_list args;
        int len;
        if (m->count < m->size) {
-                va_start(args, f);
                len = vsnprintf(m->buf + m->count, m->size - m->count, f, args);
-                va_end(args);
                if (m->count + len < m->size) {
                        m->count += len;
                        return 0;
@@ -402,6 +399,19 @@ int seq_printf(struct seq_file *m, const char *f, ...)
        seq_set_overflow(m);
        return -1;
 }
+EXPORT_SYMBOL(seq_vprintf);
+int seq_printf(struct seq_file *m, const char *f, ...)
+{
+        int ret;
+        va_list args;
+        va_start(args, f);
+        ret = seq_vprintf(m, f, args);
+        va_end(args);
+        return ret;
+}
 EXPORT_SYMBOL(seq_printf);
 /**
diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c
index abcc58f3c152..7834a517f7f4 100644
--- a/fs/squashfs/namei.c
+++ b/fs/squashfs/namei.c
@@ -134,7 +134,7 @@ out:
 static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry,
-                                 struct nameidata *nd)
+                                 unsigned int flags)
 {
        const unsigned char *name = dentry->d_name.name;
        int len = dentry->d_name.len;
diff --git a/fs/super.c b/fs/super.c
index cf001775617f..c743fb3be4b8 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -105,11 +105,12 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc)
 /**
 *      alloc_super     -       create new superblock
 *      @type:  filesystem type superblock should belong to
+ *      @flags: the mount flags
 *
 *      Allocates and initializes a new &struct super_block.  alloc_super()
 *      returns a pointer new superblock or %NULL if allocation had failed.
 */
-static struct super_block *alloc_super(struct file_system_type *type)
+static struct super_block *alloc_super(struct file_system_type *type, int flags)
 {
        struct super_block *s = kzalloc(sizeof(struct super_block),  GFP_USER);
        static const struct super_operations default_op;
@@ -136,6 +137,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
 #else
                INIT_LIST_HEAD(&s->s_files);
 #endif
+                s->s_flags = flags;
                s->s_bdi = &default_backing_dev_info;
                INIT_HLIST_NODE(&s->s_instances);
                INIT_HLIST_BL_HEAD(&s->s_anon);
@@ -415,11 +417,13 @@ EXPORT_SYMBOL(generic_shutdown_super);
 *      @type:  filesystem type superblock should belong to
 *      @test:  comparison callback
 *      @set:   setup callback
+ *      @flags: mount flags
 *      @data:  argument to each of them
 */
 struct super_block *sget(struct file_system_type *type,
                        int (*test)(struct super_block *,void *),
                        int (*set)(struct super_block *,void *),
+                        int flags,
                        void *data)
 {
        struct super_block *s = NULL;
@@ -450,7 +454,7 @@ retry:
        }
        if (!s) {
                spin_unlock(&sb_lock);
-                s = alloc_super(type);
+                s = alloc_super(type, flags);
                if (!s)
                        return ERR_PTR(-ENOMEM);
                goto retry;
@@ -925,13 +929,12 @@ struct dentry *mount_ns(struct file_system_type *fs_type, int flags,
 {
        struct super_block *sb;
-        sb = sget(fs_type, ns_test_super, ns_set_super, data);
+        sb = sget(fs_type, ns_test_super, ns_set_super, flags, data);
        if (IS_ERR(sb))
                return ERR_CAST(sb);
        if (!sb->s_root) {
                int err;
-                sb->s_flags = flags;
                err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
                if (err) {
                        deactivate_locked_super(sb);
@@ -992,7 +995,8 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
                error = -EBUSY;
                goto error_bdev;
        }
-        s = sget(fs_type, test_bdev_super, set_bdev_super, bdev);
+        s = sget(fs_type, test_bdev_super, set_bdev_super, flags | MS_NOSEC,
+                 bdev);
        mutex_unlock(&bdev->bd_fsfreeze_mutex);
        if (IS_ERR(s))
                goto error_s;
@@ -1017,7 +1021,6 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
        } else {
                char b[BDEVNAME_SIZE];
-                s->s_flags = flags | MS_NOSEC;
                s->s_mode = mode;
                strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
                sb_set_blocksize(s, block_size(bdev));
@@ -1062,13 +1065,11 @@ struct dentry *mount_nodev(struct file_system_type *fs_type,
        int (*fill_super)(struct super_block *, void *, int))
 {
        int error;
-        struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
+        struct super_block *s = sget(fs_type, NULL, set_anon_super, flags, NULL);
        if (IS_ERR(s))
                return ERR_CAST(s);
-        s->s_flags = flags;
        error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
        if (error) {
                deactivate_locked_super(s);
@@ -1091,11 +1092,10 @@ struct dentry *mount_single(struct file_system_type *fs_type,
        struct super_block *s;
        int error;
-        s = sget(fs_type, compare_single, set_anon_super, NULL);
+        s = sget(fs_type, compare_single, set_anon_super, flags, NULL);
        if (IS_ERR(s))
                return ERR_CAST(s);
        if (!s->s_root) {
-                s->s_flags = flags;
                error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
                if (error) {
                        deactivate_locked_super(s);
diff --git a/fs/sync.c b/fs/sync.c
index 11e3d1c44901..eb8722dc556f 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -29,16 +29,6 @@
 */
 static int __sync_filesystem(struct super_block *sb, int wait)
 {
-        /*
-         * This should be safe, as we require bdi backing to actually
-         * write out data in the first place
-         */
-        if (sb->s_bdi == &noop_backing_dev_info)
-                return 0;
-        if (sb->s_qcop && sb->s_qcop->quota_sync)
-                sb->s_qcop->quota_sync(sb, -1, wait);
        if (wait)
                sync_inodes_sb(sb);
        else
@@ -77,29 +67,48 @@ int sync_filesystem(struct super_block *sb)
 }
 EXPORT_SYMBOL_GPL(sync_filesystem);
-static void sync_one_sb(struct super_block *sb, void *arg)
+static void sync_inodes_one_sb(struct super_block *sb, void *arg)
 {
        if (!(sb->s_flags & MS_RDONLY))
-                __sync_filesystem(sb, *(int *)arg);
+                sync_inodes_sb(sb);
 }
-/*
- * Sync all the data for all the filesystems (called by sys_sync() and
+static void sync_fs_one_sb(struct super_block *sb, void *arg)
- * emergency sync)
- */
-static void sync_filesystems(int wait)
 {
-        iterate_supers(sync_one_sb, &wait);
+        if (!(sb->s_flags & MS_RDONLY) && sb->s_op->sync_fs)
+                sb->s_op->sync_fs(sb, *(int *)arg);
+}
+static void fdatawrite_one_bdev(struct block_device *bdev, void *arg)
+{
+        filemap_fdatawrite(bdev->bd_inode->i_mapping);
+}
+static void fdatawait_one_bdev(struct block_device *bdev, void *arg)
+{
+        filemap_fdatawait(bdev->bd_inode->i_mapping);
 }
 /*
- * sync everything.  Start out by waking pdflush, because that writes back
+ * Sync everything. We start by waking flusher threads so that most of
- * all queues in parallel.
+ * writeback runs on all devices in parallel. Then we sync all inodes reliably
+ * which effectively also waits for all flusher threads to finish doing
+ * writeback. At this point all data is on disk so metadata should be stable
+ * and we tell filesystems to sync their metadata via ->sync_fs() calls.
+ * Finally, we write out all block devices because some filesystems (e.g. ext2)
+ * just write metadata (such as inodes or bitmaps) to block device page cache
+ * and do not sync it on their own in ->sync_fs().
 */
 SYSCALL_DEFINE0(sync)
 {
+        int nowait = 0, wait = 1;
        wakeup_flusher_threads(0, WB_REASON_SYNC);
-        sync_filesystems(0);
+        iterate_supers(sync_inodes_one_sb, NULL);
-        sync_filesystems(1);
+        iterate_supers(sync_fs_one_sb, &nowait);
+        iterate_supers(sync_fs_one_sb, &wait);
+        iterate_bdevs(fdatawrite_one_bdev, NULL);
+        iterate_bdevs(fdatawait_one_bdev, NULL);
        if (unlikely(laptop_mode))
                laptop_sync_completion();
        return 0;
@@ -107,12 +116,18 @@ SYSCALL_DEFINE0(sync)
 static void do_sync_work(struct work_struct *work)
 {
+        int nowait = 0;
        /*
         * Sync twice to reduce the possibility we skipped some inodes / pages
         * because they were temporarily locked
         */
-        sync_filesystems(0);
+        iterate_supers(sync_inodes_one_sb, &nowait);
-        sync_filesystems(0);
+        iterate_supers(sync_fs_one_sb, &nowait);
+        iterate_bdevs(fdatawrite_one_bdev, NULL);
+        iterate_supers(sync_inodes_one_sb, &nowait);
+        iterate_supers(sync_fs_one_sb, &nowait);
+        iterate_bdevs(fdatawrite_one_bdev, NULL);
        printk("Emergency Sync complete\n");
        kfree(work);
 }
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index e6bb9b2a4cbe..a5cf784f9cc2 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -300,15 +300,15 @@ void release_sysfs_dirent(struct sysfs_dirent * sd)
 static int sysfs_dentry_delete(const struct dentry *dentry)
 {
        struct sysfs_dirent *sd = dentry->d_fsdata;
-        return !!(sd->s_flags & SYSFS_FLAG_REMOVED);
+        return !(sd && !(sd->s_flags & SYSFS_FLAG_REMOVED));
 }
-static int sysfs_dentry_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int sysfs_dentry_revalidate(struct dentry *dentry, unsigned int flags)
 {
        struct sysfs_dirent *sd;
        int is_dir;
-        if (nd->flags & LOOKUP_RCU)
+        if (flags & LOOKUP_RCU)
                return -ECHILD;
        sd = dentry->d_fsdata;
@@ -355,18 +355,15 @@ out_bad:
        return 0;
 }
-static void sysfs_dentry_iput(struct dentry *dentry, struct inode *inode)
+static void sysfs_dentry_release(struct dentry *dentry)
 {
-        struct sysfs_dirent * sd = dentry->d_fsdata;
+        sysfs_put(dentry->d_fsdata);
-        sysfs_put(sd);
-        iput(inode);
 }
-static const struct dentry_operations sysfs_dentry_ops = {
+const struct dentry_operations sysfs_dentry_ops = {
        .d_revalidate   = sysfs_dentry_revalidate,
        .d_delete       = sysfs_dentry_delete,
-        .d_iput         = sysfs_dentry_iput,
+        .d_release      = sysfs_dentry_release,
 };
 struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
@@ -764,7 +761,7 @@ int sysfs_create_dir(struct kobject * kobj)
 }
 static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
-                                struct nameidata *nd)
+                                unsigned int flags)
 {
        struct dentry *ret = NULL;
        struct dentry *parent = dentry->d_parent;
@@ -786,6 +783,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
                ret = ERR_PTR(-ENOENT);
                goto out_unlock;
        }
+        dentry->d_fsdata = sysfs_get(sd);
        /* attach dentry and inode */
        inode = sysfs_get_inode(dir->i_sb, sd);
@@ -795,16 +793,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
        }
        /* instantiate and hash dentry */
-        ret = d_find_alias(inode);
+        ret = d_materialise_unique(dentry, inode);
-        if (!ret) {
-                d_set_d_op(dentry, &sysfs_dentry_ops);
-                dentry->d_fsdata = sysfs_get(sd);
-                d_add(dentry, inode);
-        } else {
-                d_move(ret, dentry);
-                iput(inode);
-        }
 out_unlock:
        mutex_unlock(&sysfs_mutex);
        return ret;
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 52c3bdb66a84..71eb7e253927 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -68,6 +68,7 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
        }
        root->d_fsdata = &sysfs_root;
        sb->s_root = root;
+        sb->s_d_op = &sysfs_dentry_ops;
        return 0;
 }
@@ -117,13 +118,12 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
        for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++)
                info->ns[type] = kobj_ns_grab_current(type);
-        sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info);
+        sb = sget(fs_type, sysfs_test_super, sysfs_set_super, flags, info);
        if (IS_ERR(sb) || sb->s_fs_info != info)
                free_sysfs_super_info(info);
        if (IS_ERR(sb))
                return ERR_CAST(sb);
        if (!sb->s_root) {
-                sb->s_flags = flags;
                error = sysfs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
                if (error) {
                        deactivate_locked_super(sb);
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 661a9639570b..d73c0932bbd6 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -157,6 +157,7 @@ extern struct kmem_cache *sysfs_dir_cachep;
 */
 extern struct mutex sysfs_mutex;
 extern spinlock_t sysfs_assoc_lock;
+extern const struct dentry_operations sysfs_dentry_ops;
 extern const struct file_operations sysfs_dir_operations;
 extern const struct inode_operations sysfs_dir_inode_operations;
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 08d0b2568cd3..80e1e2b18df1 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -43,7 +43,6 @@ static int sysv_sync_fs(struct super_block *sb, int wait)
         * then attach current time stamp.
         * But if the filesystem was marked clean, keep it clean.
         */
-        sb->s_dirt = 0;
        old_time = fs32_to_cpu(sbi, *sbi->s_sb_time);
        if (sbi->s_type == FSTYPE_SYSV4) {
                if (*sbi->s_sb_state == cpu_to_fs32(sbi, 0x7c269d38 - old_time))
@@ -57,23 +56,12 @@ static int sysv_sync_fs(struct super_block *sb, int wait)
        return 0;
 }
-static void sysv_write_super(struct super_block *sb)
-{
-        if (!(sb->s_flags & MS_RDONLY))
-                sysv_sync_fs(sb, 1);
-        else
-                sb->s_dirt = 0;
-}
 static int sysv_remount(struct super_block *sb, int *flags, char *data)
 {
        struct sysv_sb_info *sbi = SYSV_SB(sb);
-        lock_super(sb);
        if (sbi->s_forced_ro)
                *flags |= MS_RDONLY;
-        if (*flags & MS_RDONLY)
-                sysv_write_super(sb);
-        unlock_super(sb);
        return 0;
 }
@@ -81,9 +69,6 @@ static void sysv_put_super(struct super_block *sb)
 {
        struct sysv_sb_info *sbi = SYSV_SB(sb);
-        if (sb->s_dirt)
-                sysv_write_super(sb);
        if (!(sb->s_flags & MS_RDONLY)) {
                /* XXX ext2 also updates the state here */
                mark_buffer_dirty(sbi->s_bh1);
@@ -357,7 +342,6 @@ const struct super_operations sysv_sops = {
        .write_inode    = sysv_write_inode,
        .evict_inode    = sysv_evict_inode,
        .put_super      = sysv_put_super,
-        .write_super    = sysv_write_super,
        .sync_fs        = sysv_sync_fs,
        .remount_fs     = sysv_remount,
        .statfs         = sysv_statfs,
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index d7466e293614..1c0d5f264767 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -43,7 +43,7 @@ const struct dentry_operations sysv_dentry_operations = {
        .d_hash         = sysv_hash,
 };
-static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd)
+static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, unsigned int flags)
 {
        struct inode * inode = NULL;
        ino_t ino;
@@ -80,7 +80,7 @@ static int sysv_mknod(struct inode * dir, struct dentry * dentry, umode_t mode,
        return err;
 }
-static int sysv_create(struct inode * dir, struct dentry * dentry, umode_t mode, struct nameidata *nd)
+static int sysv_create(struct inode * dir, struct dentry * dentry, umode_t mode, bool excl)
 {
        return sysv_mknod(dir, dentry, mode, 0);
 }
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 11b07672f6c5..0bc35fdc58e2 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -117,7 +117,6 @@ static inline void dirty_sb(struct super_block *sb)
        mark_buffer_dirty(sbi->s_bh1);
        if (sbi->s_bh1 != sbi->s_bh2)
                mark_buffer_dirty(sbi->s_bh2);
-        sb->s_dirt = 1;
 }
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 92df3b081539..bb3167257aab 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -2802,6 +2802,8 @@ static ssize_t dfs_file_read(struct file *file, char __user *u, size_t count,
                val = d->chk_fs;
        else if (dent == d->dfs_tst_rcvry)
                val = d->tst_rcvry;
+        else if (dent == d->dfs_ro_error)
+                val = c->ro_error;
        else
                return -EINVAL;
@@ -2885,6 +2887,8 @@ static ssize_t dfs_file_write(struct file *file, const char __user *u,
                d->chk_fs = val;
        else if (dent == d->dfs_tst_rcvry)
                d->tst_rcvry = val;
+        else if (dent == d->dfs_ro_error)
+                c->ro_error = !!val;
        else
                return -EINVAL;
@@ -2996,6 +3000,13 @@ int dbg_debugfs_init_fs(struct ubifs_info *c)
                goto out_remove;
        d->dfs_tst_rcvry = dent;
+        fname = "ro_error";
+        dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
+                                   &dfs_fops);
+        if (IS_ERR_OR_NULL(dent))
+                goto out_remove;
+        d->dfs_ro_error = dent;
        return 0;
 out_remove:
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 486a8e024fb6..8b8cc4e945f4 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -79,6 +79,10 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c,
 * @dfs_chk_lprops: debugfs knob to enable UBIFS LEP properties extra checks
 * @dfs_chk_fs: debugfs knob to enable UBIFS contents extra checks
 * @dfs_tst_rcvry: debugfs knob to enable UBIFS recovery testing
+ * @dfs_ro_error: debugfs knob to switch UBIFS to R/O mode (different to
+ *                re-mounting to R/O mode because it does not flush any buffers
+ *                and UBIFS just starts returning -EROFS on all write
+ *               operations)
 */
 struct ubifs_debug_info {
        struct ubifs_zbranch old_zroot;
@@ -122,6 +126,7 @@ struct ubifs_debug_info {
        struct dentry *dfs_chk_lprops;
        struct dentry *dfs_chk_fs;
        struct dentry *dfs_tst_rcvry;
+        struct dentry *dfs_ro_error;
 };
 /**
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index a6d42efc76d2..c95681cf1b71 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -184,7 +184,7 @@ static int dbg_check_name(const struct ubifs_info *c,
 }
 static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
-                                   struct nameidata *nd)
+                                   unsigned int flags)
 {
        int err;
        union ubifs_key key;
@@ -246,7 +246,7 @@ out:
 }
 static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                        struct nameidata *nd)
+                        bool excl)
 {
        struct inode *inode;
        struct ubifs_info *c = dir->i_sb->s_fs_info;
@@ -969,7 +969,7 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
        struct ubifs_budget_req ino_req = { .dirtied_ino = 1,
                        .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) };
        struct timespec time;
-        unsigned int saved_nlink;
+        unsigned int uninitialized_var(saved_nlink);
        /*
         * Budget request settings: deletion direntry, new direntry, removing
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index b02734db187c..cebf17ea0458 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -176,7 +176,7 @@ int ubifs_orphan_start_commit(struct ubifs_info *c)
                *last = orphan;
                last = &orphan->cnext;
        }
-        *last = orphan->cnext;
+        *last = NULL;
        c->cmt_orphans = c->new_orphans;
        c->new_orphans = 0;
        dbg_cmt("%d orphans to commit", c->cmt_orphans);
@@ -382,7 +382,7 @@ static int consolidate(struct ubifs_info *c)
                        last = &orphan->cnext;
                        cnt += 1;
                }
-                *last = orphan->cnext;
+                *last = NULL;
                ubifs_assert(cnt == c->tot_orphans - c->new_orphans);
                c->cmt_orphans = cnt;
                c->ohead_lnum = c->orph_first;
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 3a2da7e476e5..eba46d4a7619 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -1007,7 +1007,7 @@ out:
 */
 int ubifs_replay_journal(struct ubifs_info *c)
 {
-        int err, i, lnum, offs, free;
+        int err, lnum, free;
        BUILD_BUG_ON(UBIFS_TRUN_KEY > 5);
@@ -1025,25 +1025,17 @@ int ubifs_replay_journal(struct ubifs_info *c)
        dbg_mnt("start replaying the journal");
        c->replaying = 1;
        lnum = c->ltail_lnum = c->lhead_lnum;
-        offs = c->lhead_offs;
-        for (i = 0; i < c->log_lebs; i++, lnum++) {
+        lnum = UBIFS_LOG_LNUM;
-                if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) {
+        do {
-                        /*
+                err = replay_log_leb(c, lnum, 0, c->sbuf);
-                         * The log is logically circular, we reached the last
-                         * LEB, switch to the first one.
-                         */
-                        lnum = UBIFS_LOG_LNUM;
-                        offs = 0;
-                }
-                err = replay_log_leb(c, lnum, offs, c->sbuf);
                if (err == 1)
                        /* We hit the end of the log */
                        break;
                if (err)
                        goto out;
-                offs = 0;
+                lnum = ubifs_next_log_lnum(c, lnum);
-        }
+        } while (lnum != UBIFS_LOG_LNUM);
        err = replay_buds(c);
        if (err)
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index ef3d1ba6d992..15e2fc5aa60b 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -718,8 +718,12 @@ static int fixup_free_space(struct ubifs_info *c)
                lnum = ubifs_next_log_lnum(c, lnum);
        }
-        /* Fixup the current log head */
+        /*
-        err = fixup_leb(c, c->lhead_lnum, c->lhead_offs);
+         * Fixup the log head which contains only a CS node at the
+         * beginning.
+         */
+        err = fixup_leb(c, c->lhead_lnum,
+                        ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size));
        if (err)
                goto out;
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 5862dd9d2784..1c766c39c038 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2136,7 +2136,7 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
        dbg_gen("opened ubi%d_%d", c->vi.ubi_num, c->vi.vol_id);
-        sb = sget(fs_type, sb_test, sb_set, c);
+        sb = sget(fs_type, sb_test, sb_set, flags, c);
        if (IS_ERR(sb)) {
                err = PTR_ERR(sb);
                kfree(c);
@@ -2153,7 +2153,6 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
                        goto out_deact;
                }
        } else {
-                sb->s_flags = flags;
                err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
                if (err)
                        goto out_deact;
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 873e1bab9c4c..fafaad795cd6 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1247,7 +1247,6 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
 {
        struct fileEntry *fe;
        struct extendedFileEntry *efe;
-        int offset;
        struct udf_sb_info *sbi = UDF_SB(inode->i_sb);
        struct udf_inode_info *iinfo = UDF_I(inode);
        unsigned int link_count;
@@ -1359,7 +1358,6 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
                iinfo->i_lenEAttr = le32_to_cpu(fe->lengthExtendedAttr);
                iinfo->i_lenAlloc = le32_to_cpu(fe->lengthAllocDescs);
                iinfo->i_checkpoint = le32_to_cpu(fe->checkpoint);
-                offset = sizeof(struct fileEntry) + iinfo->i_lenEAttr;
        } else {
                inode->i_blocks = le64_to_cpu(efe->logicalBlocksRecorded) <<
                    (inode->i_sb->s_blocksize_bits - 9);
@@ -1381,8 +1379,6 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
                iinfo->i_lenEAttr = le32_to_cpu(efe->lengthExtendedAttr);
                iinfo->i_lenAlloc = le32_to_cpu(efe->lengthAllocDescs);
                iinfo->i_checkpoint = le32_to_cpu(efe->checkpoint);
-                offset = sizeof(struct extendedFileEntry) +
-                                                        iinfo->i_lenEAttr;
        }
        switch (fe->icbTag.fileType) {
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 18024178ac4c..95fee278ab9d 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -251,7 +251,7 @@ out_ok:
 }
 static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
-                                 struct nameidata *nd)
+                                 unsigned int flags)
 {
        struct inode *inode = NULL;
        struct fileIdentDesc cfi;
@@ -551,7 +551,7 @@ static int udf_delete_entry(struct inode *inode, struct fileIdentDesc *fi,
 }
 static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                      struct nameidata *nd)
+                      bool excl)
 {
        struct udf_fileident_bh fibh;
        struct inode *inode;
@@ -1279,6 +1279,7 @@ static int udf_encode_fh(struct inode *inode, __u32 *fh, int *lenp,
        *lenp = 3;
        fid->udf.block = location.logicalBlockNum;
        fid->udf.partref = location.partitionReferenceNum;
+        fid->udf.parent_partref = 0;
        fid->udf.generation = inode->i_generation;
        if (parent) {
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 8d86a8706c0e..dcbf98722afc 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -252,6 +252,63 @@ static int udf_sb_alloc_partition_maps(struct super_block *sb, u32 count)
        return 0;
 }
+static void udf_sb_free_bitmap(struct udf_bitmap *bitmap)
+{
+        int i;
+        int nr_groups = bitmap->s_nr_groups;
+        int size = sizeof(struct udf_bitmap) + (sizeof(struct buffer_head *) *
+                                                nr_groups);
+        for (i = 0; i < nr_groups; i++)
+                if (bitmap->s_block_bitmap[i])
+                        brelse(bitmap->s_block_bitmap[i]);
+        if (size <= PAGE_SIZE)
+                kfree(bitmap);
+        else
+                vfree(bitmap);
+}
+static void udf_free_partition(struct udf_part_map *map)
+{
+        int i;
+        struct udf_meta_data *mdata;
+        if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE)
+                iput(map->s_uspace.s_table);
+        if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE)
+                iput(map->s_fspace.s_table);
+        if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP)
+                udf_sb_free_bitmap(map->s_uspace.s_bitmap);
+        if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP)
+                udf_sb_free_bitmap(map->s_fspace.s_bitmap);
+        if (map->s_partition_type == UDF_SPARABLE_MAP15)
+                for (i = 0; i < 4; i++)
+                        brelse(map->s_type_specific.s_sparing.s_spar_map[i]);
+        else if (map->s_partition_type == UDF_METADATA_MAP25) {
+                mdata = &map->s_type_specific.s_metadata;
+                iput(mdata->s_metadata_fe);
+                mdata->s_metadata_fe = NULL;
+                iput(mdata->s_mirror_fe);
+                mdata->s_mirror_fe = NULL;
+                iput(mdata->s_bitmap_fe);
+                mdata->s_bitmap_fe = NULL;
+        }
+}
+static void udf_sb_free_partitions(struct super_block *sb)
+{
+        struct udf_sb_info *sbi = UDF_SB(sb);
+        int i;
+        for (i = 0; i < sbi->s_partitions; i++)
+                udf_free_partition(&sbi->s_partmaps[i]);
+        kfree(sbi->s_partmaps);
+        sbi->s_partmaps = NULL;
+}
 static int udf_show_options(struct seq_file *seq, struct dentry *root)
 {
        struct super_block *sb = root->d_sb;
@@ -1283,7 +1340,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
        BUG_ON(ident != TAG_IDENT_LVD);
        lvd = (struct logicalVolDesc *)bh->b_data;
        table_len = le32_to_cpu(lvd->mapTableLength);
-        if (sizeof(*lvd) + table_len > sb->s_blocksize) {
+        if (table_len > sb->s_blocksize - sizeof(*lvd)) {
                udf_err(sb, "error loading logical volume descriptor: "
                        "Partition table too long (%u > %lu)\n", table_len,
                        sb->s_blocksize - sizeof(*lvd));
@@ -1596,7 +1653,11 @@ static int udf_load_sequence(struct super_block *sb, struct buffer_head *bh,
        /* responsible for finding the PartitionDesc(s) */
        if (!udf_process_sequence(sb, main_s, main_e, fileset))
                return 1;
-        return !udf_process_sequence(sb, reserve_s, reserve_e, fileset);
+        udf_sb_free_partitions(sb);
+        if (!udf_process_sequence(sb, reserve_s, reserve_e, fileset))
+                return 1;
+        udf_sb_free_partitions(sb);
+        return 0;
 }
 /*
@@ -1861,55 +1922,8 @@ u64 lvid_get_unique_id(struct super_block *sb)
        return ret;
 }
-static void udf_sb_free_bitmap(struct udf_bitmap *bitmap)
-{
-        int i;
-        int nr_groups = bitmap->s_nr_groups;
-        int size = sizeof(struct udf_bitmap) + (sizeof(struct buffer_head *) *
-                                                nr_groups);
-        for (i = 0; i < nr_groups; i++)
-                if (bitmap->s_block_bitmap[i])
-                        brelse(bitmap->s_block_bitmap[i]);
-        if (size <= PAGE_SIZE)
-                kfree(bitmap);
-        else
-                vfree(bitmap);
-}
-static void udf_free_partition(struct udf_part_map *map)
-{
-        int i;
-        struct udf_meta_data *mdata;
-        if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE)
-                iput(map->s_uspace.s_table);
-        if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE)
-                iput(map->s_fspace.s_table);
-        if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP)
-                udf_sb_free_bitmap(map->s_uspace.s_bitmap);
-        if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP)
-                udf_sb_free_bitmap(map->s_fspace.s_bitmap);
-        if (map->s_partition_type == UDF_SPARABLE_MAP15)
-                for (i = 0; i < 4; i++)
-                        brelse(map->s_type_specific.s_sparing.s_spar_map[i]);
-        else if (map->s_partition_type == UDF_METADATA_MAP25) {
-                mdata = &map->s_type_specific.s_metadata;
-                iput(mdata->s_metadata_fe);
-                mdata->s_metadata_fe = NULL;
-                iput(mdata->s_mirror_fe);
-                mdata->s_mirror_fe = NULL;
-                iput(mdata->s_bitmap_fe);
-                mdata->s_bitmap_fe = NULL;
-        }
-}
 static int udf_fill_super(struct super_block *sb, void *options, int silent)
 {
-        int i;
        int ret;
        struct inode *inode = NULL;
        struct udf_options uopt;
@@ -1974,7 +1988,6 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
        sb->s_op = &udf_sb_ops;
        sb->s_export_op = &udf_export_ops;
-        sb->s_dirt = 0;
        sb->s_magic = UDF_SUPER_MAGIC;
        sb->s_time_gran = 1000;
@@ -2072,9 +2085,6 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 error_out:
        if (sbi->s_vat_inode)
                iput(sbi->s_vat_inode);
-        if (sbi->s_partitions)
-                for (i = 0; i < sbi->s_partitions; i++)
-                        udf_free_partition(&sbi->s_partmaps[i]);
 #ifdef CONFIG_UDF_NLS
        if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP))
                unload_nls(sbi->s_nls_map);
@@ -2082,8 +2092,7 @@ error_out:
        if (!(sb->s_flags & MS_RDONLY))
                udf_close_lvid(sb);
        brelse(sbi->s_lvid_bh);
+        udf_sb_free_partitions(sb);
-        kfree(sbi->s_partmaps);
        kfree(sbi);
        sb->s_fs_info = NULL;
@@ -2096,10 +2105,6 @@ void _udf_err(struct super_block *sb, const char *function,
        struct va_format vaf;
        va_list args;
-        /* mark sb error */
-        if (!(sb->s_flags & MS_RDONLY))
-                sb->s_dirt = 1;
        va_start(args, fmt);
        vaf.fmt = fmt;
@@ -2128,16 +2133,12 @@ void _udf_warn(struct super_block *sb, const char *function,
 static void udf_put_super(struct super_block *sb)
 {
-        int i;
        struct udf_sb_info *sbi;
        sbi = UDF_SB(sb);
        if (sbi->s_vat_inode)
                iput(sbi->s_vat_inode);
-        if (sbi->s_partitions)
-                for (i = 0; i < sbi->s_partitions; i++)
-                        udf_free_partition(&sbi->s_partmaps[i]);
 #ifdef CONFIG_UDF_NLS
        if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP))
                unload_nls(sbi->s_nls_map);
@@ -2145,7 +2146,7 @@ static void udf_put_super(struct super_block *sb)
        if (!(sb->s_flags & MS_RDONLY))
                udf_close_lvid(sb);
        brelse(sbi->s_lvid_bh);
-        kfree(sbi->s_partmaps);
+        udf_sb_free_partitions(sb);
        kfree(sb->s_fs_info);
        sb->s_fs_info = NULL;
 }
@@ -2161,7 +2162,6 @@ static int udf_sync_fs(struct super_block *sb, int wait)
                 * the buffer for IO
                 */
                mark_buffer_dirty(sbi->s_lvid_bh);
-                sb->s_dirt = 0;
                sbi->s_lvid_dirty = 0;
        }
        mutex_unlock(&sbi->s_alloc_mutex);
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
index 4b98fee8e161..8a9657d7f7c6 100644
--- a/fs/udf/truncate.c
+++ b/fs/udf/truncate.c
@@ -248,7 +248,7 @@ void udf_truncate_extents(struct inode *inode)
                                /* We managed to free all extents in the
                                 * indirect extent - free it too */
                                BUG_ON(!epos.bh);
-                                udf_free_blocks(sb, inode, &epos.block,
+                                udf_free_blocks(sb, NULL, &epos.block,
                                                0, indirect_ext_len);
                        } else if (!epos.bh) {
                                iinfo->i_lenAlloc = lenalloc;
@@ -275,7 +275,7 @@ void udf_truncate_extents(struct inode *inode)
        if (indirect_ext_len) {
                BUG_ON(!epos.bh);
-                udf_free_blocks(sb, inode, &epos.block, 0, indirect_ext_len);
+                udf_free_blocks(sb, NULL, &epos.block, 0, indirect_ext_len);
        } else if (!epos.bh) {
                iinfo->i_lenAlloc = lenalloc;
                mark_inode_dirty(inode);
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index ebe10314e512..de038da6f6bd 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -129,7 +129,6 @@ static inline void udf_updated_lvid(struct super_block *sb)
        WARN_ON_ONCE(((struct logicalVolIntegrityDesc *)
                     bh->b_data)->integrityType !=
                     cpu_to_le32(LVID_INTEGRITY_TYPE_OPEN));
-        sb->s_dirt = 1;
        UDF_SB(sb)->s_lvid_dirty = 1;
 }
 extern u64 lvid_get_unique_id(struct super_block *sb);
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index 42694e11c23d..1b3e410bf334 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -116,7 +116,7 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
        ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
        if (sb->s_flags & MS_SYNCHRONOUS)
                ubh_sync_block(UCPI_UBH(ucpi));
-        sb->s_dirt = 1;
+        ufs_mark_sb_dirty(sb);
        
        unlock_super (sb);
        UFSD("EXIT\n");
@@ -214,7 +214,7 @@ do_more:
                goto do_more;
        }
-        sb->s_dirt = 1;
+        ufs_mark_sb_dirty(sb);
        unlock_super (sb);
        UFSD("EXIT\n");
        return;
@@ -557,7 +557,7 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
        ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
        if (sb->s_flags & MS_SYNCHRONOUS)
                ubh_sync_block(UCPI_UBH(ucpi));
-        sb->s_dirt = 1;
+        ufs_mark_sb_dirty(sb);
        UFSD("EXIT, fragment %llu\n", (unsigned long long)fragment);
        
@@ -677,7 +677,7 @@ succed:
        ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
        if (sb->s_flags & MS_SYNCHRONOUS)
                ubh_sync_block(UCPI_UBH(ucpi));
-        sb->s_dirt = 1;
+        ufs_mark_sb_dirty(sb);
        result += cgno * uspi->s_fpg;
        UFSD("EXIT3, result %llu\n", (unsigned long long)result);
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 4ec5c1085a87..e84cbe21b986 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -116,7 +116,7 @@ void ufs_free_inode (struct inode * inode)
        if (sb->s_flags & MS_SYNCHRONOUS)
                ubh_sync_block(UCPI_UBH(ucpi));
        
-        sb->s_dirt = 1;
+        ufs_mark_sb_dirty(sb);
        unlock_super (sb);
        UFSD("EXIT\n");
 }
@@ -288,7 +288,7 @@ cg_found:
        ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
        if (sb->s_flags & MS_SYNCHRONOUS)
                ubh_sync_block(UCPI_UBH(ucpi));
-        sb->s_dirt = 1;
+        ufs_mark_sb_dirty(sb);
        inode->i_ino = cg * uspi->s_ipg + bit;
        inode_init_owner(inode, dir, mode);
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index a2281cadefa1..90d74b8f8eba 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -46,7 +46,7 @@ static inline int ufs_add_nondir(struct dentry *dentry, struct inode *inode)
        return err;
 }
-static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
+static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, unsigned int flags)
 {
        struct inode * inode = NULL;
        ino_t ino;
@@ -71,7 +71,7 @@ static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, stru
 * with d_instantiate(). 
 */
 static int ufs_create (struct inode * dir, struct dentry * dentry, umode_t mode,
-                struct nameidata *nd)
+                bool excl)
 {
        struct inode *inode;
        int err;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 302f340d0071..444927e5706b 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -302,7 +302,7 @@ void ufs_error (struct super_block * sb, const char * function,
        if (!(sb->s_flags & MS_RDONLY)) {
                usb1->fs_clean = UFS_FSBAD;
                ubh_mark_buffer_dirty(USPI_UBH(uspi));
-                sb->s_dirt = 1;
+                ufs_mark_sb_dirty(sb);
                sb->s_flags |= MS_RDONLY;
        }
        va_start (args, fmt);
@@ -334,7 +334,7 @@ void ufs_panic (struct super_block * sb, const char * function,
        if (!(sb->s_flags & MS_RDONLY)) {
                usb1->fs_clean = UFS_FSBAD;
                ubh_mark_buffer_dirty(USPI_UBH(uspi));
-                sb->s_dirt = 1;
+                ufs_mark_sb_dirty(sb);
        }
        va_start (args, fmt);
        vsnprintf (error_buf, sizeof(error_buf), fmt, args);
@@ -691,6 +691,83 @@ static void ufs_put_super_internal(struct super_block *sb)
        UFSD("EXIT\n");
 }
+static int ufs_sync_fs(struct super_block *sb, int wait)
+{
+        struct ufs_sb_private_info * uspi;
+        struct ufs_super_block_first * usb1;
+        struct ufs_super_block_third * usb3;
+        unsigned flags;
+        lock_ufs(sb);
+        lock_super(sb);
+        UFSD("ENTER\n");
+        flags = UFS_SB(sb)->s_flags;
+        uspi = UFS_SB(sb)->s_uspi;
+        usb1 = ubh_get_usb_first(uspi);
+        usb3 = ubh_get_usb_third(uspi);
+        usb1->fs_time = cpu_to_fs32(sb, get_seconds());
+        if ((flags & UFS_ST_MASK) == UFS_ST_SUN  ||
+            (flags & UFS_ST_MASK) == UFS_ST_SUNOS ||
+            (flags & UFS_ST_MASK) == UFS_ST_SUNx86)
+                ufs_set_fs_state(sb, usb1, usb3,
+                                UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time));
+        ufs_put_cstotal(sb);
+        UFSD("EXIT\n");
+        unlock_super(sb);
+        unlock_ufs(sb);
+        return 0;
+}
+static void delayed_sync_fs(struct work_struct *work)
+{
+        struct ufs_sb_info *sbi;
+        sbi = container_of(work, struct ufs_sb_info, sync_work.work);
+        spin_lock(&sbi->work_lock);
+        sbi->work_queued = 0;
+        spin_unlock(&sbi->work_lock);
+        ufs_sync_fs(sbi->sb, 1);
+}
+void ufs_mark_sb_dirty(struct super_block *sb)
+{
+        struct ufs_sb_info *sbi = UFS_SB(sb);
+        unsigned long delay;
+        spin_lock(&sbi->work_lock);
+        if (!sbi->work_queued) {
+                delay = msecs_to_jiffies(dirty_writeback_interval * 10);
+                queue_delayed_work(system_long_wq, &sbi->sync_work, delay);
+                sbi->work_queued = 1;
+        }
+        spin_unlock(&sbi->work_lock);
+}
+static void ufs_put_super(struct super_block *sb)
+{
+        struct ufs_sb_info * sbi = UFS_SB(sb);
+        UFSD("ENTER\n");
+        if (!(sb->s_flags & MS_RDONLY))
+                ufs_put_super_internal(sb);
+        cancel_delayed_work_sync(&sbi->sync_work);
+        ubh_brelse_uspi (sbi->s_uspi);
+        kfree (sbi->s_uspi);
+        kfree (sbi);
+        sb->s_fs_info = NULL;
+        UFSD("EXIT\n");
+        return;
+}
 static int ufs_fill_super(struct super_block *sb, void *data, int silent)
 {
        struct ufs_sb_info * sbi;
@@ -716,6 +793,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
        if (!sbi)
                goto failed_nomem;
        sb->s_fs_info = sbi;
+        sbi->sb = sb;
        UFSD("flag %u\n", (int)(sb->s_flags & MS_RDONLY));
        
@@ -727,6 +805,8 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
        }
 #endif
        mutex_init(&sbi->mutex);
+        spin_lock_init(&sbi->work_lock);
+        INIT_DELAYED_WORK(&sbi->sync_work, delayed_sync_fs);
        /*
         * Set default mount options
         * Parse mount options
@@ -1191,68 +1271,6 @@ failed_nomem:
        return -ENOMEM;
 }
-static int ufs_sync_fs(struct super_block *sb, int wait)
-{
-        struct ufs_sb_private_info * uspi;
-        struct ufs_super_block_first * usb1;
-        struct ufs_super_block_third * usb3;
-        unsigned flags;
-        lock_ufs(sb);
-        lock_super(sb);
-        UFSD("ENTER\n");
-        flags = UFS_SB(sb)->s_flags;
-        uspi = UFS_SB(sb)->s_uspi;
-        usb1 = ubh_get_usb_first(uspi);
-        usb3 = ubh_get_usb_third(uspi);
-        usb1->fs_time = cpu_to_fs32(sb, get_seconds());
-        if ((flags & UFS_ST_MASK) == UFS_ST_SUN  ||
-            (flags & UFS_ST_MASK) == UFS_ST_SUNOS ||
-            (flags & UFS_ST_MASK) == UFS_ST_SUNx86)
-                ufs_set_fs_state(sb, usb1, usb3,
-                                UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time));
-        ufs_put_cstotal(sb);
-        sb->s_dirt = 0;
-        UFSD("EXIT\n");
-        unlock_super(sb);
-        unlock_ufs(sb);
-        return 0;
-}
-static void ufs_write_super(struct super_block *sb)
-{
-        if (!(sb->s_flags & MS_RDONLY))
-                ufs_sync_fs(sb, 1);
-        else
-                sb->s_dirt = 0;
-}
-static void ufs_put_super(struct super_block *sb)
-{
-        struct ufs_sb_info * sbi = UFS_SB(sb);
-                
-        UFSD("ENTER\n");
-        if (sb->s_dirt)
-                ufs_write_super(sb);
-        if (!(sb->s_flags & MS_RDONLY))
-                ufs_put_super_internal(sb);
-        
-        ubh_brelse_uspi (sbi->s_uspi);
-        kfree (sbi->s_uspi);
-        kfree (sbi);
-        sb->s_fs_info = NULL;
-        UFSD("EXIT\n");
-        return;
-}
 static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
 {
        struct ufs_sb_private_info * uspi;
@@ -1308,7 +1326,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
                        ufs_set_fs_state(sb, usb1, usb3,
                                UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time));
                ubh_mark_buffer_dirty (USPI_UBH(uspi));
-                sb->s_dirt = 0;
                sb->s_flags |= MS_RDONLY;
        } else {
        /*
@@ -1458,7 +1475,6 @@ static const struct super_operations ufs_super_ops = {
        .write_inode    = ufs_write_inode,
        .evict_inode    = ufs_evict_inode,
        .put_super      = ufs_put_super,
-        .write_super    = ufs_write_super,
        .sync_fs        = ufs_sync_fs,
        .statfs         = ufs_statfs,
        .remount_fs     = ufs_remount,
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index 528750b7e701..343e6fc571e5 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -20,6 +20,10 @@ struct ufs_sb_info {
        unsigned s_mount_opt;
        struct mutex mutex;
        struct task_struct *mutex_owner;
+        struct super_block *sb;
+        int work_queued; /* non-zero if the delayed work is queued */
+        struct delayed_work sync_work; /* FS sync delayed work */
+        spinlock_t work_lock; /* protects sync_work and work_queued */
 };
 struct ufs_inode_info {
@@ -123,6 +127,7 @@ extern __printf(3, 4)
 void ufs_error(struct super_block *, const char *, const char *, ...);
 extern __printf(3, 4)
 void ufs_panic(struct super_block *, const char *, const char *, ...);
+void ufs_mark_sb_dirty(struct super_block *sb);
 /* symlink.c */
 extern const struct inode_operations ufs_fast_symlink_inode_operations;
diff --git a/fs/ufs/ufs_fs.h b/fs/ufs/ufs_fs.h
index 8aba544f9fad..0cbd5d340b67 100644
--- a/fs/ufs/ufs_fs.h
+++ b/fs/ufs/ufs_fs.h
@@ -34,6 +34,7 @@
 #include <linux/kernel.h>
 #include <linux/stat.h>
 #include <linux/fs.h>
+#include <linux/workqueue.h>
 #include <asm/div64.h>
 typedef __u64 __bitwise __fs64;
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 9d1aeb7e2734..4f33c32affe3 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1074,13 +1074,13 @@ restart:
         * If we couldn't get anything, give up.
         */
        if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
+                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
                if (!forced++) {
                        trace_xfs_alloc_near_busy(args);
                        xfs_log_force(args->mp, XFS_LOG_SYNC);
                        goto restart;
                }
-                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
                trace_xfs_alloc_size_neither(args);
                args->agbno = NULLAGBLOCK;
                return 0;
@@ -2434,13 +2434,22 @@ xfs_alloc_vextent_worker(
        current_restore_flags_nested(&pflags, PF_FSTRANS);
 }
+/*
-int                             /* error */
+ * Data allocation requests often come in with little stack to work on. Push
+ * them off to a worker thread so there is lots of stack to use. Metadata
+ * requests, OTOH, are generally from low stack usage paths, so avoid the
+ * context switch overhead here.
+ */
+int
 xfs_alloc_vextent(
-        xfs_alloc_arg_t *args)  /* allocation argument structure */
+        struct xfs_alloc_arg    *args)
 {
        DECLARE_COMPLETION_ONSTACK(done);
+        if (!args->userdata)
+                return __xfs_alloc_vextent(args);
        args->done = &done;
        INIT_WORK_ONSTACK(&args->work, xfs_alloc_vextent_worker);
        queue_work(xfs_alloc_wq, &args->work);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index a4beb421018a..269b35c084da 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -989,27 +989,6 @@ xfs_buf_ioerror_alert(
                (__uint64_t)XFS_BUF_ADDR(bp), func, bp->b_error, bp->b_length);
 }
-int
-xfs_bwrite(
-        struct xfs_buf          *bp)
-{
-        int                     error;
-        ASSERT(xfs_buf_islocked(bp));
-        bp->b_flags |= XBF_WRITE;
-        bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q);
-        xfs_bdstrat_cb(bp);
-        error = xfs_buf_iowait(bp);
-        if (error) {
-                xfs_force_shutdown(bp->b_target->bt_mount,
-                                   SHUTDOWN_META_IO_ERROR);
-        }
-        return error;
-}
 /*
 * Called when we want to stop a buffer from getting written or read.
 * We attach the EIO error, muck with its flags, and call xfs_buf_ioend
@@ -1079,14 +1058,7 @@ xfs_bioerror_relse(
        return EIO;
 }
+STATIC int
-/*
- * All xfs metadata buffers except log state machine buffers
- * get this attached as their b_bdstrat callback function.
- * This is so that we can catch a buffer
- * after prematurely unpinning it to forcibly shutdown the filesystem.
- */
-int
 xfs_bdstrat_cb(
        struct xfs_buf  *bp)
 {
@@ -1107,6 +1079,27 @@ xfs_bdstrat_cb(
        return 0;
 }
+int
+xfs_bwrite(
+        struct xfs_buf          *bp)
+{
+        int                     error;
+        ASSERT(xfs_buf_islocked(bp));
+        bp->b_flags |= XBF_WRITE;
+        bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q);
+        xfs_bdstrat_cb(bp);
+        error = xfs_buf_iowait(bp);
+        if (error) {
+                xfs_force_shutdown(bp->b_target->bt_mount,
+                                   SHUTDOWN_META_IO_ERROR);
+        }
+        return error;
+}
 /*
 * Wrapper around bdstrat so that we can stop data from going to disk in case
 * we are shutting down the filesystem.  Typically user data goes thru this
@@ -1243,7 +1236,7 @@ xfs_buf_iorequest(
         */
        atomic_set(&bp->b_io_remaining, 1);
        _xfs_buf_ioapply(bp);
-        _xfs_buf_ioend(bp, 0);
+        _xfs_buf_ioend(bp, 1);
        xfs_buf_rele(bp);
 }
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 7f1d1392ce37..79344c48008e 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -180,7 +180,6 @@ extern void xfs_buf_unlock(xfs_buf_t *);
 extern int xfs_bwrite(struct xfs_buf *bp);
 extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
-extern int xfs_bdstrat_cb(struct xfs_buf *);
 extern void xfs_buf_ioend(xfs_buf_t *,  int);
 extern void xfs_buf_ioerror(xfs_buf_t *, int);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 45df2b857d48..d9e451115f98 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -954,7 +954,7 @@ xfs_buf_iodone_callbacks(
                if (!XFS_BUF_ISSTALE(bp)) {
                        bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE;
-                        xfs_bdstrat_cb(bp);
+                        xfs_buf_iorequest(bp);
                } else {
                        xfs_buf_relse(bp);
                }
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 3a05a41b5d76..1f1535d25a9b 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -208,6 +208,7 @@ xfs_open_by_handle(
        struct inode            *inode;
        struct dentry           *dentry;
        fmode_t                 fmode;
+        struct path             path;
        if (!capable(CAP_SYS_ADMIN))
                return -XFS_ERROR(EPERM);
@@ -252,8 +253,10 @@ xfs_open_by_handle(
                goto out_dput;
        }
-        filp = dentry_open(dentry, mntget(parfilp->f_path.mnt),
+        path.mnt = parfilp->f_path.mnt;
-                           hreq->oflags, cred);
+        path.dentry = dentry;
+        filp = dentry_open(&path, hreq->oflags, cred);
+        dput(dentry);
        if (IS_ERR(filp)) {
                put_unused_fd(fd);
                return PTR_ERR(filp);
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 1a25fd802798..9c4340f5c3e0 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -179,7 +179,7 @@ xfs_vn_create(
        struct inode    *dir,
        struct dentry   *dentry,
        umode_t         mode,
-        struct nameidata *nd)
+        bool            flags)
 {
        return xfs_vn_mknod(dir, dentry, mode, 0);
 }
@@ -197,7 +197,7 @@ STATIC struct dentry *
 xfs_vn_lookup(
        struct inode    *dir,
        struct dentry   *dentry,
-        struct nameidata *nd)
+        unsigned int flags)
 {
        struct xfs_inode *cip;
        struct xfs_name name;
@@ -222,7 +222,7 @@ STATIC struct dentry *
 xfs_vn_ci_lookup(
        struct inode    *dir,
        struct dentry   *dentry,
-        struct nameidata *nd)
+        unsigned int flags)
 {
        struct xfs_inode *ip;
        struct xfs_name xname;
author	NeilBrown <neilb@suse.de>	2012-08-01 06:40:02 -0400
committer	NeilBrown <neilb@suse.de>	2012-08-01 06:40:02 -0400
commit	bb181e2e48f8c85db08c9cb015cbba9618dbf05c (patch)
tree	191bc24dd97bcb174535cc217af082f16da3b43d /fs
parent	d57368afe63b3b7b45ce6c2b8c5276417935be2f (diff)
parent	c039c332f23e794deb6d6f37b9f07ff3b27fb2cf (diff)