353 files changed, 3681 insertions, 10126 deletions
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 410ffd6ceb5f..dc95a252523d 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -54,9 +54,9 @@ extern struct kmem_cache *v9fs_inode_cache;
 struct inode *v9fs_alloc_inode(struct super_block *sb);
 void v9fs_destroy_inode(struct inode *inode);
-struct inode *v9fs_get_inode(struct super_block *sb, int mode, dev_t);
+struct inode *v9fs_get_inode(struct super_block *sb, umode_t mode, dev_t);
 int v9fs_init_inode(struct v9fs_session_info *v9ses,
-                    struct inode *inode, int mode, dev_t);
+                    struct inode *inode, umode_t mode, dev_t);
 void v9fs_evict_inode(struct inode *inode);
 ino_t v9fs_qid2ino(struct p9_qid *qid);
 void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 879ed8851737..e0f20de6aa2b 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -59,15 +59,13 @@ static const struct inode_operations v9fs_symlink_inode_operations;
 *
 */
-static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode)
+static u32 unixmode2p9mode(struct v9fs_session_info *v9ses, umode_t mode)
 {
        int res;
        res = mode & 0777;
        if (S_ISDIR(mode))
                res |= P9_DMDIR;
        if (v9fs_proto_dotu(v9ses)) {
-                if (S_ISLNK(mode))
-                        res |= P9_DMSYMLINK;
                if (v9ses->nodev == 0) {
                        if (S_ISSOCK(mode))
                                res |= P9_DMSOCKET;
@@ -85,10 +83,7 @@ static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode)
                        res |= P9_DMSETGID;
                if ((mode & S_ISVTX) == S_ISVTX)
                        res |= P9_DMSETVTX;
-                if ((mode & P9_DMLINK))
-                        res |= P9_DMLINK;
        }
        return res;
 }
@@ -99,11 +94,11 @@ static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode)
 * @rdev: major number, minor number in case of device files.
 *
 */
-static int p9mode2unixmode(struct v9fs_session_info *v9ses,
+static umode_t p9mode2unixmode(struct v9fs_session_info *v9ses,
-                           struct p9_wstat *stat, dev_t *rdev)
+                               struct p9_wstat *stat, dev_t *rdev)
 {
        int res;
-        int mode = stat->mode;
+        u32 mode = stat->mode;
        res = mode & S_IALLUGO;
        *rdev = 0;
@@ -251,7 +246,6 @@ struct inode *v9fs_alloc_inode(struct super_block *sb)
 static void v9fs_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(v9fs_inode_cache, V9FS_I(inode));
 }
@@ -261,7 +255,7 @@ void v9fs_destroy_inode(struct inode *inode)
 }
 int v9fs_init_inode(struct v9fs_session_info *v9ses,
-                    struct inode *inode, int mode, dev_t rdev)
+                    struct inode *inode, umode_t mode, dev_t rdev)
 {
        int err = 0;
@@ -335,7 +329,7 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses,
                break;
        default:
-                P9_DPRINTK(P9_DEBUG_ERROR, "BAD mode 0x%x S_IFMT 0x%x\n",
+                P9_DPRINTK(P9_DEBUG_ERROR, "BAD mode 0x%hx S_IFMT 0x%x\n",
                           mode, mode & S_IFMT);
                err = -EINVAL;
                goto error;
@@ -352,13 +346,13 @@ error:
 *
 */
-struct inode *v9fs_get_inode(struct super_block *sb, int mode, dev_t rdev)
+struct inode *v9fs_get_inode(struct super_block *sb, umode_t mode, dev_t rdev)
 {
        int err;
        struct inode *inode;
        struct v9fs_session_info *v9ses = sb->s_fs_info;
-        P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode);
+        P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %ho\n", sb, mode);
        inode = new_inode(sb);
        if (!inode) {
@@ -492,7 +486,8 @@ static struct inode *v9fs_qid_iget(struct super_block *sb,
                                   int new)
 {
        dev_t rdev;
-        int retval, umode;
+        int retval;
+        umode_t umode;
        unsigned long i_ino;
        struct inode *inode;
        struct v9fs_session_info *v9ses = sb->s_fs_info;
@@ -703,7 +698,7 @@ error:
 */
 static int
-v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
+v9fs_vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
                struct nameidata *nd)
 {
        int err;
@@ -786,7 +781,7 @@ error:
 *
 */
-static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
        int err;
        u32 perm;
@@ -1131,7 +1126,7 @@ void
 v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
        struct super_block *sb)
 {
-        mode_t mode;
+        umode_t mode;
        char ext[32];
        char tag_name[14];
        unsigned int i_nlink;
@@ -1304,9 +1299,8 @@ v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
 */
 static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
-        int mode, const char *extension)
+        u32 perm, const char *extension)
 {
-        u32 perm;
        struct p9_fid *fid;
        struct v9fs_session_info *v9ses;
@@ -1316,7 +1310,6 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
                return -EPERM;
        }
-        perm = unixmode2p9mode(v9ses, mode);
        fid = v9fs_create(v9ses, dir, dentry, (char *) extension, perm,
                                                                P9_OREAD);
        if (IS_ERR(fid))
@@ -1343,7 +1336,7 @@ v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
        P9_DPRINTK(P9_DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino,
                                        dentry->d_name.name, symname);
-        return v9fs_vfs_mkspecial(dir, dentry, S_IFLNK, symname);
+        return v9fs_vfs_mkspecial(dir, dentry, P9_DMSYMLINK, symname);
 }
 /**
@@ -1398,13 +1391,15 @@ clunk_fid:
 */
 static int
-v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
+v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
 {
+        struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
        int retval;
        char *name;
+        u32 perm;
        P9_DPRINTK(P9_DEBUG_VFS,
-                " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
+                " %lu,%s mode: %hx MAJOR: %u MINOR: %u\n", dir->i_ino,
                dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev));
        if (!new_valid_dev(rdev))
@@ -1427,7 +1422,8 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
                return -EINVAL;
        }
-        retval = v9fs_vfs_mkspecial(dir, dentry, mode, name);
+        perm = unixmode2p9mode(v9ses, mode);
+        retval = v9fs_vfs_mkspecial(dir, dentry, perm, name);
        __putname(name);
        return retval;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 0b5745e21946..8ef152ac6a16 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -48,7 +48,7 @@
 #include "acl.h"
 static int
-v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
+v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
                    dev_t rdev);
 /**
@@ -253,7 +253,7 @@ int v9fs_open_to_dotl_flags(int flags)
 */
 static int
-v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
+v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
                struct nameidata *nd)
 {
        int err = 0;
@@ -284,7 +284,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
        name = (char *) dentry->d_name.name;
        P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_create_dotl: name:%s flags:0x%x "
-                        "mode:0x%x\n", name, flags, omode);
+                        "mode:0x%hx\n", name, flags, omode);
        dfid = v9fs_fid_lookup(dentry->d_parent);
        if (IS_ERR(dfid)) {
@@ -395,7 +395,7 @@ err_clunk_old_fid:
 */
 static int v9fs_vfs_mkdir_dotl(struct inode *dir,
-                               struct dentry *dentry, int omode)
+                               struct dentry *dentry, umode_t omode)
 {
        int err;
        struct v9fs_session_info *v9ses;
@@ -594,7 +594,7 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
 void
 v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
 {
-        mode_t mode;
+        umode_t mode;
        struct v9fs_inode *v9inode = V9FS_I(inode);
        if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) {
@@ -799,7 +799,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
 *
 */
 static int
-v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
+v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
                dev_t rdev)
 {
        int err;
@@ -814,7 +814,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
        struct posix_acl *dacl = NULL, *pacl = NULL;
        P9_DPRINTK(P9_DEBUG_VFS,
-                " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
+                " %lu,%s mode: %hx MAJOR: %u MINOR: %u\n", dir->i_ino,
                dentry->d_name.name, omode, MAJOR(rdev), MINOR(rdev));
        if (!new_valid_dev(rdev))
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index c70251d47ed1..f68ff65a32a5 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -117,7 +117,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
        struct inode *inode = NULL;
        struct dentry *root = NULL;
        struct v9fs_session_info *v9ses = NULL;
-        int mode = S_IRWXUGO | S_ISVTX;
+        umode_t mode = S_IRWXUGO | S_ISVTX;
        struct p9_fid *fid;
        int retval = 0;
diff --git a/fs/Kconfig b/fs/Kconfig
index 5f4c45d4aa10..30145d886bc2 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -266,14 +266,6 @@ source "fs/9p/Kconfig"
 endif # NETWORK_FILESYSTEMS
-if BLOCK
-menu "Partition Types"
-source "fs/partitions/Kconfig"
-endmenu
-endif
 source "fs/nls/Kconfig"
 source "fs/dlm/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index d2c3353d5477..93804d4d66e1 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -19,6 +19,8 @@ else
 obj-y +=        no-block.o
 endif
+obj-$(CONFIG_PROC_FS) += proc_namespace.o
 obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o
 obj-y                           += notify/
 obj-$(CONFIG_EPOLL)             += eventpoll.o
@@ -52,7 +54,6 @@ obj-$(CONFIG_FHANDLE)		+= fhandle.o
 obj-y                           += quota/
 obj-$(CONFIG_PROC_FS)           += proc/
-obj-y                           += partitions/
 obj-$(CONFIG_SYSFS)             += sysfs/
 obj-$(CONFIG_CONFIGFS_FS)       += configfs/
 obj-y                           += devpts/
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index c8bf36a1996a..8e3b36ace305 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -126,9 +126,9 @@ static void adfs_put_super(struct super_block *sb)
        sb->s_fs_info = NULL;
 }
-static int adfs_show_options(struct seq_file *seq, struct vfsmount *mnt)
+static int adfs_show_options(struct seq_file *seq, struct dentry *root)
 {
-        struct adfs_sb_info *asb = ADFS_SB(mnt->mnt_sb);
+        struct adfs_sb_info *asb = ADFS_SB(root->d_sb);
        if (asb->s_uid != 0)
                seq_printf(seq, ",uid=%u", asb->s_uid);
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index c2b9c79eb64e..45a0ce45d7b4 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -136,7 +136,7 @@ extern int	affs_remove_header(struct dentry *dentry);
 extern u32      affs_checksum_block(struct super_block *sb, struct buffer_head *bh);
 extern void     affs_fix_checksum(struct super_block *sb, struct buffer_head *bh);
 extern void     secs_to_datestamp(time_t secs, struct affs_date *ds);
-extern mode_t   prot_to_mode(u32 prot);
+extern umode_t  prot_to_mode(u32 prot);
 extern void     mode_to_prot(struct inode *inode);
 extern void     affs_error(struct super_block *sb, const char *function, const char *fmt, ...);
 extern void     affs_warning(struct super_block *sb, const char *function, const char *fmt, ...);
@@ -156,8 +156,8 @@ extern void	affs_free_bitmap(struct super_block *sb);
 extern int      affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len);
 extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *);
 extern int      affs_unlink(struct inode *dir, struct dentry *dentry);
-extern int      affs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *);
+extern int      affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *);
-extern int      affs_mkdir(struct inode *dir, struct dentry *dentry, int mode);
+extern int      affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
 extern int      affs_rmdir(struct inode *dir, struct dentry *dentry);
 extern int      affs_link(struct dentry *olddentry, struct inode *dir,
                          struct dentry *dentry);
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index de37ec842340..52a6407682e6 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -390,10 +390,10 @@ secs_to_datestamp(time_t secs, struct affs_date *ds)
        ds->ticks = cpu_to_be32(secs * 50);
 }
-mode_t
+umode_t
 prot_to_mode(u32 prot)
 {
-        int mode = 0;
+        umode_t mode = 0;
        if (!(prot & FIBF_NOWRITE))
                mode |= S_IWUSR;
@@ -421,7 +421,7 @@ void
 mode_to_prot(struct inode *inode)
 {
        u32 prot = AFFS_I(inode)->i_protect;
-        mode_t mode = inode->i_mode;
+        umode_t mode = inode->i_mode;
        if (!(mode & S_IXUSR))
                prot |= FIBF_NOEXECUTE;
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 780a11dc6318..47806940aac0 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -255,13 +255,13 @@ affs_unlink(struct inode *dir, struct dentry *dentry)
 }
 int
-affs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd)
+affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd)
 {
        struct super_block *sb = dir->i_sb;
        struct inode    *inode;
        int              error;
-        pr_debug("AFFS: create(%lu,\"%.*s\",0%o)\n",dir->i_ino,(int)dentry->d_name.len,
+        pr_debug("AFFS: create(%lu,\"%.*s\",0%ho)\n",dir->i_ino,(int)dentry->d_name.len,
                 dentry->d_name.name,mode);
        inode = affs_new_inode(dir);
@@ -285,12 +285,12 @@ affs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata
 }
 int
-affs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
        struct inode            *inode;
        int                      error;
-        pr_debug("AFFS: mkdir(%lu,\"%.*s\",0%o)\n",dir->i_ino,
+        pr_debug("AFFS: mkdir(%lu,\"%.*s\",0%ho)\n",dir->i_ino,
                 (int)dentry->d_name.len,dentry->d_name.name,mode);
        inode = affs_new_inode(dir);
diff --git a/fs/affs/super.c b/fs/affs/super.c
index b31507d0f9b9..8ba73fed7964 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -98,7 +98,6 @@ static struct inode *affs_alloc_inode(struct super_block *sb)
 static void affs_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(affs_inode_cachep, AFFS_I(inode));
 }
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 1b0b19550015..e22dc4b4a503 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -28,9 +28,9 @@ static int afs_d_delete(const struct dentry *dentry);
 static void afs_d_release(struct dentry *dentry);
 static int afs_lookup_filldir(void *_cookie, const char *name, int nlen,
                                  loff_t fpos, u64 ino, unsigned dtype);
-static int afs_create(struct inode *dir, struct dentry *dentry, int mode,
+static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
                      struct nameidata *nd);
-static int afs_mkdir(struct inode *dir, struct dentry *dentry, int mode);
+static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
 static int afs_rmdir(struct inode *dir, struct dentry *dentry);
 static int afs_unlink(struct inode *dir, struct dentry *dentry);
 static int afs_link(struct dentry *from, struct inode *dir,
@@ -764,7 +764,7 @@ static void afs_d_release(struct dentry *dentry)
 /*
 * create a directory on an AFS filesystem
 */
-static int afs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
        struct afs_file_status status;
        struct afs_callback cb;
@@ -777,7 +777,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
        dvnode = AFS_FS_I(dir);
-        _enter("{%x:%u},{%s},%o",
+        _enter("{%x:%u},{%s},%ho",
               dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode);
        ret = -ENAMETOOLONG;
@@ -948,7 +948,7 @@ error:
 /*
 * create a regular file on an AFS filesystem
 */
-static int afs_create(struct inode *dir, struct dentry *dentry, int mode,
+static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
                      struct nameidata *nd)
 {
        struct afs_file_status status;
@@ -962,7 +962,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, int mode,
        dvnode = AFS_FS_I(dir);
-        _enter("{%x:%u},{%s},%o,",
+        _enter("{%x:%u},{%s},%ho,",
               dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode);
        ret = -ENAMETOOLONG;
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index aa59184151d0..8f4ce2658b7d 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -242,7 +242,7 @@ struct vfsmount *afs_d_automount(struct path *path)
 {
        struct vfsmount *newmnt;
-        _enter("{%s,%s}", path->mnt->mnt_devname, path->dentry->d_name.name);
+        _enter("{%s}", path->dentry->d_name.name);
        newmnt = afs_mntpt_do_automount(path->dentry);
        if (IS_ERR(newmnt))
@@ -252,7 +252,7 @@ struct vfsmount *afs_d_automount(struct path *path)
        mnt_set_expiry(newmnt, &afs_vfsmounts);
        queue_delayed_work(afs_wq, &afs_mntpt_expiry_timer,
                           afs_mntpt_expiry_timeout * HZ);
-        _leave(" = %p {%s}", newmnt, newmnt->mnt_devname);
+        _leave(" = %p", newmnt);
        return newmnt;
 }
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 356dcf0929e8..983ec59fc80d 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -495,7 +495,6 @@ static void afs_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
        struct afs_vnode *vnode = AFS_FS_I(inode);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(afs_inode_cachep, vnode);
 }
diff --git a/fs/attr.c b/fs/attr.c
index 7ee7ba488313..95053ad8abcc 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -166,7 +166,7 @@ EXPORT_SYMBOL(setattr_copy);
 int notify_change(struct dentry * dentry, struct iattr * attr)
 {
        struct inode *inode = dentry->d_inode;
-        mode_t mode = inode->i_mode;
+        umode_t mode = inode->i_mode;
        int error;
        struct timespec now;
        unsigned int ia_valid = attr->ia_valid;
@@ -177,7 +177,7 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
        }
        if ((ia_valid & ATTR_MODE)) {
-                mode_t amode = attr->ia_mode;
+                umode_t amode = attr->ia_mode;
                /* Flag setting protected by i_mutex */
                if (is_sxid(amode))
                        inode->i_flags &= ~S_NOSEC;
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 326dc08d3e3f..5869d4e974a9 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -155,7 +155,7 @@ static inline int autofs4_ispending(struct dentry *dentry)
        return 0;
 }
-struct inode *autofs4_get_inode(struct super_block *, mode_t);
+struct inode *autofs4_get_inode(struct super_block *, umode_t);
 void autofs4_free_ino(struct autofs_info *);
 /* Expiration */
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 509fe1eb66ae..76741d8d7786 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -194,7 +194,7 @@ static int find_autofs_mount(const char *pathname,
                return err;
        err = -ENOENT;
        while (path.dentry == path.mnt->mnt_root) {
-                if (path.mnt->mnt_sb->s_magic == AUTOFS_SUPER_MAGIC) {
+                if (path.dentry->d_sb->s_magic == AUTOFS_SUPER_MAGIC) {
                        if (test(&path, data)) {
                                path_get(&path);
                                if (!err) /* already found some */
@@ -212,7 +212,7 @@ static int find_autofs_mount(const char *pathname,
 static int test_by_dev(struct path *path, void *p)
 {
-        return path->mnt->mnt_sb->s_dev == *(dev_t *)p;
+        return path->dentry->d_sb->s_dev == *(dev_t *)p;
 }
 static int test_by_type(struct path *path, void *p)
@@ -538,11 +538,11 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
                        err = find_autofs_mount(name, &path, test_by_type, &type);
                if (err)
                        goto out;
-                devid = new_encode_dev(path.mnt->mnt_sb->s_dev);
+                devid = new_encode_dev(path.dentry->d_sb->s_dev);
                err = 0;
                if (path.mnt->mnt_root == path.dentry) {
                        err = 1;
-                        magic = path.mnt->mnt_sb->s_magic;
+                        magic = path.dentry->d_sb->s_magic;
                }
        } else {
                dev_t dev = sbi->sb->s_dev;
@@ -556,7 +556,7 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
                err = have_submounts(path.dentry);
                if (follow_down_one(&path))
-                        magic = path.mnt->mnt_sb->s_magic;
+                        magic = path.dentry->d_sb->s_magic;
        }
        param->ismountpoint.out.devid = devid;
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 8179f1ab8175..2ba44c79d548 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -70,10 +70,10 @@ out_kill_sb:
        kill_litter_super(sb);
 }
-static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt)
+static int autofs4_show_options(struct seq_file *m, struct dentry *root)
 {
-        struct autofs_sb_info *sbi = autofs4_sbi(mnt->mnt_sb);
+        struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb);
-        struct inode *root_inode = mnt->mnt_sb->s_root->d_inode;
+        struct inode *root_inode = root->d_sb->s_root->d_inode;
        if (!sbi)
                return 0;
@@ -326,7 +326,7 @@ fail_unlock:
        return -EINVAL;
 }
-struct inode *autofs4_get_inode(struct super_block *sb, mode_t mode)
+struct inode *autofs4_get_inode(struct super_block *sb, umode_t mode)
 {
        struct inode *inode = new_inode(sb);
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index f55ae23b137e..75e5f1c8e028 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -26,7 +26,7 @@
 static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *);
 static int autofs4_dir_unlink(struct inode *,struct dentry *);
 static int autofs4_dir_rmdir(struct inode *,struct dentry *);
-static int autofs4_dir_mkdir(struct inode *,struct dentry *,int);
+static int autofs4_dir_mkdir(struct inode *,struct dentry *,umode_t);
 static long autofs4_root_ioctl(struct file *,unsigned int,unsigned long);
 #ifdef CONFIG_COMPAT
 static long autofs4_root_compat_ioctl(struct file *,unsigned int,unsigned long);
@@ -699,7 +699,7 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
        return 0;
 }
-static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
        struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb);
        struct autofs_info *ino = autofs4_dentry_ino(dentry);
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 9205cf25f1c6..22e9a78872ff 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -173,7 +173,7 @@ static const struct file_operations bad_file_ops =
 };
 static int bad_inode_create (struct inode *dir, struct dentry *dentry,
-                int mode, struct nameidata *nd)
+                umode_t mode, struct nameidata *nd)
 {
        return -EIO;
 }
@@ -202,7 +202,7 @@ static int bad_inode_symlink (struct inode *dir, struct dentry *dentry,
 }
 static int bad_inode_mkdir(struct inode *dir, struct dentry *dentry,
-                        int mode)
+                        umode_t mode)
 {
        return -EIO;
 }
@@ -213,7 +213,7 @@ static int bad_inode_rmdir (struct inode *dir, struct dentry *dentry)
 }
 static int bad_inode_mknod (struct inode *dir, struct dentry *dentry,
-                        int mode, dev_t rdev)
+                        umode_t mode, dev_t rdev)
 {
        return -EIO;
 }
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 8342ca67abcd..6e6d536767fe 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -286,7 +286,6 @@ befs_alloc_inode(struct super_block *sb)
 static void befs_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(befs_inode_cachep, BEFS_I(inode));
 }
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 9cc074019479..d12c7966db27 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -84,7 +84,7 @@ const struct file_operations bfs_dir_operations = {
 extern void dump_imap(const char *, struct super_block *);
-static int bfs_create(struct inode *dir, struct dentry *dentry, int mode,
+static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
                                                struct nameidata *nd)
 {
        int err;
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 697af5bf70b3..b0391bc402b1 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -251,7 +251,6 @@ static struct inode *bfs_alloc_inode(struct super_block *sb)
 static void bfs_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(bfs_inode_cachep, BFS_I(inode));
 }
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 1e9edbdeda7e..a9198dfd5f85 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -560,7 +560,7 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
                        break;
                case 2: set_bit(Enabled, &e->flags);
                        break;
-                case 3: root = dget(file->f_path.mnt->mnt_sb->s_root);
+                case 3: root = dget(file->f_path.dentry->d_sb->s_root);
                        mutex_lock(&root->d_inode->i_mutex);
                        kill_node(e);
@@ -587,7 +587,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
        Node *e;
        struct inode *inode;
        struct dentry *root, *dentry;
-        struct super_block *sb = file->f_path.mnt->mnt_sb;
+        struct super_block *sb = file->f_path.dentry->d_sb;
        int err = 0;
        e = create_entry(buffer, count);
@@ -666,7 +666,7 @@ static ssize_t bm_status_write(struct file * file, const char __user * buffer,
        switch (res) {
                case 1: enabled = 0; break;
                case 2: enabled = 1; break;
-                case 3: root = dget(file->f_path.mnt->mnt_sb->s_root);
+                case 3: root = dget(file->f_path.dentry->d_sb->s_root);
                        mutex_lock(&root->d_inode->i_mutex);
                        while (!list_empty(&entries))
diff --git a/fs/block_dev.c b/fs/block_dev.c
index b07f1da1de4e..69a5b6fbee2b 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -17,6 +17,7 @@
 #include <linux/module.h>
 #include <linux/blkpg.h>
 #include <linux/buffer_head.h>
+#include <linux/swap.h>
 #include <linux/pagevec.h>
 #include <linux/writeback.h>
 #include <linux/mpage.h>
@@ -25,6 +26,7 @@
 #include <linux/namei.h>
 #include <linux/log2.h>
 #include <linux/kmemleak.h>
+#include <linux/cleancache.h>
 #include <asm/uaccess.h>
 #include "internal.h"
@@ -82,13 +84,35 @@ static sector_t max_block(struct block_device *bdev)
 }
 /* Kill _all_ buffers and pagecache , dirty or not.. */
-static void kill_bdev(struct block_device *bdev)
+void kill_bdev(struct block_device *bdev)
 {
-        if (bdev->bd_inode->i_mapping->nrpages == 0)
+        struct address_space *mapping = bdev->bd_inode->i_mapping;
+        if (mapping->nrpages == 0)
                return;
        invalidate_bh_lrus();
-        truncate_inode_pages(bdev->bd_inode->i_mapping, 0);
+        truncate_inode_pages(mapping, 0);
 }       
+EXPORT_SYMBOL(kill_bdev);
+/* Invalidate clean unused buffers and pagecache. */
+void invalidate_bdev(struct block_device *bdev)
+{
+        struct address_space *mapping = bdev->bd_inode->i_mapping;
+        if (mapping->nrpages == 0)
+                return;
+        invalidate_bh_lrus();
+        lru_add_drain_all();    /* make sure all lru add caches are flushed */
+        invalidate_mapping_pages(mapping, 0, -1);
+        /* 99% of the time, we don't need to flush the cleancache on the bdev.
+         * But, for the strange corners, lets be cautious
+         */
+        cleancache_flush_inode(mapping);
+}
+EXPORT_SYMBOL(invalidate_bdev);
 int set_blocksize(struct block_device *bdev, int size)
 {
@@ -425,7 +449,6 @@ static void bdev_i_callback(struct rcu_head *head)
        struct inode *inode = container_of(head, struct inode, i_rcu);
        struct bdev_inode *bdi = BDEV_I(inode);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(bdev_cachep, bdi);
 }
@@ -493,7 +516,7 @@ static struct file_system_type bd_type = {
        .kill_sb        = kill_anon_super,
 };
-struct super_block *blockdev_superblock __read_mostly;
+static struct super_block *blockdev_superblock __read_mostly;
 void __init bdev_cache_init(void)
 {
@@ -639,6 +662,11 @@ static struct block_device *bd_acquire(struct inode *inode)
        return bdev;
 }
+static inline int sb_is_blkdev_sb(struct super_block *sb)
+{
+        return sb == blockdev_superblock;
+}
 /* Call when you free inode */
 void bd_forget(struct inode *inode)
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 7ec14097fef1..0cc20b35c1c4 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -64,6 +64,8 @@ struct btrfs_worker_thread {
        int idle;
 };
+static int __btrfs_start_workers(struct btrfs_workers *workers);
 /*
 * btrfs_start_workers uses kthread_run, which can block waiting for memory
 * for a very long time.  It will actually throttle on page writeback,
@@ -88,27 +90,10 @@ static void start_new_worker_func(struct btrfs_work *work)
 {
        struct worker_start *start;
        start = container_of(work, struct worker_start, work);
-        btrfs_start_workers(start->queue, 1);
+        __btrfs_start_workers(start->queue);
        kfree(start);
 }
-static int start_new_worker(struct btrfs_workers *queue)
-{
-        struct worker_start *start;
-        int ret;
-        start = kzalloc(sizeof(*start), GFP_NOFS);
-        if (!start)
-                return -ENOMEM;
-        start->work.func = start_new_worker_func;
-        start->queue = queue;
-        ret = btrfs_queue_worker(queue->atomic_worker_start, &start->work);
-        if (ret)
-                kfree(start);
-        return ret;
-}
 /*
 * helper function to move a thread onto the idle list after it
 * has finished some requests.
@@ -153,12 +138,20 @@ static void check_busy_worker(struct btrfs_worker_thread *worker)
 static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
 {
        struct btrfs_workers *workers = worker->workers;
+        struct worker_start *start;
        unsigned long flags;
        rmb();
        if (!workers->atomic_start_pending)
                return;
+        start = kzalloc(sizeof(*start), GFP_NOFS);
+        if (!start)
+                return;
+        start->work.func = start_new_worker_func;
+        start->queue = workers;
        spin_lock_irqsave(&workers->lock, flags);
        if (!workers->atomic_start_pending)
                goto out;
@@ -170,10 +163,11 @@ static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
        workers->num_workers_starting += 1;
        spin_unlock_irqrestore(&workers->lock, flags);
-        start_new_worker(workers);
+        btrfs_queue_worker(workers->atomic_worker_start, &start->work);
        return;
 out:
+        kfree(start);
        spin_unlock_irqrestore(&workers->lock, flags);
 }
@@ -331,7 +325,7 @@ again:
                        run_ordered_completions(worker->workers, work);
                        check_pending_worker_creates(worker);
+                        cond_resched();
                }
                spin_lock_irq(&worker->lock);
@@ -340,7 +334,7 @@ again:
                if (freezing(current)) {
                        worker->working = 0;
                        spin_unlock_irq(&worker->lock);
-                        refrigerator();
+                        try_to_freeze();
                } else {
                        spin_unlock_irq(&worker->lock);
                        if (!kthread_should_stop()) {
@@ -462,56 +456,55 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
 * starts new worker threads.  This does not enforce the max worker
 * count in case you need to temporarily go past it.
 */
-static int __btrfs_start_workers(struct btrfs_workers *workers,
+static int __btrfs_start_workers(struct btrfs_workers *workers)
-                                 int num_workers)
 {
        struct btrfs_worker_thread *worker;
        int ret = 0;
-        int i;
-        for (i = 0; i < num_workers; i++) {
+        worker = kzalloc(sizeof(*worker), GFP_NOFS);
-                worker = kzalloc(sizeof(*worker), GFP_NOFS);
+        if (!worker) {
-                if (!worker) {
+                ret = -ENOMEM;
-                        ret = -ENOMEM;
+                goto fail;
-                        goto fail;
+        }
-                }
-                INIT_LIST_HEAD(&worker->pending);
+        INIT_LIST_HEAD(&worker->pending);
-                INIT_LIST_HEAD(&worker->prio_pending);
+        INIT_LIST_HEAD(&worker->prio_pending);
-                INIT_LIST_HEAD(&worker->worker_list);
+        INIT_LIST_HEAD(&worker->worker_list);
-                spin_lock_init(&worker->lock);
+        spin_lock_init(&worker->lock);
-                atomic_set(&worker->num_pending, 0);
+        atomic_set(&worker->num_pending, 0);
-                atomic_set(&worker->refs, 1);
+        atomic_set(&worker->refs, 1);
-                worker->workers = workers;
+        worker->workers = workers;
-                worker->task = kthread_run(worker_loop, worker,
+        worker->task = kthread_run(worker_loop, worker,
-                                           "btrfs-%s-%d", workers->name,
+                                   "btrfs-%s-%d", workers->name,
-                                           workers->num_workers + i);
+                                   workers->num_workers + 1);
-                if (IS_ERR(worker->task)) {
+        if (IS_ERR(worker->task)) {
-                        ret = PTR_ERR(worker->task);
+                ret = PTR_ERR(worker->task);
-                        kfree(worker);
+                kfree(worker);
-                        goto fail;
+                goto fail;
-                }
-                spin_lock_irq(&workers->lock);
-                list_add_tail(&worker->worker_list, &workers->idle_list);
-                worker->idle = 1;
-                workers->num_workers++;
-                workers->num_workers_starting--;
-                WARN_ON(workers->num_workers_starting < 0);
-                spin_unlock_irq(&workers->lock);
        }
+        spin_lock_irq(&workers->lock);
+        list_add_tail(&worker->worker_list, &workers->idle_list);
+        worker->idle = 1;
+        workers->num_workers++;
+        workers->num_workers_starting--;
+        WARN_ON(workers->num_workers_starting < 0);
+        spin_unlock_irq(&workers->lock);
        return 0;
 fail:
-        btrfs_stop_workers(workers);
+        spin_lock_irq(&workers->lock);
+        workers->num_workers_starting--;
+        spin_unlock_irq(&workers->lock);
        return ret;
 }
-int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
+int btrfs_start_workers(struct btrfs_workers *workers)
 {
        spin_lock_irq(&workers->lock);
-        workers->num_workers_starting += num_workers;
+        workers->num_workers_starting++;
        spin_unlock_irq(&workers->lock);
-        return __btrfs_start_workers(workers, num_workers);
+        return __btrfs_start_workers(workers);
 }
 /*
@@ -568,9 +561,10 @@ static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
        struct btrfs_worker_thread *worker;
        unsigned long flags;
        struct list_head *fallback;
+        int ret;
-again:
        spin_lock_irqsave(&workers->lock, flags);
+again:
        worker = next_worker(workers);
        if (!worker) {
@@ -584,7 +578,10 @@ again:
                        workers->num_workers_starting++;
                        spin_unlock_irqrestore(&workers->lock, flags);
                        /* we're below the limit, start another worker */
-                        __btrfs_start_workers(workers, 1);
+                        ret = __btrfs_start_workers(workers);
+                        spin_lock_irqsave(&workers->lock, flags);
+                        if (ret)
+                                goto fallback;
                        goto again;
                }
        }
@@ -665,7 +662,7 @@ void btrfs_set_work_high_prio(struct btrfs_work *work)
 /*
 * places a struct btrfs_work into the pending queue of one of the kthreads
 */
-int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
+void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
 {
        struct btrfs_worker_thread *worker;
        unsigned long flags;
@@ -673,7 +670,7 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
        /* don't requeue something already on a list */
        if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
-                goto out;
+                return;
        worker = find_worker(workers);
        if (workers->ordered) {
@@ -712,7 +709,4 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
        if (wake)
                wake_up_process(worker->task);
        spin_unlock_irqrestore(&worker->lock, flags);
-out:
-        return 0;
 }
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 5077746cf85e..f34cc31fa3c9 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -109,8 +109,8 @@ struct btrfs_workers {
        char *name;
 };
-int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work);
+void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work);
-int btrfs_start_workers(struct btrfs_workers *workers, int num_workers);
+int btrfs_start_workers(struct btrfs_workers *workers);
 int btrfs_stop_workers(struct btrfs_workers *workers);
 void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
                        struct btrfs_workers *async_starter);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 50634abef9b4..67385033323d 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2692,7 +2692,8 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 int btrfs_readpage(struct file *file, struct page *page);
 void btrfs_evict_inode(struct inode *inode);
 int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
-void btrfs_dirty_inode(struct inode *inode, int flags);
+int btrfs_dirty_inode(struct inode *inode);
+int btrfs_update_time(struct file *file);
 struct inode *btrfs_alloc_inode(struct super_block *sb);
 void btrfs_destroy_inode(struct inode *inode);
 int btrfs_drop_inode(struct inode *inode);
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 5b163572e0ca..9c1eccc2c503 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -640,8 +640,8 @@ static int btrfs_delayed_inode_reserve_metadata(
         * Now if src_rsv == delalloc_block_rsv we'll let it just steal since
         * we're accounted for.
         */
-        if (!trans->bytes_reserved &&
+        if (!src_rsv || (!trans->bytes_reserved &&
-            src_rsv != &root->fs_info->delalloc_block_rsv) {
+            src_rsv != &root->fs_info->delalloc_block_rsv)) {
                ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes);
                /*
                 * Since we're under a transaction reserve_metadata_bytes could
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 632f8f3cc9db..f99a099a7747 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1579,9 +1579,7 @@ static int cleaner_kthread(void *arg)
                        btrfs_run_defrag_inodes(root->fs_info);
                }
-                if (freezing(current)) {
+                if (!try_to_freeze()) {
-                        refrigerator();
-                } else {
                        set_current_state(TASK_INTERRUPTIBLE);
                        if (!kthread_should_stop())
                                schedule();
@@ -1635,9 +1633,7 @@ sleep:
                wake_up_process(root->fs_info->cleaner_kthread);
                mutex_unlock(&root->fs_info->transaction_kthread_mutex);
-                if (freezing(current)) {
+                if (!try_to_freeze()) {
-                        refrigerator();
-                } else {
                        set_current_state(TASK_INTERRUPTIBLE);
                        if (!kthread_should_stop() &&
                            !btrfs_transaction_blocked(root->fs_info))
@@ -2194,19 +2190,27 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        fs_info->endio_meta_write_workers.idle_thresh = 2;
        fs_info->readahead_workers.idle_thresh = 2;
-        btrfs_start_workers(&fs_info->workers, 1);
+        /*
-        btrfs_start_workers(&fs_info->generic_worker, 1);
+         * btrfs_start_workers can really only fail because of ENOMEM so just
-        btrfs_start_workers(&fs_info->submit_workers, 1);
+         * return -ENOMEM if any of these fail.
-        btrfs_start_workers(&fs_info->delalloc_workers, 1);
+         */
-        btrfs_start_workers(&fs_info->fixup_workers, 1);
+        ret = btrfs_start_workers(&fs_info->workers);
-        btrfs_start_workers(&fs_info->endio_workers, 1);
+        ret |= btrfs_start_workers(&fs_info->generic_worker);
-        btrfs_start_workers(&fs_info->endio_meta_workers, 1);
+        ret |= btrfs_start_workers(&fs_info->submit_workers);
-        btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
+        ret |= btrfs_start_workers(&fs_info->delalloc_workers);
-        btrfs_start_workers(&fs_info->endio_write_workers, 1);
+        ret |= btrfs_start_workers(&fs_info->fixup_workers);
-        btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
+        ret |= btrfs_start_workers(&fs_info->endio_workers);
-        btrfs_start_workers(&fs_info->delayed_workers, 1);
+        ret |= btrfs_start_workers(&fs_info->endio_meta_workers);
-        btrfs_start_workers(&fs_info->caching_workers, 1);
+        ret |= btrfs_start_workers(&fs_info->endio_meta_write_workers);
-        btrfs_start_workers(&fs_info->readahead_workers, 1);
+        ret |= btrfs_start_workers(&fs_info->endio_write_workers);
+        ret |= btrfs_start_workers(&fs_info->endio_freespace_worker);
+        ret |= btrfs_start_workers(&fs_info->delayed_workers);
+        ret |= btrfs_start_workers(&fs_info->caching_workers);
+        ret |= btrfs_start_workers(&fs_info->readahead_workers);
+        if (ret) {
+                ret = -ENOMEM;
+                goto fail_sb_buffer;
+        }
        fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
        fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 2ad813674d77..f5fbe576d2ba 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2822,7 +2822,7 @@ out_free:
        btrfs_release_path(path);
 out:
        spin_lock(&block_group->lock);
-        if (!ret)
+        if (!ret && dcs == BTRFS_DC_SETUP)
                block_group->cache_generation = trans->transid;
        block_group->disk_cache_state = dcs;
        spin_unlock(&block_group->lock);
@@ -4204,12 +4204,17 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
        u64 to_reserve = 0;
+        u64 csum_bytes;
        unsigned nr_extents = 0;
+        int extra_reserve = 0;
        int flush = 1;
        int ret;
+        /* i_mutex must be held here unless this is a free space cache inode */
        if (btrfs_is_free_space_inode(root, inode))
                flush = 0;
+        else
+                WARN_ON(!mutex_is_locked(&inode->i_mutex));
        if (flush && btrfs_transaction_in_commit(root->fs_info))
                schedule_timeout(1);
@@ -4220,11 +4225,9 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
        BTRFS_I(inode)->outstanding_extents++;
        if (BTRFS_I(inode)->outstanding_extents >
-            BTRFS_I(inode)->reserved_extents) {
+            BTRFS_I(inode)->reserved_extents)
                nr_extents = BTRFS_I(inode)->outstanding_extents -
                        BTRFS_I(inode)->reserved_extents;
-                BTRFS_I(inode)->reserved_extents += nr_extents;
-        }
        /*
         * Add an item to reserve for updating the inode when we complete the
@@ -4232,11 +4235,12 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
         */
        if (!BTRFS_I(inode)->delalloc_meta_reserved) {
                nr_extents++;
-                BTRFS_I(inode)->delalloc_meta_reserved = 1;
+                extra_reserve = 1;
        }
        to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
        to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
+        csum_bytes = BTRFS_I(inode)->csum_bytes;
        spin_unlock(&BTRFS_I(inode)->lock);
        ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
@@ -4246,22 +4250,35 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
                spin_lock(&BTRFS_I(inode)->lock);
                dropped = drop_outstanding_extent(inode);
-                to_free = calc_csum_metadata_size(inode, num_bytes, 0);
-                spin_unlock(&BTRFS_I(inode)->lock);
-                to_free += btrfs_calc_trans_metadata_size(root, dropped);
                /*
-                 * Somebody could have come in and twiddled with the
+                 * If the inode's csum_bytes is the same as the original
-                 * reservation, so if we have to free more than we would have
+                 * csum_bytes then we know we haven't raced with any free()ers
-                 * reserved from this reservation go ahead and release those
+                 * so we can just reduce our inode's csum_bytes and carry on.
-                 * bytes.
+                 * Otherwise we have to do the normal free thing to account for
+                 * the case that the free side didn't free up its reserve
+                 * because of this outstanding reservation.
                 */
-                to_free -= to_reserve;
+                if (BTRFS_I(inode)->csum_bytes == csum_bytes)
+                        calc_csum_metadata_size(inode, num_bytes, 0);
+                else
+                        to_free = calc_csum_metadata_size(inode, num_bytes, 0);
+                spin_unlock(&BTRFS_I(inode)->lock);
+                if (dropped)
+                        to_free += btrfs_calc_trans_metadata_size(root, dropped);
                if (to_free)
                        btrfs_block_rsv_release(root, block_rsv, to_free);
                return ret;
        }
+        spin_lock(&BTRFS_I(inode)->lock);
+        if (extra_reserve) {
+                BTRFS_I(inode)->delalloc_meta_reserved = 1;
+                nr_extents--;
+        }
+        BTRFS_I(inode)->reserved_extents += nr_extents;
+        spin_unlock(&BTRFS_I(inode)->lock);
        block_rsv_add_bytes(block_rsv, to_reserve, 1);
        return 0;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index dafdfa059bf6..97fbe939c050 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1167,6 +1167,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
        nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
                     PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
                     (sizeof(struct page *)));
+        nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied);
+        nrptrs = max(nrptrs, 8);
        pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
        if (!pages)
                return -ENOMEM;
@@ -1387,7 +1389,11 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
                goto out;
        }
-        file_update_time(file);
+        err = btrfs_update_time(file);
+        if (err) {
+                mutex_unlock(&inode->i_mutex);
+                goto out;
+        }
        BTRFS_I(inode)->sequence++;
        start_pos = round_down(pos, root->sectorsize);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index ec23d43d0c35..9a897bf79538 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -423,7 +423,7 @@ static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
        }
        if (index == 0)
-                offset = sizeof(u32) * io_ctl->num_pages;;
+                offset = sizeof(u32) * io_ctl->num_pages;
        crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
                              PAGE_CACHE_SIZE - offset);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2c984f7d4c2a..81b235a61f8c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -38,6 +38,7 @@
 #include <linux/falloc.h>
 #include <linux/slab.h>
 #include <linux/ratelimit.h>
+#include <linux/mount.h>
 #include "compat.h"
 #include "ctree.h"
 #include "disk-io.h"
@@ -1943,7 +1944,7 @@ enum btrfs_orphan_cleanup_state {
 };
 /*
- * This is called in transaction commmit time. If there are no orphan
+ * This is called in transaction commit time. If there are no orphan
 * files in the subvolume, it removes orphan item and frees block_rsv
 * structure.
 */
@@ -2031,7 +2032,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
        /* insert an orphan item to track this unlinked/truncated file */
        if (insert >= 1) {
                ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
-                BUG_ON(ret);
+                BUG_ON(ret && ret != -EEXIST);
        }
        /* insert an orphan item to track subvolume contains orphan files */
@@ -2158,6 +2159,38 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                if (ret && ret != -ESTALE)
                        goto out;
+                if (ret == -ESTALE && root == root->fs_info->tree_root) {
+                        struct btrfs_root *dead_root;
+                        struct btrfs_fs_info *fs_info = root->fs_info;
+                        int is_dead_root = 0;
+                        /*
+                         * this is an orphan in the tree root. Currently these
+                         * could come from 2 sources:
+                         *  a) a snapshot deletion in progress
+                         *  b) a free space cache inode
+                         * We need to distinguish those two, as the snapshot
+                         * orphan must not get deleted.
+                         * find_dead_roots already ran before us, so if this
+                         * is a snapshot deletion, we should find the root
+                         * in the dead_roots list
+                         */
+                        spin_lock(&fs_info->trans_lock);
+                        list_for_each_entry(dead_root, &fs_info->dead_roots,
+                                            root_list) {
+                                if (dead_root->root_key.objectid ==
+                                    found_key.objectid) {
+                                        is_dead_root = 1;
+                                        break;
+                                }
+                        }
+                        spin_unlock(&fs_info->trans_lock);
+                        if (is_dead_root) {
+                                /* prevent this orphan from being found again */
+                                key.offset = found_key.objectid - 1;
+                                continue;
+                        }
+                }
                /*
                 * Inode is already gone but the orphan item is still there,
                 * kill the orphan item.
@@ -2191,7 +2224,14 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                                continue;
                        }
                        nr_truncate++;
+                        /*
+                         * Need to hold the i_mutex for reservation purposes,
+                         * not a huge deal here but I have a WARN_ON in
+                         * btrfs_delalloc_reserve_space to catch offenders.
+                         */
+                        mutex_lock(&inode->i_mutex);
                        ret = btrfs_truncate(inode);
+                        mutex_unlock(&inode->i_mutex);
                } else {
                        nr_unlink++;
                }
@@ -3327,7 +3367,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
                        u64 hint_byte = 0;
                        hole_size = last_byte - cur_offset;
-                        trans = btrfs_start_transaction(root, 2);
+                        trans = btrfs_start_transaction(root, 3);
                        if (IS_ERR(trans)) {
                                err = PTR_ERR(trans);
                                break;
@@ -3337,6 +3377,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
                                                 cur_offset + hole_size,
                                                 &hint_byte, 1);
                        if (err) {
+                                btrfs_update_inode(trans, root, inode);
                                btrfs_end_transaction(trans, root);
                                break;
                        }
@@ -3346,6 +3387,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
                                        0, hole_size, 0, hole_size,
                                        0, 0, 0);
                        if (err) {
+                                btrfs_update_inode(trans, root, inode);
                                btrfs_end_transaction(trans, root);
                                break;
                        }
@@ -3353,6 +3395,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
                        btrfs_drop_extent_cache(inode, hole_start,
                                        last_byte - 1, 0);
+                        btrfs_update_inode(trans, root, inode);
                        btrfs_end_transaction(trans, root);
                }
                free_extent_map(em);
@@ -3370,6 +3413,8 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
 static int btrfs_setsize(struct inode *inode, loff_t newsize)
 {
+        struct btrfs_root *root = BTRFS_I(inode)->root;
+        struct btrfs_trans_handle *trans;
        loff_t oldsize = i_size_read(inode);
        int ret;
@@ -3377,16 +3422,19 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize)
                return 0;
        if (newsize > oldsize) {
-                i_size_write(inode, newsize);
-                btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
                truncate_pagecache(inode, oldsize, newsize);
                ret = btrfs_cont_expand(inode, oldsize, newsize);
-                if (ret) {
+                if (ret)
-                        btrfs_setsize(inode, oldsize);
                        return ret;
-                }
-                mark_inode_dirty(inode);
+                trans = btrfs_start_transaction(root, 1);
+                if (IS_ERR(trans))
+                        return PTR_ERR(trans);
+                i_size_write(inode, newsize);
+                btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
+                ret = btrfs_update_inode(trans, root, inode);
+                btrfs_end_transaction_throttle(trans, root);
        } else {
                /*
@@ -3426,9 +3474,9 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
        if (attr->ia_valid) {
                setattr_copy(inode, attr);
-                mark_inode_dirty(inode);
+                err = btrfs_dirty_inode(inode);
-                if (attr->ia_valid & ATTR_MODE)
+                if (!err && attr->ia_valid & ATTR_MODE)
                        err = btrfs_acl_chmod(inode);
        }
@@ -4204,42 +4252,80 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 * FIXME, needs more benchmarking...there are no reasons other than performance
 * to keep or drop this code.
 */
-void btrfs_dirty_inode(struct inode *inode, int flags)
+int btrfs_dirty_inode(struct inode *inode)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_trans_handle *trans;
        int ret;
        if (BTRFS_I(inode)->dummy_inode)
-                return;
+                return 0;
        trans = btrfs_join_transaction(root);
-        BUG_ON(IS_ERR(trans));
+        if (IS_ERR(trans))
+                return PTR_ERR(trans);
        ret = btrfs_update_inode(trans, root, inode);
        if (ret && ret == -ENOSPC) {
                /* whoops, lets try again with the full transaction */
                btrfs_end_transaction(trans, root);
                trans = btrfs_start_transaction(root, 1);
-                if (IS_ERR(trans)) {
+                if (IS_ERR(trans))
-                        printk_ratelimited(KERN_ERR "btrfs: fail to "
+                        return PTR_ERR(trans);
-                                       "dirty  inode %llu error %ld\n",
-                                       (unsigned long long)btrfs_ino(inode),
-                                       PTR_ERR(trans));
-                        return;
-                }
                ret = btrfs_update_inode(trans, root, inode);
-                if (ret) {
-                        printk_ratelimited(KERN_ERR "btrfs: fail to "
-                                       "dirty  inode %llu error %d\n",
-                                       (unsigned long long)btrfs_ino(inode),
-                                       ret);
-                }
        }
        btrfs_end_transaction(trans, root);
        if (BTRFS_I(inode)->delayed_node)
                btrfs_balance_delayed_items(root);
+        return ret;
+}
+/*
+ * Copy of file_update_time() that returns the btrfs_dirty_inode() result, so
+ * file writes and mmap writes can see ENOSPC from the inode update.
+ */
+int btrfs_update_time(struct file *file)
+{
+        struct inode *inode = file->f_path.dentry->d_inode;
+        struct timespec now;
+        int ret;
+        enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0;
+        /* First try to exhaust all avenues to not sync */
+        if (IS_NOCMTIME(inode))
+                return 0;
+        now = current_fs_time(inode->i_sb);
+        if (!timespec_equal(&inode->i_mtime, &now))
+                sync_it = S_MTIME;
+        if (!timespec_equal(&inode->i_ctime, &now))
+                sync_it |= S_CTIME;
+        if (IS_I_VERSION(inode))
+                sync_it |= S_VERSION;
+        if (!sync_it)
+                return 0;  /* timestamps already current, no version bump */
+        /* Finally allowed to write? Takes lock. */
+        if (mnt_want_write_file(file))
+                return 0;  /* no write access: skip update, not an error */
+        /* Only change inode inside the lock region */
+        if (sync_it & S_VERSION)
+                inode_inc_iversion(inode);
+        if (sync_it & S_CTIME)
+                inode->i_ctime = now;
+        if (sync_it & S_MTIME)
+                inode->i_mtime = now;
+        ret = btrfs_dirty_inode(inode);
+        if (!ret)  /* only mark dirty once the btrfs update succeeded */
+                mark_inode_dirty_sync(inode);
+        mnt_drop_write(file->f_path.mnt);  /* ends the write-access region */
+        return ret;
 }
 /*
@@ -4326,8 +4412,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root,
                                     struct inode *dir,
                                     const char *name, int name_len,
-                                     u64 ref_objectid, u64 objectid, int mode,
+                                     u64 ref_objectid, u64 objectid,
-                                     u64 *index)
+                                     umode_t mode, u64 *index)
 {
        struct inode *inode;
        struct btrfs_inode_item *inode_item;
@@ -4504,17 +4590,13 @@ static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
        int err = btrfs_add_link(trans, dir, inode,
                                 dentry->d_name.name, dentry->d_name.len,
                                 backref, index);
-        if (!err) {
-                d_instantiate(dentry, inode);
-                return 0;
-        }
        if (err > 0)
                err = -EEXIST;
        return err;
 }
 static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
-                        int mode, dev_t rdev)
+                        umode_t mode, dev_t rdev)
 {
        struct btrfs_trans_handle *trans;
        struct btrfs_root *root = BTRFS_I(dir)->root;
@@ -4555,13 +4637,21 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
                goto out_unlock;
        }
+        /*
+         * If the active LSM wants to access the inode during
+         * d_instantiate it needs i_op set first. Smack checks to see
+         * if the filesystem supports xattrs by looking at the
+         * ops vector.
+         */
+        inode->i_op = &btrfs_special_inode_operations;
        err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
        if (err)
                drop_inode = 1;
        else {
-                inode->i_op = &btrfs_special_inode_operations;
                init_special_inode(inode, inode->i_mode, rdev);
                btrfs_update_inode(trans, root, inode);
+                d_instantiate(dentry, inode);
        }
 out_unlock:
        nr = trans->blocks_used;
@@ -4575,7 +4665,7 @@ out_unlock:
 }
 static int btrfs_create(struct inode *dir, struct dentry *dentry,
-                        int mode, struct nameidata *nd)
+                        umode_t mode, struct nameidata *nd)
 {
        struct btrfs_trans_handle *trans;
        struct btrfs_root *root = BTRFS_I(dir)->root;
@@ -4613,15 +4703,23 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
                goto out_unlock;
        }
+        /*
+        * If the active LSM wants to access the inode during
+        * d_instantiate it needs these. Smack checks to see
+        * if the filesystem supports xattrs by looking at the
+        * ops vector.
+        */
+        inode->i_fop = &btrfs_file_operations;
+        inode->i_op = &btrfs_file_inode_operations;
        err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
        if (err)
                drop_inode = 1;
        else {
                inode->i_mapping->a_ops = &btrfs_aops;
                inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
-                inode->i_fop = &btrfs_file_operations;
-                inode->i_op = &btrfs_file_inode_operations;
                BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
+                d_instantiate(dentry, inode);
        }
 out_unlock:
        nr = trans->blocks_used;
@@ -4679,6 +4777,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
                struct dentry *parent = dentry->d_parent;
                err = btrfs_update_inode(trans, root, inode);
                BUG_ON(err);
+                d_instantiate(dentry, inode);
                btrfs_log_new_name(trans, inode, NULL, parent);
        }
@@ -4693,7 +4792,7 @@ fail:
        return err;
 }
-static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
        struct inode *inode = NULL;
        struct btrfs_trans_handle *trans;
@@ -6303,7 +6402,12 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        u64 page_start;
        u64 page_end;
+        /* Need this to keep space reservations serialized */
+        mutex_lock(&inode->i_mutex);
        ret  = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
+        mutex_unlock(&inode->i_mutex);
+        if (!ret)
+                ret = btrfs_update_time(vma->vm_file);
        if (ret) {
                if (ret == -ENOMEM)
                        ret = VM_FAULT_OOM;
@@ -6515,8 +6619,9 @@ static int btrfs_truncate(struct inode *inode)
                        /* Just need the 1 for updating the inode */
                        trans = btrfs_start_transaction(root, 1);
                        if (IS_ERR(trans)) {
-                                err = PTR_ERR(trans);
+                                ret = err = PTR_ERR(trans);
-                                goto out;
+                                trans = NULL;
+                                break;
                        }
                }
@@ -6656,7 +6761,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
 static void btrfs_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
 }
@@ -7076,14 +7180,21 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
                goto out_unlock;
        }
+        /*
+        * If the active LSM wants to access the inode during
+        * d_instantiate it needs these. Smack checks to see
+        * if the filesystem supports xattrs by looking at the
+        * ops vector.
+        */
+        inode->i_fop = &btrfs_file_operations;
+        inode->i_op = &btrfs_file_inode_operations;
        err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
        if (err)
                drop_inode = 1;
        else {
                inode->i_mapping->a_ops = &btrfs_aops;
                inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
-                inode->i_fop = &btrfs_file_operations;
-                inode->i_op = &btrfs_file_inode_operations;
                BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
        }
        if (drop_inode)
@@ -7132,6 +7243,8 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
                drop_inode = 1;
 out_unlock:
+        if (!err)
+                d_instantiate(dentry, inode);
        nr = trans->blocks_used;
        btrfs_end_transaction_throttle(trans, root);
        if (drop_inode) {
@@ -7353,6 +7466,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = {
        .follow_link    = page_follow_link_light,
        .put_link       = page_put_link,
        .getattr        = btrfs_getattr,
+        .setattr        = btrfs_setattr,
        .permission     = btrfs_permission,
        .setxattr       = btrfs_setxattr,
        .getxattr       = btrfs_getxattr,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 72d461656f60..5441ff1480fd 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -201,7 +201,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
                }
        }
-        ret = mnt_want_write(file->f_path.mnt);
+        ret = mnt_want_write_file(file);
        if (ret)
                goto out_unlock;
@@ -252,14 +252,14 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
        trans = btrfs_join_transaction(root);
        BUG_ON(IS_ERR(trans));
+        btrfs_update_iflags(inode);
+        inode->i_ctime = CURRENT_TIME;
        ret = btrfs_update_inode(trans, root, inode);
        BUG_ON(ret);
-        btrfs_update_iflags(inode);
-        inode->i_ctime = CURRENT_TIME;
        btrfs_end_transaction(trans, root);
-        mnt_drop_write(file->f_path.mnt);
+        mnt_drop_write_file(file);
        ret = 0;
 out_unlock:
@@ -858,8 +858,10 @@ static int cluster_pages_for_defrag(struct inode *inode,
                return 0;
        file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
+        mutex_lock(&inode->i_mutex);
        ret = btrfs_delalloc_reserve_space(inode,
                                           num_pages << PAGE_CACHE_SHIFT);
+        mutex_unlock(&inode->i_mutex);
        if (ret)
                return ret;
 again:
@@ -1853,7 +1855,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
                goto out;
        }
-        err = mnt_want_write(file->f_path.mnt);
+        err = mnt_want_write_file(file);
        if (err)
                goto out;
@@ -1969,7 +1971,7 @@ out_dput:
        dput(dentry);
 out_unlock_dir:
        mutex_unlock(&dir->i_mutex);
-        mnt_drop_write(file->f_path.mnt);
+        mnt_drop_write_file(file);
 out:
        kfree(vol_args);
        return err;
@@ -1985,7 +1987,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
        if (btrfs_root_readonly(root))
                return -EROFS;
-        ret = mnt_want_write(file->f_path.mnt);
+        ret = mnt_want_write_file(file);
        if (ret)
                return ret;
@@ -2038,7 +2040,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
                ret = -EINVAL;
        }
 out:
-        mnt_drop_write(file->f_path.mnt);
+        mnt_drop_write_file(file);
        return ret;
 }
@@ -2193,7 +2195,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
        if (btrfs_root_readonly(root))
                return -EROFS;
-        ret = mnt_want_write(file->f_path.mnt);
+        ret = mnt_want_write_file(file);
        if (ret)
                return ret;
@@ -2508,7 +2510,7 @@ out_unlock:
 out_fput:
        fput(src_file);
 out_drop_write:
-        mnt_drop_write(file->f_path.mnt);
+        mnt_drop_write_file(file);
        return ret;
 }
@@ -2547,7 +2549,7 @@ static long btrfs_ioctl_trans_start(struct file *file)
        if (btrfs_root_readonly(root))
                goto out;
-        ret = mnt_want_write(file->f_path.mnt);
+        ret = mnt_want_write_file(file);
        if (ret)
                goto out;
@@ -2563,7 +2565,7 @@ static long btrfs_ioctl_trans_start(struct file *file)
 out_drop:
        atomic_dec(&root->fs_info->open_ioctl_trans);
-        mnt_drop_write(file->f_path.mnt);
+        mnt_drop_write_file(file);
 out:
        return ret;
 }
@@ -2798,7 +2800,7 @@ long btrfs_ioctl_trans_end(struct file *file)
        atomic_dec(&root->fs_info->open_ioctl_trans);
-        mnt_drop_write(file->f_path.mnt);
+        mnt_drop_write_file(file);
        return 0;
 }
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index dff29d5e151a..cfb55434a469 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2947,7 +2947,9 @@ static int relocate_file_extent_cluster(struct inode *inode,
        index = (cluster->start - offset) >> PAGE_CACHE_SHIFT;
        last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT;
        while (index <= last_index) {
+                mutex_lock(&inode->i_mutex);
                ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE);
+                mutex_unlock(&inode->i_mutex);
                if (ret)
                        goto out;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index c27bcb67f330..ddf2c90d3fc0 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1535,18 +1535,22 @@ static noinline_for_stack int scrub_supers(struct scrub_dev *sdev)
 static noinline_for_stack int scrub_workers_get(struct btrfs_root *root)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
+        int ret = 0;
        mutex_lock(&fs_info->scrub_lock);
        if (fs_info->scrub_workers_refcnt == 0) {
                btrfs_init_workers(&fs_info->scrub_workers, "scrub",
                           fs_info->thread_pool_size, &fs_info->generic_worker);
                fs_info->scrub_workers.idle_thresh = 4;
-                btrfs_start_workers(&fs_info->scrub_workers, 1);
+                ret = btrfs_start_workers(&fs_info->scrub_workers);
+                if (ret)
+                        goto out;
        }
        ++fs_info->scrub_workers_refcnt;
+out:
        mutex_unlock(&fs_info->scrub_lock);
-        return 0;
+        return ret;
 }
 static noinline_for_stack void scrub_workers_put(struct btrfs_root *root)
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index e28ad4baf483..ae488aa1966a 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -40,7 +40,7 @@
 #include <linux/magic.h>
 #include <linux/slab.h>
 #include <linux/cleancache.h>
-#include <linux/mnt_namespace.h>
+#include <linux/ratelimit.h>
 #include "compat.h"
 #include "delayed-inode.h"
 #include "ctree.h"
@@ -661,9 +661,9 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
        return ret;
 }
-static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
+static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
 {
-        struct btrfs_root *root = btrfs_sb(vfs->mnt_sb);
+        struct btrfs_root *root = btrfs_sb(dentry->d_sb);
        struct btrfs_fs_info *info = root->fs_info;
        char *compress_type;
@@ -1053,7 +1053,7 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
        u64 avail_space;
        u64 used_space;
        u64 min_stripe_size;
-        int min_stripes = 1;
+        int min_stripes = 1, num_stripes = 1;
        int i = 0, nr_devices;
        int ret;
@@ -1067,12 +1067,16 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
        /* calc min stripe number for data space alloction */
        type = btrfs_get_alloc_profile(root, 1);
-        if (type & BTRFS_BLOCK_GROUP_RAID0)
+        if (type & BTRFS_BLOCK_GROUP_RAID0) {
                min_stripes = 2;
-        else if (type & BTRFS_BLOCK_GROUP_RAID1)
+                num_stripes = nr_devices;
+        } else if (type & BTRFS_BLOCK_GROUP_RAID1) {
                min_stripes = 2;
-        else if (type & BTRFS_BLOCK_GROUP_RAID10)
+                num_stripes = 2;
+        } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
                min_stripes = 4;
+                num_stripes = 4;
+        }
        if (type & BTRFS_BLOCK_GROUP_DUP)
                min_stripe_size = 2 * BTRFS_STRIPE_LEN;
@@ -1141,13 +1145,16 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
        i = nr_devices - 1;
        avail_space = 0;
        while (nr_devices >= min_stripes) {
+                if (num_stripes > nr_devices)
+                        num_stripes = nr_devices;
                if (devices_info[i].max_avail >= min_stripe_size) {
                        int j;
                        u64 alloc_size;
-                        avail_space += devices_info[i].max_avail * min_stripes;
+                        avail_space += devices_info[i].max_avail * num_stripes;
                        alloc_size = devices_info[i].max_avail;
-                        for (j = i + 1 - min_stripes; j <= i; j++)
+                        for (j = i + 1 - num_stripes; j <= i; j++)
                                devices_info[j].max_avail -= alloc_size;
                }
                i--;
@@ -1264,6 +1271,16 @@ static int btrfs_unfreeze(struct super_block *sb)
        return 0;
 }
+static void btrfs_fs_dirty_inode(struct inode *inode, int flags)
+{
+        int ret;
+        ret = btrfs_dirty_inode(inode);
+        if (ret)
+                printk_ratelimited(KERN_ERR "btrfs: fail to dirty inode %Lu "
+                                   "error %d\n", btrfs_ino(inode), ret);
+}
 static const struct super_operations btrfs_super_ops = {
        .drop_inode     = btrfs_drop_inode,
        .evict_inode    = btrfs_evict_inode,
@@ -1271,7 +1288,7 @@ static const struct super_operations btrfs_super_ops = {
        .sync_fs        = btrfs_sync_fs,
        .show_options   = btrfs_show_options,
        .write_inode    = btrfs_write_inode,
-        .dirty_inode    = btrfs_dirty_inode,
+        .dirty_inode    = btrfs_fs_dirty_inode,
        .alloc_inode    = btrfs_alloc_inode,
        .destroy_inode  = btrfs_destroy_inode,
        .statfs         = btrfs_statfs,
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0a8c8f8304b1..f4b839fd3c9d 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -295,6 +295,12 @@ loop_lock:
                        btrfs_requeue_work(&device->work);
                        goto done;
                }
+                /* unplug every 64 requests just for good measure */
+                if (batch_run % 64 == 0) {
+                        blk_finish_plug(&plug);
+                        blk_start_plug(&plug);
+                        sync_pending = 0;
+                }
        }
        cond_resched();
@@ -3258,7 +3264,7 @@ static void btrfs_end_bio(struct bio *bio, int err)
                 */
                if (atomic_read(&bbio->error) > bbio->max_errors) {
                        err = -EIO;
-                } else if (err) {
+                } else {
                        /*
                         * this bio is actually up to date, we didn't
                         * go over the max number of errors
diff --git a/fs/buffer.c b/fs/buffer.c
index 19d8eb7fdc81..1a30db77af32 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -41,7 +41,6 @@
 #include <linux/bitops.h>
 #include <linux/mpage.h>
 #include <linux/bit_spinlock.h>
-#include <linux/cleancache.h>
 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
@@ -231,55 +230,6 @@ out:
        return ret;
 }
-/* If invalidate_buffers() will trash dirty buffers, it means some kind
-   of fs corruption is going on. Trashing dirty data always imply losing
-   information that was supposed to be just stored on the physical layer
-   by the user.
-   Thus invalidate_buffers in general usage is not allwowed to trash
-   dirty buffers. For example ioctl(FLSBLKBUF) expects dirty data to
-   be preserved.  These buffers are simply skipped.
-  
-   We also skip buffers which are still in use.  For example this can
-   happen if a userspace program is reading the block device.
-   NOTE: In the case where the user removed a removable-media-disk even if
-   there's still dirty data not synced on disk (due a bug in the device driver
-   or due an error of the user), by not destroying the dirty buffers we could
-   generate corruption also on the next media inserted, thus a parameter is
-   necessary to handle this case in the most safe way possible (trying
-   to not corrupt also the new disk inserted with the data belonging to
-   the old now corrupted disk). Also for the ramdisk the natural thing
-   to do in order to release the ramdisk memory is to destroy dirty buffers.
-   These are two special cases. Normal usage imply the device driver
-   to issue a sync on the device (without waiting I/O completion) and
-   then an invalidate_buffers call that doesn't trash dirty buffers.
-   For handling cache coherency with the blkdev pagecache the 'update' case
-   is been introduced. It is needed to re-read from disk any pinned
-   buffer. NOTE: re-reading from disk is destructive so we can do it only
-   when we assume nobody is changing the buffercache under our I/O and when
-   we think the disk contains more recent information than the buffercache.
-   The update == 1 pass marks the buffers we need to update, the update == 2
-   pass does the actual I/O. */
-void invalidate_bdev(struct block_device *bdev)
-{
-        struct address_space *mapping = bdev->bd_inode->i_mapping;
-        if (mapping->nrpages == 0)
-                return;
-        invalidate_bh_lrus();
-        lru_add_drain_all();    /* make sure all lru add caches are flushed */
-        invalidate_mapping_pages(mapping, 0, -1);
-        /* 99% of the time, we don't need to flush the cleancache on the bdev.
-         * But, for the strange corners, lets be cautious
-         */
-        cleancache_flush_inode(mapping);
-}
-EXPORT_SYMBOL(invalidate_bdev);
 /*
 * Kick the writeback threads then try to free up some ZONE_NORMAL memory.
 */
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 1064805e653b..67bef6d01484 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -11,7 +11,6 @@
 #include <linux/slab.h>
 #include <linux/mount.h>
-#include <linux/buffer_head.h>
 #include "internal.h"
 #define list_to_page(head) (list_entry((head)->prev, struct page, lru))
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 4144caf2f9d3..173b1d22e59b 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -87,7 +87,7 @@ static int ceph_set_page_dirty(struct page *page)
        snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context);
        /* dirty the head */
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        if (ci->i_head_snapc == NULL)
                ci->i_head_snapc = ceph_get_snap_context(snapc);
        ++ci->i_wrbuffer_ref_head;
@@ -100,7 +100,7 @@ static int ceph_set_page_dirty(struct page *page)
             ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref_head-1,
             ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head,
             snapc, snapc->seq, snapc->num_snaps);
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        /* now adjust page */
        spin_lock_irq(&mapping->tree_lock);
@@ -391,7 +391,7 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
        struct ceph_snap_context *snapc = NULL;
        struct ceph_cap_snap *capsnap = NULL;
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
                dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap,
                     capsnap->context, capsnap->dirty_pages);
@@ -407,7 +407,7 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
                dout(" head snapc %p has %d dirty pages\n",
                     snapc, ci->i_wrbuffer_ref_head);
        }
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        return snapc;
 }
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 0f327c6c9679..b60fc8bfb3e9 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -309,7 +309,7 @@ void ceph_reservation_status(struct ceph_fs_client *fsc,
 /*
 * Find ceph_cap for given mds, if any.
 *
- * Called with i_lock held.
+ * Called with i_ceph_lock held.
 */
 static struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds)
 {
@@ -332,9 +332,9 @@ struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, int mds)
 {
        struct ceph_cap *cap;
-        spin_lock(&ci->vfs_inode.i_lock);
+        spin_lock(&ci->i_ceph_lock);
        cap = __get_cap_for_mds(ci, mds);
-        spin_unlock(&ci->vfs_inode.i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        return cap;
 }
@@ -361,15 +361,16 @@ static int __ceph_get_cap_mds(struct ceph_inode_info *ci)
 int ceph_get_cap_mds(struct inode *inode)
 {
+        struct ceph_inode_info *ci = ceph_inode(inode);
        int mds;
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        mds = __ceph_get_cap_mds(ceph_inode(inode));
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        return mds;
 }
 /*
- * Called under i_lock.
+ * Called under i_ceph_lock.
 */
 static void __insert_cap_node(struct ceph_inode_info *ci,
                              struct ceph_cap *new)
@@ -415,7 +416,7 @@ static void __cap_set_timeouts(struct ceph_mds_client *mdsc,
 *
 * If I_FLUSH is set, leave the inode at the front of the list.
 *
- * Caller holds i_lock
+ * Caller holds i_ceph_lock
 *    -> we take mdsc->cap_delay_lock
 */
 static void __cap_delay_requeue(struct ceph_mds_client *mdsc,
@@ -457,7 +458,7 @@ static void __cap_delay_requeue_front(struct ceph_mds_client *mdsc,
 /*
 * Cancel delayed work on cap.
 *
- * Caller must hold i_lock.
+ * Caller must hold i_ceph_lock.
 */
 static void __cap_delay_cancel(struct ceph_mds_client *mdsc,
                               struct ceph_inode_info *ci)
@@ -532,14 +533,14 @@ int ceph_add_cap(struct inode *inode,
                wanted |= ceph_caps_for_mode(fmode);
 retry:
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        cap = __get_cap_for_mds(ci, mds);
        if (!cap) {
                if (new_cap) {
                        cap = new_cap;
                        new_cap = NULL;
                } else {
-                        spin_unlock(&inode->i_lock);
+                        spin_unlock(&ci->i_ceph_lock);
                        new_cap = get_cap(mdsc, caps_reservation);
                        if (new_cap == NULL)
                                return -ENOMEM;
@@ -625,7 +626,7 @@ retry:
        if (fmode >= 0)
                __ceph_get_fmode(ci, fmode);
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        wake_up_all(&ci->i_cap_wq);
        return 0;
 }
@@ -792,7 +793,7 @@ int ceph_caps_revoking(struct ceph_inode_info *ci, int mask)
        struct rb_node *p;
        int ret = 0;
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
                cap = rb_entry(p, struct ceph_cap, ci_node);
                if (__cap_is_valid(cap) &&
@@ -801,7 +802,7 @@ int ceph_caps_revoking(struct ceph_inode_info *ci, int mask)
                        break;
                }
        }
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        dout("ceph_caps_revoking %p %s = %d\n", inode,
             ceph_cap_string(mask), ret);
        return ret;
@@ -855,7 +856,7 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
 }
 /*
- * called under i_lock
+ * called under i_ceph_lock
 */
 static int __ceph_is_any_caps(struct ceph_inode_info *ci)
 {
@@ -865,7 +866,7 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci)
 /*
 * Remove a cap.  Take steps to deal with a racing iterate_session_caps.
 *
- * caller should hold i_lock.
+ * caller should hold i_ceph_lock.
 * caller will not hold session s_mutex if called from destroy_inode.
 */
 void __ceph_remove_cap(struct ceph_cap *cap)
@@ -927,7 +928,7 @@ static int send_cap_msg(struct ceph_mds_session *session,
                        u64 size, u64 max_size,
                        struct timespec *mtime, struct timespec *atime,
                        u64 time_warp_seq,
-                        uid_t uid, gid_t gid, mode_t mode,
+                        uid_t uid, gid_t gid, umode_t mode,
                        u64 xattr_version,
                        struct ceph_buffer *xattrs_buf,
                        u64 follows)
@@ -1028,7 +1029,7 @@ static void __queue_cap_release(struct ceph_mds_session *session,
 /*
 * Queue cap releases when an inode is dropped from our cache.  Since
- * inode is about to be destroyed, there is no need for i_lock.
+ * inode is about to be destroyed, there is no need for i_ceph_lock.
 */
 void ceph_queue_caps_release(struct inode *inode)
 {
@@ -1049,7 +1050,7 @@ void ceph_queue_caps_release(struct inode *inode)
 /*
 * Send a cap msg on the given inode.  Update our caps state, then
- * drop i_lock and send the message.
+ * drop i_ceph_lock and send the message.
 *
 * Make note of max_size reported/requested from mds, revoked caps
 * that have now been implemented.
@@ -1061,13 +1062,13 @@ void ceph_queue_caps_release(struct inode *inode)
 * Return non-zero if delayed release, or we experienced an error
 * such that the caller should requeue + retry later.
 *
- * called with i_lock, then drops it.
+ * called with i_ceph_lock, then drops it.
 * caller should hold snap_rwsem (read), s_mutex.
 */
 static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
                      int op, int used, int want, int retain, int flushing,
                      unsigned *pflush_tid)
-        __releases(cap->ci->vfs_inode->i_lock)
+        __releases(cap->ci->i_ceph_lock)
 {
        struct ceph_inode_info *ci = cap->ci;
        struct inode *inode = &ci->vfs_inode;
@@ -1077,7 +1078,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
        u64 size, max_size;
        struct timespec mtime, atime;
        int wake = 0;
-        mode_t mode;
+        umode_t mode;
        uid_t uid;
        gid_t gid;
        struct ceph_mds_session *session;
@@ -1170,7 +1171,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
                xattr_version = ci->i_xattrs.version;
        }
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id,
                op, keep, want, flushing, seq, flush_tid, issue_seq, mseq,
@@ -1198,13 +1199,13 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
 * Unless @again is true, skip cap_snaps that were already sent to
 * the MDS (i.e., during this session).
 *
- * Called under i_lock.  Takes s_mutex as needed.
+ * Called under i_ceph_lock.  Takes s_mutex as needed.
 */
 void __ceph_flush_snaps(struct ceph_inode_info *ci,
                        struct ceph_mds_session **psession,
                        int again)
-                __releases(ci->vfs_inode->i_lock)
+                __releases(ci->i_ceph_lock)
-                __acquires(ci->vfs_inode->i_lock)
+                __acquires(ci->i_ceph_lock)
 {
        struct inode *inode = &ci->vfs_inode;
        int mds;
@@ -1261,7 +1262,7 @@ retry:
                        session = NULL;
                }
                if (!session) {
-                        spin_unlock(&inode->i_lock);
+                        spin_unlock(&ci->i_ceph_lock);
                        mutex_lock(&mdsc->mutex);
                        session = __ceph_lookup_mds_session(mdsc, mds);
                        mutex_unlock(&mdsc->mutex);
@@ -1275,7 +1276,7 @@ retry:
                         * deletion or migration.  retry, and we'll
                         * get a better @mds value next time.
                         */
-                        spin_lock(&inode->i_lock);
+                        spin_lock(&ci->i_ceph_lock);
                        goto retry;
                }
@@ -1285,7 +1286,7 @@ retry:
                        list_del_init(&capsnap->flushing_item);
                list_add_tail(&capsnap->flushing_item,
                              &session->s_cap_snaps_flushing);
-                spin_unlock(&inode->i_lock);
+                spin_unlock(&ci->i_ceph_lock);
                dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n",
                     inode, capsnap, capsnap->follows, capsnap->flush_tid);
@@ -1302,7 +1303,7 @@ retry:
                next_follows = capsnap->follows + 1;
                ceph_put_cap_snap(capsnap);
-                spin_lock(&inode->i_lock);
+                spin_lock(&ci->i_ceph_lock);
                goto retry;
        }
@@ -1322,11 +1323,9 @@ out:
 static void ceph_flush_snaps(struct ceph_inode_info *ci)
 {
-        struct inode *inode = &ci->vfs_inode;
+        spin_lock(&ci->i_ceph_lock);
-        spin_lock(&inode->i_lock);
        __ceph_flush_snaps(ci, NULL, 0);
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
 }
 /*
@@ -1373,7 +1372,7 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
 * Add dirty inode to the flushing list.  Assigned a seq number so we
 * can wait for caps to flush without starving.
 *
- * Called under i_lock.
+ * Called under i_ceph_lock.
 */
 static int __mark_caps_flushing(struct inode *inode,
                                 struct ceph_mds_session *session)
@@ -1421,9 +1420,9 @@ static int try_nonblocking_invalidate(struct inode *inode)
        struct ceph_inode_info *ci = ceph_inode(inode);
        u32 invalidating_gen = ci->i_rdcache_gen;
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        invalidate_mapping_pages(&inode->i_data, 0, -1);
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        if (inode->i_data.nrpages == 0 &&
            invalidating_gen == ci->i_rdcache_gen) {
@@ -1470,7 +1469,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
        if (mdsc->stopping)
                is_delayed = 1;
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        if (ci->i_ceph_flags & CEPH_I_FLUSH)
                flags |= CHECK_CAPS_FLUSH;
@@ -1480,7 +1479,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
                __ceph_flush_snaps(ci, &session, 0);
        goto retry_locked;
 retry:
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
 retry_locked:
        file_wanted = __ceph_caps_file_wanted(ci);
        used = __ceph_caps_used(ci);
@@ -1634,7 +1633,7 @@ ack:
                        if (mutex_trylock(&session->s_mutex) == 0) {
                                dout("inverting session/ino locks on %p\n",
                                     session);
-                                spin_unlock(&inode->i_lock);
+                                spin_unlock(&ci->i_ceph_lock);
                                if (took_snap_rwsem) {
                                        up_read(&mdsc->snap_rwsem);
                                        took_snap_rwsem = 0;
@@ -1648,7 +1647,7 @@ ack:
                        if (down_read_trylock(&mdsc->snap_rwsem) == 0) {
                                dout("inverting snap/in locks on %p\n",
                                     inode);
-                                spin_unlock(&inode->i_lock);
+                                spin_unlock(&ci->i_ceph_lock);
                                down_read(&mdsc->snap_rwsem);
                                took_snap_rwsem = 1;
                                goto retry;
@@ -1664,10 +1663,10 @@ ack:
                mds = cap->mds;  /* remember mds, so we don't repeat */
                sent++;
-                /* __send_cap drops i_lock */
+                /* __send_cap drops i_ceph_lock */
                delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, used, want,
                                      retain, flushing, NULL);
-                goto retry; /* retake i_lock and restart our cap scan. */
+                goto retry; /* retake i_ceph_lock and restart our cap scan. */
        }
        /*
@@ -1681,7 +1680,7 @@ ack:
        else if (!is_delayed || force_requeue)
                __cap_delay_requeue(mdsc, ci);
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        if (queue_invalidate)
                ceph_queue_invalidate(inode);
@@ -1704,7 +1703,7 @@ static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session,
        int flushing = 0;
 retry:
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        if (ci->i_ceph_flags & CEPH_I_NOFLUSH) {
                dout("try_flush_caps skipping %p I_NOFLUSH set\n", inode);
                goto out;
@@ -1716,7 +1715,7 @@ retry:
                int delayed;
                if (!session) {
-                        spin_unlock(&inode->i_lock);
+                        spin_unlock(&ci->i_ceph_lock);
                        session = cap->session;
                        mutex_lock(&session->s_mutex);
                        goto retry;
@@ -1727,18 +1726,18 @@ retry:
                flushing = __mark_caps_flushing(inode, session);
-                /* __send_cap drops i_lock */
+                /* __send_cap drops i_ceph_lock */
                delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, used, want,
                                     cap->issued | cap->implemented, flushing,
                                     flush_tid);
                if (!delayed)
                        goto out_unlocked;
-                spin_lock(&inode->i_lock);
+                spin_lock(&ci->i_ceph_lock);
                __cap_delay_requeue(mdsc, ci);
        }
 out:
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
 out_unlocked:
        if (session && unlock_session)
                mutex_unlock(&session->s_mutex);
@@ -1753,7 +1752,7 @@ static int caps_are_flushed(struct inode *inode, unsigned tid)
        struct ceph_inode_info *ci = ceph_inode(inode);
        int i, ret = 1;
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        for (i = 0; i < CEPH_CAP_BITS; i++)
                if ((ci->i_flushing_caps & (1 << i)) &&
                    ci->i_cap_flush_tid[i] <= tid) {
@@ -1761,7 +1760,7 @@ static int caps_are_flushed(struct inode *inode, unsigned tid)
                        ret = 0;
                        break;
                }
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        return ret;
 }
@@ -1868,10 +1867,10 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc)
                struct ceph_mds_client *mdsc =
                        ceph_sb_to_client(inode->i_sb)->mdsc;
-                spin_lock(&inode->i_lock);
+                spin_lock(&ci->i_ceph_lock);
                if (__ceph_caps_dirty(ci))
                        __cap_delay_requeue_front(mdsc, ci);
-                spin_unlock(&inode->i_lock);
+                spin_unlock(&ci->i_ceph_lock);
        }
        return err;
 }
@@ -1894,7 +1893,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
                struct inode *inode = &ci->vfs_inode;
                struct ceph_cap *cap;
-                spin_lock(&inode->i_lock);
+                spin_lock(&ci->i_ceph_lock);
                cap = ci->i_auth_cap;
                if (cap && cap->session == session) {
                        dout("kick_flushing_caps %p cap %p capsnap %p\n", inode,
@@ -1904,7 +1903,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
                        pr_err("%p auth cap %p not mds%d ???\n", inode,
                               cap, session->s_mds);
                }
-                spin_unlock(&inode->i_lock);
+                spin_unlock(&ci->i_ceph_lock);
        }
 }
@@ -1921,7 +1920,7 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
                struct ceph_cap *cap;
                int delayed = 0;
-                spin_lock(&inode->i_lock);
+                spin_lock(&ci->i_ceph_lock);
                cap = ci->i_auth_cap;
                if (cap && cap->session == session) {
                        dout("kick_flushing_caps %p cap %p %s\n", inode,
@@ -1932,14 +1931,14 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
                                             cap->issued | cap->implemented,
                                             ci->i_flushing_caps, NULL);
                        if (delayed) {
-                                spin_lock(&inode->i_lock);
+                                spin_lock(&ci->i_ceph_lock);
                                __cap_delay_requeue(mdsc, ci);
-                                spin_unlock(&inode->i_lock);
+                                spin_unlock(&ci->i_ceph_lock);
                        }
                } else {
                        pr_err("%p auth cap %p not mds%d ???\n", inode,
                               cap, session->s_mds);
-                        spin_unlock(&inode->i_lock);
+                        spin_unlock(&ci->i_ceph_lock);
                }
        }
 }
@@ -1952,7 +1951,7 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
        struct ceph_cap *cap;
        int delayed = 0;
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        cap = ci->i_auth_cap;
        dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode,
             ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq);
@@ -1964,12 +1963,12 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
                                     cap->issued | cap->implemented,
                                     ci->i_flushing_caps, NULL);
                if (delayed) {
-                        spin_lock(&inode->i_lock);
+                        spin_lock(&ci->i_ceph_lock);
                        __cap_delay_requeue(mdsc, ci);
-                        spin_unlock(&inode->i_lock);
+                        spin_unlock(&ci->i_ceph_lock);
                }
        } else {
-                spin_unlock(&inode->i_lock);
+                spin_unlock(&ci->i_ceph_lock);
        }
 }
@@ -1978,7 +1977,7 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
 * Take references to capabilities we hold, so that we don't release
 * them to the MDS prematurely.
 *
- * Protected by i_lock.
+ * Protected by i_ceph_lock.
 */
 static void __take_cap_refs(struct ceph_inode_info *ci, int got)
 {
@@ -2016,7 +2015,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
        dout("get_cap_refs %p need %s want %s\n", inode,
             ceph_cap_string(need), ceph_cap_string(want));
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        /* make sure file is actually open */
        file_wanted = __ceph_caps_file_wanted(ci);
@@ -2077,7 +2076,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
                     ceph_cap_string(have), ceph_cap_string(need));
        }
 out:
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        dout("get_cap_refs %p ret %d got %s\n", inode,
             ret, ceph_cap_string(*got));
        return ret;
@@ -2094,7 +2093,7 @@ static void check_max_size(struct inode *inode, loff_t endoff)
        int check = 0;
        /* do we need to explicitly request a larger max_size? */
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        if ((endoff >= ci->i_max_size ||
             endoff > (inode->i_size << 1)) &&
            endoff > ci->i_wanted_max_size) {
@@ -2103,7 +2102,7 @@ static void check_max_size(struct inode *inode, loff_t endoff)
                ci->i_wanted_max_size = endoff;
                check = 1;
        }
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        if (check)
                ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
 }
@@ -2140,9 +2139,9 @@ retry:
 */
 void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps)
 {
-        spin_lock(&ci->vfs_inode.i_lock);
+        spin_lock(&ci->i_ceph_lock);
        __take_cap_refs(ci, caps);
-        spin_unlock(&ci->vfs_inode.i_lock);
+        spin_unlock(&ci->i_ceph_lock);
 }
 /*
@@ -2160,7 +2159,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
        int last = 0, put = 0, flushsnaps = 0, wake = 0;
        struct ceph_cap_snap *capsnap;
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        if (had & CEPH_CAP_PIN)
                --ci->i_pin_ref;
        if (had & CEPH_CAP_FILE_RD)
@@ -2193,7 +2192,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
                                }
                        }
                }
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
             last ? " last" : "", put ? " put" : "");
@@ -2225,7 +2224,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
        int found = 0;
        struct ceph_cap_snap *capsnap = NULL;
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        ci->i_wrbuffer_ref -= nr;
        last = !ci->i_wrbuffer_ref;
@@ -2274,7 +2273,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
                }
        }
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        if (last) {
                ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
@@ -2291,7 +2290,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
 * Handle a cap GRANT message from the MDS.  (Note that a GRANT may
 * actually be a revocation if it specifies a smaller cap set.)
 *
- * caller holds s_mutex and i_lock, we drop both.
+ * caller holds s_mutex and i_ceph_lock, we drop both.
 *
 * return value:
 *  0 - ok
@@ -2302,7 +2301,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
                             struct ceph_mds_session *session,
                             struct ceph_cap *cap,
                             struct ceph_buffer *xattr_buf)
-                __releases(inode->i_lock)
+                __releases(ci->i_ceph_lock)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
        int mds = session->s_mds;
@@ -2453,7 +2452,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
        }
        BUG_ON(cap->issued & ~cap->implemented);
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        if (writeback)
                /*
                 * queue inode for writeback: we can't actually call
@@ -2483,7 +2482,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
                                 struct ceph_mds_caps *m,
                                 struct ceph_mds_session *session,
                                 struct ceph_cap *cap)
-        __releases(inode->i_lock)
+        __releases(ci->i_ceph_lock)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
@@ -2539,7 +2538,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
        wake_up_all(&ci->i_cap_wq);
 out:
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        if (drop)
                iput(inode);
 }
@@ -2562,7 +2561,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
        dout("handle_cap_flushsnap_ack inode %p ci %p mds%d follows %lld\n",
             inode, ci, session->s_mds, follows);
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
                if (capsnap->follows == follows) {
                        if (capsnap->flush_tid != flush_tid) {
@@ -2585,7 +2584,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
                             capsnap, capsnap->follows);
                }
        }
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        if (drop)
                iput(inode);
 }
@@ -2598,7 +2597,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
 static void handle_cap_trunc(struct inode *inode,
                             struct ceph_mds_caps *trunc,
                             struct ceph_mds_session *session)
-        __releases(inode->i_lock)
+        __releases(ci->i_ceph_lock)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
        int mds = session->s_mds;
@@ -2617,7 +2616,7 @@ static void handle_cap_trunc(struct inode *inode,
             inode, mds, seq, truncate_size, truncate_seq);
        queue_trunc = ceph_fill_file_size(inode, issued,
                                          truncate_seq, truncate_size, size);
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        if (queue_trunc)
                ceph_queue_vmtruncate(inode);
@@ -2646,7 +2645,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
        dout("handle_cap_export inode %p ci %p mds%d mseq %d\n",
             inode, ci, mds, mseq);
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        /* make sure we haven't seen a higher mseq */
        for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
@@ -2690,7 +2689,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
        }
        /* else, we already released it */
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
 }
 /*
@@ -2745,9 +2744,9 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
        up_read(&mdsc->snap_rwsem);
        /* make sure we re-request max_size, if necessary */
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        ci->i_requested_max_size = 0;
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
 }
 /*
@@ -2762,6 +2761,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
        struct ceph_mds_client *mdsc = session->s_mdsc;
        struct super_block *sb = mdsc->fsc->sb;
        struct inode *inode;
+        struct ceph_inode_info *ci;
        struct ceph_cap *cap;
        struct ceph_mds_caps *h;
        int mds = session->s_mds;
@@ -2815,6 +2815,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
        /* lookup ino */
        inode = ceph_find_inode(sb, vino);
+        ci = ceph_inode(inode);
        dout(" op %s ino %llx.%llx inode %p\n", ceph_cap_op_name(op), vino.ino,
             vino.snap, inode);
        if (!inode) {
@@ -2844,16 +2845,16 @@ void ceph_handle_caps(struct ceph_mds_session *session,
        }
        /* the rest require a cap */
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        cap = __get_cap_for_mds(ceph_inode(inode), mds);
        if (!cap) {
                dout(" no cap on %p ino %llx.%llx from mds%d\n",
                     inode, ceph_ino(inode), ceph_snap(inode), mds);
-                spin_unlock(&inode->i_lock);
+                spin_unlock(&ci->i_ceph_lock);
                goto flush_cap_releases;
        }
-        /* note that each of these drops i_lock for us */
+        /* note that each of these drops i_ceph_lock for us */
        switch (op) {
        case CEPH_CAP_OP_REVOKE:
        case CEPH_CAP_OP_GRANT:
@@ -2869,7 +2870,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
                break;
        default:
-                spin_unlock(&inode->i_lock);
+                spin_unlock(&ci->i_ceph_lock);
                pr_err("ceph_handle_caps: unknown cap op %d %s\n", op,
                       ceph_cap_op_name(op));
        }
@@ -2962,13 +2963,13 @@ void ceph_put_fmode(struct ceph_inode_info *ci, int fmode)
        struct inode *inode = &ci->vfs_inode;
        int last = 0;
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        dout("put_fmode %p fmode %d %d -> %d\n", inode, fmode,
             ci->i_nr_by_mode[fmode], ci->i_nr_by_mode[fmode]-1);
        BUG_ON(ci->i_nr_by_mode[fmode] == 0);
        if (--ci->i_nr_by_mode[fmode] == 0)
                last++;
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        if (last && ci->i_vino.snap == CEPH_NOSNAP)
                ceph_check_caps(ci, 0, NULL);
@@ -2991,7 +2992,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
        int used, dirty;
        int ret = 0;
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        used = __ceph_caps_used(ci);
        dirty = __ceph_caps_dirty(ci);
@@ -3046,7 +3047,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
                             inode, cap, ceph_cap_string(cap->issued));
                }
        }
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        return ret;
 }
@@ -3061,7 +3062,7 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
        /*
         * force an record for the directory caps if we have a dentry lease.
-         * this is racy (can't take i_lock and d_lock together), but it
+         * this is racy (can't take i_ceph_lock and d_lock together), but it
         * doesn't have to be perfect; the mds will revoke anything we don't
         * release.
         */
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index bca3948e9dbf..74fd74719dc2 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -281,18 +281,18 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
        }
        /* can we use the dcache? */
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        if ((filp->f_pos == 2 || fi->dentry) &&
            !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
            ceph_snap(inode) != CEPH_SNAPDIR &&
            ceph_dir_test_complete(inode) &&
            __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
-                spin_unlock(&inode->i_lock);
+                spin_unlock(&ci->i_ceph_lock);
                err = __dcache_readdir(filp, dirent, filldir);
                if (err != -EAGAIN)
                        return err;
        } else {
-                spin_unlock(&inode->i_lock);
+                spin_unlock(&ci->i_ceph_lock);
        }
        if (fi->dentry) {
                err = note_last_dentry(fi, fi->dentry->d_name.name,
@@ -428,12 +428,12 @@ more:
         * were released during the whole readdir, and we should have
         * the complete dir contents in our cache.
         */
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        if (ci->i_release_count == fi->dir_release_count) {
                ceph_dir_set_complete(inode);
                ci->i_max_offset = filp->f_pos;
        }
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        dout("readdir %p filp %p done.\n", inode, filp);
        return 0;
@@ -607,7 +607,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
                struct ceph_inode_info *ci = ceph_inode(dir);
                struct ceph_dentry_info *di = ceph_dentry(dentry);
-                spin_lock(&dir->i_lock);
+                spin_lock(&ci->i_ceph_lock);
                dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags);
                if (strncmp(dentry->d_name.name,
                            fsc->mount_options->snapdir_name,
@@ -615,13 +615,13 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
                    !is_root_ceph_dentry(dir, dentry) &&
                    ceph_dir_test_complete(dir) &&
                    (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
-                        spin_unlock(&dir->i_lock);
+                        spin_unlock(&ci->i_ceph_lock);
                        dout(" dir %p complete, -ENOENT\n", dir);
                        d_add(dentry, NULL);
                        di->lease_shared_gen = ci->i_shared_gen;
                        return NULL;
                }
-                spin_unlock(&dir->i_lock);
+                spin_unlock(&ci->i_ceph_lock);
        }
        op = ceph_snap(dir) == CEPH_SNAPDIR ?
@@ -666,7 +666,7 @@ int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry)
 }
 static int ceph_mknod(struct inode *dir, struct dentry *dentry,
-                      int mode, dev_t rdev)
+                      umode_t mode, dev_t rdev)
 {
        struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
        struct ceph_mds_client *mdsc = fsc->mdsc;
@@ -676,7 +676,7 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
        if (ceph_snap(dir) != CEPH_NOSNAP)
                return -EROFS;
-        dout("mknod in dir %p dentry %p mode 0%o rdev %d\n",
+        dout("mknod in dir %p dentry %p mode 0%ho rdev %d\n",
             dir, dentry, mode, rdev);
        req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_MKNOD, USE_AUTH_MDS);
        if (IS_ERR(req)) {
@@ -699,7 +699,7 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
        return err;
 }
-static int ceph_create(struct inode *dir, struct dentry *dentry, int mode,
+static int ceph_create(struct inode *dir, struct dentry *dentry, umode_t mode,
                       struct nameidata *nd)
 {
        dout("create in dir %p dentry %p name '%.*s'\n",
@@ -753,7 +753,7 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
        return err;
 }
-static int ceph_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
        struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
        struct ceph_mds_client *mdsc = fsc->mdsc;
@@ -767,7 +767,7 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, int mode)
                dout("mksnap dir %p snap '%.*s' dn %p\n", dir,
                     dentry->d_name.len, dentry->d_name.name, dentry);
        } else if (ceph_snap(dir) == CEPH_NOSNAP) {
-                dout("mkdir dir %p dn %p mode 0%o\n", dir, dentry, mode);
+                dout("mkdir dir %p dn %p mode 0%ho\n", dir, dentry, mode);
                op = CEPH_MDS_OP_MKDIR;
        } else {
                goto out;
@@ -841,12 +841,12 @@ static int drop_caps_for_unlink(struct inode *inode)
        struct ceph_inode_info *ci = ceph_inode(inode);
        int drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        if (inode->i_nlink == 1) {
                drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN);
                ci->i_ceph_flags |= CEPH_I_NODELAY;
        }
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        return drop;
 }
@@ -870,7 +870,7 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
        } else if (ceph_snap(dir) == CEPH_NOSNAP) {
                dout("unlink/rmdir dir %p dn %p inode %p\n",
                     dir, dentry, inode);
-                op = ((dentry->d_inode->i_mode & S_IFMT) == S_IFDIR) ?
+                op = S_ISDIR(dentry->d_inode->i_mode) ?
                        CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK;
        } else
                goto out;
@@ -1015,10 +1015,10 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
        struct ceph_dentry_info *di = ceph_dentry(dentry);
        int valid = 0;
-        spin_lock(&dir->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        if (ci->i_shared_gen == di->lease_shared_gen)
                valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1);
-        spin_unlock(&dir->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        dout("dir_lease_is_valid dir %p v%u dentry %p v%u = %d\n",
             dir, (unsigned)ci->i_shared_gen, dentry,
             (unsigned)di->lease_shared_gen, valid);
@@ -1094,42 +1094,19 @@ static int ceph_snapdir_d_revalidate(struct dentry *dentry,
 /*
 * Set/clear/test dir complete flag on the dir's dentry.
 */
-static struct dentry * __d_find_any_alias(struct inode *inode)
-{
-        struct dentry *alias;
-        if (list_empty(&inode->i_dentry))
-                return NULL;
-        alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias);
-        return alias;
-}
 void ceph_dir_set_complete(struct inode *inode)
 {
-        struct dentry *dentry = __d_find_any_alias(inode);
+        /* not yet implemented */
-        
-        if (dentry && ceph_dentry(dentry)) {
-                dout(" marking %p (%p) complete\n", inode, dentry);
-                set_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
-        }
 }
 void ceph_dir_clear_complete(struct inode *inode)
 {
-        struct dentry *dentry = __d_find_any_alias(inode);
+        /* not yet implemented */
-        if (dentry && ceph_dentry(dentry)) {
-                dout(" marking %p (%p) NOT complete\n", inode, dentry);
-                clear_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
-        }
 }
 bool ceph_dir_test_complete(struct inode *inode)
 {
-        struct dentry *dentry = __d_find_any_alias(inode);
+        /* not yet implemented */
-        if (dentry && ceph_dentry(dentry))
-                return test_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
        return false;
 }
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index ce549d31eeb7..ed72428d9c75 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -147,9 +147,9 @@ int ceph_open(struct inode *inode, struct file *file)
        /* trivially open snapdir */
        if (ceph_snap(inode) == CEPH_SNAPDIR) {
-                spin_lock(&inode->i_lock);
+                spin_lock(&ci->i_ceph_lock);
                __ceph_get_fmode(ci, fmode);
-                spin_unlock(&inode->i_lock);
+                spin_unlock(&ci->i_ceph_lock);
                return ceph_init_file(inode, file, fmode);
        }
@@ -158,7 +158,7 @@ int ceph_open(struct inode *inode, struct file *file)
         * write) or any MDS (for read).  Update wanted set
         * asynchronously.
         */
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        if (__ceph_is_any_real_caps(ci) &&
            (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
                int mds_wanted = __ceph_caps_mds_wanted(ci);
@@ -168,7 +168,7 @@ int ceph_open(struct inode *inode, struct file *file)
                     inode, fmode, ceph_cap_string(wanted),
                     ceph_cap_string(issued));
                __ceph_get_fmode(ci, fmode);
-                spin_unlock(&inode->i_lock);
+                spin_unlock(&ci->i_ceph_lock);
                /* adjust wanted? */
                if ((issued & wanted) != wanted &&
@@ -180,10 +180,10 @@ int ceph_open(struct inode *inode, struct file *file)
        } else if (ceph_snap(inode) != CEPH_NOSNAP &&
                   (ci->i_snap_caps & wanted) == wanted) {
                __ceph_get_fmode(ci, fmode);
-                spin_unlock(&inode->i_lock);
+                spin_unlock(&ci->i_ceph_lock);
                return ceph_init_file(inode, file, fmode);
        }
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted));
        req = prepare_open_request(inode->i_sb, flags, 0);
@@ -743,9 +743,9 @@ retry_snap:
                 */
                int dirty;
-                spin_lock(&inode->i_lock);
+                spin_lock(&ci->i_ceph_lock);
                dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
-                spin_unlock(&inode->i_lock);
+                spin_unlock(&ci->i_ceph_lock);
                ceph_put_cap_refs(ci, got);
                ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
@@ -764,9 +764,9 @@ retry_snap:
        if (ret >= 0) {
                int dirty;
-                spin_lock(&inode->i_lock);
+                spin_lock(&ci->i_ceph_lock);
                dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
-                spin_unlock(&inode->i_lock);
+                spin_unlock(&ci->i_ceph_lock);
                if (dirty)
                        __mark_inode_dirty(inode, dirty);
        }
@@ -797,7 +797,8 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int origin)
        mutex_lock(&inode->i_mutex);
        __ceph_do_pending_vmtruncate(inode);
-        if (origin != SEEK_CUR || origin != SEEK_SET) {
+        if (origin == SEEK_END || origin == SEEK_DATA || origin == SEEK_HOLE) {
                ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE);
                if (ret < 0) {
                        offset = ret;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 116f36502f17..25283e7a37f8 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -297,6 +297,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
        dout("alloc_inode %p\n", &ci->vfs_inode);
+        spin_lock_init(&ci->i_ceph_lock);
        ci->i_version = 0;
        ci->i_time_warp_seq = 0;
        ci->i_ceph_flags = 0;
@@ -382,7 +384,6 @@ static void ceph_i_callback(struct rcu_head *head)
        struct inode *inode = container_of(head, struct inode, i_rcu);
        struct ceph_inode_info *ci = ceph_inode(inode);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(ceph_inode_cachep, ci);
 }
@@ -583,7 +584,7 @@ static int fill_inode(struct inode *inode,
                               iinfo->xattr_len);
        }
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        /*
         * provided version will be odd if inode value is projected,
@@ -680,7 +681,7 @@ static int fill_inode(struct inode *inode,
                        char *sym;
                        BUG_ON(symlen != inode->i_size);
-                        spin_unlock(&inode->i_lock);
+                        spin_unlock(&ci->i_ceph_lock);
                        err = -ENOMEM;
                        sym = kmalloc(symlen+1, GFP_NOFS);
@@ -689,7 +690,7 @@ static int fill_inode(struct inode *inode,
                        memcpy(sym, iinfo->symlink, symlen);
                        sym[symlen] = 0;
-                        spin_lock(&inode->i_lock);
+                        spin_lock(&ci->i_ceph_lock);
                        if (!ci->i_symlink)
                                ci->i_symlink = sym;
                        else
@@ -715,7 +716,7 @@ static int fill_inode(struct inode *inode,
        }
 no_change:
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        /* queue truncate if we saw i_size decrease */
        if (queue_trunc)
@@ -750,13 +751,13 @@ no_change:
                                     info->cap.flags,
                                     caps_reservation);
                } else {
-                        spin_lock(&inode->i_lock);
+                        spin_lock(&ci->i_ceph_lock);
                        dout(" %p got snap_caps %s\n", inode,
                             ceph_cap_string(le32_to_cpu(info->cap.caps)));
                        ci->i_snap_caps |= le32_to_cpu(info->cap.caps);
                        if (cap_fmode >= 0)
                                __ceph_get_fmode(ci, cap_fmode);
-                        spin_unlock(&inode->i_lock);
+                        spin_unlock(&ci->i_ceph_lock);
                }
        } else if (cap_fmode >= 0) {
                pr_warning("mds issued no caps on %llx.%llx\n",
@@ -849,19 +850,20 @@ static void ceph_set_dentry_offset(struct dentry *dn)
 {
        struct dentry *dir = dn->d_parent;
        struct inode *inode = dir->d_inode;
+        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_dentry_info *di;
        BUG_ON(!inode);
        di = ceph_dentry(dn);
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        if (!ceph_dir_test_complete(inode)) {
-                spin_unlock(&inode->i_lock);
+                spin_unlock(&ci->i_ceph_lock);
                return;
        }
        di->offset = ceph_inode(inode)->i_max_offset++;
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        spin_lock(&dir->d_lock);
        spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
@@ -1308,7 +1310,7 @@ int ceph_inode_set_size(struct inode *inode, loff_t size)
        struct ceph_inode_info *ci = ceph_inode(inode);
        int ret = 0;
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        dout("set_size %p %llu -> %llu\n", inode, inode->i_size, size);
        inode->i_size = size;
        inode->i_blocks = (size + (1 << 9) - 1) >> 9;
@@ -1318,7 +1320,7 @@ int ceph_inode_set_size(struct inode *inode, loff_t size)
            (ci->i_reported_size << 1) < ci->i_max_size)
                ret = 1;
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        return ret;
 }
@@ -1376,20 +1378,20 @@ static void ceph_invalidate_work(struct work_struct *work)
        u32 orig_gen;
        int check = 0;
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        dout("invalidate_pages %p gen %d revoking %d\n", inode,
             ci->i_rdcache_gen, ci->i_rdcache_revoking);
        if (ci->i_rdcache_revoking != ci->i_rdcache_gen) {
                /* nevermind! */
-                spin_unlock(&inode->i_lock);
+                spin_unlock(&ci->i_ceph_lock);
                goto out;
        }
        orig_gen = ci->i_rdcache_gen;
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        truncate_inode_pages(&inode->i_data, 0);
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        if (orig_gen == ci->i_rdcache_gen &&
            orig_gen == ci->i_rdcache_revoking) {
                dout("invalidate_pages %p gen %d successful\n", inode,
@@ -1401,7 +1403,7 @@ static void ceph_invalidate_work(struct work_struct *work)
                     inode, orig_gen, ci->i_rdcache_gen,
                     ci->i_rdcache_revoking);
        }
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        if (check)
                ceph_check_caps(ci, 0, NULL);
@@ -1460,10 +1462,10 @@ void __ceph_do_pending_vmtruncate(struct inode *inode)
        int wrbuffer_refs, wake = 0;
 retry:
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        if (ci->i_truncate_pending == 0) {
                dout("__do_pending_vmtruncate %p none pending\n", inode);
-                spin_unlock(&inode->i_lock);
+                spin_unlock(&ci->i_ceph_lock);
                return;
        }
@@ -1474,7 +1476,7 @@ retry:
        if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) {
                dout("__do_pending_vmtruncate %p flushing snaps first\n",
                     inode);
-                spin_unlock(&inode->i_lock);
+                spin_unlock(&ci->i_ceph_lock);
                filemap_write_and_wait_range(&inode->i_data, 0,
                                             inode->i_sb->s_maxbytes);
                goto retry;
@@ -1484,15 +1486,15 @@ retry:
        wrbuffer_refs = ci->i_wrbuffer_ref;
        dout("__do_pending_vmtruncate %p (%d) to %lld\n", inode,
             ci->i_truncate_pending, to);
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        truncate_inode_pages(inode->i_mapping, to);
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        ci->i_truncate_pending--;
        if (ci->i_truncate_pending == 0)
                wake = 1;
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        if (wrbuffer_refs == 0)
                ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
@@ -1547,7 +1549,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
        if (IS_ERR(req))
                return PTR_ERR(req);
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        issued = __ceph_caps_issued(ci, NULL);
        dout("setattr %p issued %s\n", inode, ceph_cap_string(issued));
@@ -1695,7 +1697,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
        }
        release &= issued;
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        if (inode_dirty_flags)
                __mark_inode_dirty(inode, inode_dirty_flags);
@@ -1717,7 +1719,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
        __ceph_do_pending_vmtruncate(inode);
        return err;
 out:
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        ceph_mdsc_put_request(req);
        return err;
 }
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index 5a14c29cbba6..790914a598dd 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -241,11 +241,11 @@ static long ceph_ioctl_lazyio(struct file *file)
        struct ceph_inode_info *ci = ceph_inode(inode);
        if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) {
-                spin_lock(&inode->i_lock);
+                spin_lock(&ci->i_ceph_lock);
                ci->i_nr_by_mode[fi->fmode]--;
                fi->fmode |= CEPH_FILE_MODE_LAZY;
                ci->i_nr_by_mode[fi->fmode]++;
-                spin_unlock(&inode->i_lock);
+                spin_unlock(&ci->i_ceph_lock);
                dout("ioctl_layzio: file %p marked lazy\n", file);
                ceph_check_caps(ci, 0, NULL);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 264ab701154f..6203d805eb45 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -732,21 +732,21 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
                }
        }
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        cap = NULL;
        if (mode == USE_AUTH_MDS)
                cap = ci->i_auth_cap;
        if (!cap && !RB_EMPTY_ROOT(&ci->i_caps))
                cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node);
        if (!cap) {
-                spin_unlock(&inode->i_lock);
+                spin_unlock(&ci->i_ceph_lock);
                goto random;
        }
        mds = cap->session->s_mds;
        dout("choose_mds %p %llx.%llx mds%d (%scap %p)\n",
             inode, ceph_vinop(inode), mds,
             cap == ci->i_auth_cap ? "auth " : "", cap);
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        return mds;
 random:
@@ -951,7 +951,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
        dout("removing cap %p, ci is %p, inode is %p\n",
             cap, ci, &ci->vfs_inode);
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        __ceph_remove_cap(cap);
        if (!__ceph_is_any_real_caps(ci)) {
                struct ceph_mds_client *mdsc =
@@ -984,7 +984,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
                }
                spin_unlock(&mdsc->cap_dirty_lock);
        }
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        while (drop--)
                iput(inode);
        return 0;
@@ -1015,10 +1015,10 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
        wake_up_all(&ci->i_cap_wq);
        if (arg) {
-                spin_lock(&inode->i_lock);
+                spin_lock(&ci->i_ceph_lock);
                ci->i_wanted_max_size = 0;
                ci->i_requested_max_size = 0;
-                spin_unlock(&inode->i_lock);
+                spin_unlock(&ci->i_ceph_lock);
        }
        return 0;
 }
@@ -1151,7 +1151,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
        if (session->s_trim_caps <= 0)
                return -1;
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        mine = cap->issued | cap->implemented;
        used = __ceph_caps_used(ci);
        oissued = __ceph_caps_issued_other(ci, cap);
@@ -1170,7 +1170,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
                __ceph_remove_cap(cap);
        } else {
                /* try to drop referring dentries */
-                spin_unlock(&inode->i_lock);
+                spin_unlock(&ci->i_ceph_lock);
                d_prune_aliases(inode);
                dout("trim_caps_cb %p cap %p  pruned, count now %d\n",
                     inode, cap, atomic_read(&inode->i_count));
@@ -1178,7 +1178,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
        }
 out:
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        return 0;
 }
@@ -1296,7 +1296,7 @@ static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
                                           i_flushing_item);
                        struct inode *inode = &ci->vfs_inode;
-                        spin_lock(&inode->i_lock);
+                        spin_lock(&ci->i_ceph_lock);
                        if (ci->i_cap_flush_seq <= want_flush_seq) {
                                dout("check_cap_flush still flushing %p "
                                     "seq %lld <= %lld to mds%d\n", inode,
@@ -1304,7 +1304,7 @@ static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
                                     session->s_mds);
                                ret = 0;
                        }
-                        spin_unlock(&inode->i_lock);
+                        spin_unlock(&ci->i_ceph_lock);
                }
                mutex_unlock(&session->s_mutex);
                ceph_put_mds_session(session);
@@ -1495,6 +1495,7 @@ retry:
                             pos, temp);
                } else if (stop_on_nosnap && inode &&
                           ceph_snap(inode) == CEPH_NOSNAP) {
+                        spin_unlock(&temp->d_lock);
                        break;
                } else {
                        pos -= temp->d_name.len;
@@ -2011,10 +2012,10 @@ void ceph_invalidate_dir_request(struct ceph_mds_request *req)
        struct ceph_inode_info *ci = ceph_inode(inode);
        dout("invalidate_dir_request %p (D_COMPLETE, lease(s))\n", inode);
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        ceph_dir_clear_complete(inode);
        ci->i_release_count++;
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        if (req->r_dentry)
                ceph_invalidate_dentry_lease(req->r_dentry);
@@ -2422,7 +2423,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
        if (err)
                goto out_free;
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        cap->seq = 0;        /* reset cap seq */
        cap->issue_seq = 0;  /* and issue_seq */
@@ -2445,7 +2446,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
                rec.v1.pathbase = cpu_to_le64(pathbase);
                reclen = sizeof(rec.v1);
        }
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        if (recon_state->flock) {
                int num_fcntl_locks, num_flock_locks;
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 4bb239921dbd..a50ca0e39475 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -20,7 +20,7 @@
 *
 *         mdsc->snap_rwsem
 *
- *         inode->i_lock
+ *         ci->i_ceph_lock
 *                 mdsc->snap_flush_lock
 *                 mdsc->cap_delay_lock
 *
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index e26437191333..a559c80f127a 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -446,7 +446,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
                return;
        }
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        used = __ceph_caps_used(ci);
        dirty = __ceph_caps_dirty(ci);
@@ -528,7 +528,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
                kfree(capsnap);
        }
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
 }
 /*
@@ -537,7 +537,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
 *
 * If capsnap can now be flushed, add to snap_flush list, and return 1.
 *
- * Caller must hold i_lock.
+ * Caller must hold i_ceph_lock.
 */
 int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
                            struct ceph_cap_snap *capsnap)
@@ -739,9 +739,9 @@ static void flush_snaps(struct ceph_mds_client *mdsc)
                inode = &ci->vfs_inode;
                ihold(inode);
                spin_unlock(&mdsc->snap_flush_lock);
-                spin_lock(&inode->i_lock);
+                spin_lock(&ci->i_ceph_lock);
                __ceph_flush_snaps(ci, &session, 0);
-                spin_unlock(&inode->i_lock);
+                spin_unlock(&ci->i_ceph_lock);
                iput(inode);
                spin_lock(&mdsc->snap_flush_lock);
        }
@@ -847,7 +847,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
                                continue;
                        ci = ceph_inode(inode);
-                        spin_lock(&inode->i_lock);
+                        spin_lock(&ci->i_ceph_lock);
                        if (!ci->i_snap_realm)
                                goto skip_inode;
                        /*
@@ -876,7 +876,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
                        oldrealm = ci->i_snap_realm;
                        ci->i_snap_realm = realm;
                        spin_unlock(&realm->inodes_with_caps_lock);
-                        spin_unlock(&inode->i_lock);
+                        spin_unlock(&ci->i_ceph_lock);
                        ceph_get_snap_realm(mdsc, realm);
                        ceph_put_snap_realm(mdsc, oldrealm);
@@ -885,7 +885,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
                        continue;
 skip_inode:
-                        spin_unlock(&inode->i_lock);
+                        spin_unlock(&ci->i_ceph_lock);
                        iput(inode);
                }
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 8dc73a594a90..11bd0fc4853f 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -341,11 +341,11 @@ out:
 /**
 * ceph_show_options - Show mount options in /proc/mounts
 * @m: seq_file to write to
- * @mnt: mount descriptor
+ * @root: root of that (sub)tree
 */
-static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
+static int ceph_show_options(struct seq_file *m, struct dentry *root)
 {
-        struct ceph_fs_client *fsc = ceph_sb_to_client(mnt->mnt_sb);
+        struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb);
        struct ceph_mount_options *fsopt = fsc->mount_options;
        struct ceph_options *opt = fsc->client->options;
@@ -383,7 +383,7 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
        if (fsopt->rsize != CEPH_RSIZE_DEFAULT)
                seq_printf(m, ",rsize=%d", fsopt->rsize);
        if (fsopt->rasize != CEPH_RASIZE_DEFAULT)
-                seq_printf(m, ",rasize=%d", fsopt->rsize);
+                seq_printf(m, ",rasize=%d", fsopt->rasize);
        if (fsopt->congestion_kb != default_congestion_kb())
                seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
        if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 01bf189e08a9..cb3652b37271 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -136,7 +136,7 @@ struct ceph_cap_snap {
        int issued, dirty;
        struct ceph_snap_context *context;
-        mode_t mode;
+        umode_t mode;
        uid_t uid;
        gid_t gid;
@@ -220,7 +220,7 @@ struct ceph_dentry_info {
 * The locking for D_COMPLETE is a bit odd:
 *  - we can clear it at almost any time (see ceph_d_prune)
 *  - it is only meaningful if:
- *    - we hold dir inode i_lock
+ *    - we hold dir inode i_ceph_lock
 *    - we hold dir FILE_SHARED caps
 *    - the dentry D_COMPLETE is set
 */
@@ -250,6 +250,8 @@ struct ceph_inode_xattrs_info {
 struct ceph_inode_info {
        struct ceph_vino i_vino;   /* ceph ino + snap */
+        spinlock_t i_ceph_lock;
        u64 i_version;
        u32 i_time_warp_seq;
@@ -271,7 +273,7 @@ struct ceph_inode_info {
        struct ceph_inode_xattrs_info i_xattrs;
-        /* capabilities.  protected _both_ by i_lock and cap->session's
+        /* capabilities.  protected _both_ by i_ceph_lock and cap->session's
         * s_mutex. */
        struct rb_root i_caps;           /* cap list */
        struct ceph_cap *i_auth_cap;     /* authoritative cap, if any */
@@ -437,18 +439,18 @@ static inline void ceph_i_clear(struct inode *inode, unsigned mask)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        ci->i_ceph_flags &= ~mask;
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
 }
 static inline void ceph_i_set(struct inode *inode, unsigned mask)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        ci->i_ceph_flags |= mask;
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
 }
 static inline bool ceph_i_test(struct inode *inode, unsigned mask)
@@ -456,9 +458,9 @@ static inline bool ceph_i_test(struct inode *inode, unsigned mask)
        struct ceph_inode_info *ci = ceph_inode(inode);
        bool r;
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        r = (ci->i_ceph_flags & mask) == mask;
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        return r;
 }
@@ -508,9 +510,9 @@ extern int __ceph_caps_issued_other(struct ceph_inode_info *ci,
 static inline int ceph_caps_issued(struct ceph_inode_info *ci)
 {
        int issued;
-        spin_lock(&ci->vfs_inode.i_lock);
+        spin_lock(&ci->i_ceph_lock);
        issued = __ceph_caps_issued(ci, NULL);
-        spin_unlock(&ci->vfs_inode.i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        return issued;
 }
@@ -518,9 +520,9 @@ static inline int ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask,
                                        int touch)
 {
        int r;
-        spin_lock(&ci->vfs_inode.i_lock);
+        spin_lock(&ci->i_ceph_lock);
        r = __ceph_caps_issued_mask(ci, mask, touch);
-        spin_unlock(&ci->vfs_inode.i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        return r;
 }
@@ -743,10 +745,9 @@ extern int ceph_add_cap(struct inode *inode,
 extern void __ceph_remove_cap(struct ceph_cap *cap);
 static inline void ceph_remove_cap(struct ceph_cap *cap)
 {
-        struct inode *inode = &cap->ci->vfs_inode;
+        spin_lock(&cap->ci->i_ceph_lock);
-        spin_lock(&inode->i_lock);
        __ceph_remove_cap(cap);
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&cap->ci->i_ceph_lock);
 }
 extern void ceph_put_cap(struct ceph_mds_client *mdsc,
                         struct ceph_cap *cap);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 96c6739a0280..a5e36e4488a7 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -343,8 +343,8 @@ void __ceph_destroy_xattrs(struct ceph_inode_info *ci)
 }
 static int __build_xattrs(struct inode *inode)
-        __releases(inode->i_lock)
+        __releases(ci->i_ceph_lock)
-        __acquires(inode->i_lock)
+        __acquires(ci->i_ceph_lock)
 {
        u32 namelen;
        u32 numattr = 0;
@@ -372,7 +372,7 @@ start:
                end = p + ci->i_xattrs.blob->vec.iov_len;
                ceph_decode_32_safe(&p, end, numattr, bad);
                xattr_version = ci->i_xattrs.version;
-                spin_unlock(&inode->i_lock);
+                spin_unlock(&ci->i_ceph_lock);
                xattrs = kcalloc(numattr, sizeof(struct ceph_xattr *),
                                 GFP_NOFS);
@@ -387,7 +387,7 @@ start:
                                goto bad_lock;
                }
-                spin_lock(&inode->i_lock);
+                spin_lock(&ci->i_ceph_lock);
                if (ci->i_xattrs.version != xattr_version) {
                        /* lost a race, retry */
                        for (i = 0; i < numattr; i++)
@@ -418,7 +418,7 @@ start:
        return err;
 bad_lock:
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
 bad:
        if (xattrs) {
                for (i = 0; i < numattr; i++)
@@ -512,7 +512,7 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
        if (vxattrs)
                vxattr = ceph_match_vxattr(vxattrs, name);
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
             ci->i_xattrs.version, ci->i_xattrs.index_version);
@@ -520,14 +520,14 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
            (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
                goto get_xattr;
        } else {
-                spin_unlock(&inode->i_lock);
+                spin_unlock(&ci->i_ceph_lock);
                /* get xattrs from mds (if we don't already have them) */
                err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
                if (err)
                        return err;
        }
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        if (vxattr && vxattr->readonly) {
                err = vxattr->getxattr_cb(ci, value, size);
@@ -558,7 +558,7 @@ get_xattr:
        memcpy(value, xattr->val, xattr->val_len);
 out:
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        return err;
 }
@@ -573,7 +573,7 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
        u32 len;
        int i;
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        dout("listxattr %p ver=%lld index_ver=%lld\n", inode,
             ci->i_xattrs.version, ci->i_xattrs.index_version);
@@ -581,13 +581,13 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
            (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
                goto list_xattr;
        } else {
-                spin_unlock(&inode->i_lock);
+                spin_unlock(&ci->i_ceph_lock);
                err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
                if (err)
                        return err;
        }
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        err = __build_xattrs(inode);
        if (err < 0)
@@ -619,7 +619,7 @@ list_xattr:
                }
 out:
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        return err;
 }
@@ -739,7 +739,7 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
        if (!xattr)
                goto out;
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
 retry:
        issued = __ceph_caps_issued(ci, NULL);
        if (!(issued & CEPH_CAP_XATTR_EXCL))
@@ -752,12 +752,12 @@ retry:
            required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
                struct ceph_buffer *blob = NULL;
-                spin_unlock(&inode->i_lock);
+                spin_unlock(&ci->i_ceph_lock);
                dout(" preaallocating new blob size=%d\n", required_blob_size);
                blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
                if (!blob)
                        goto out;
-                spin_lock(&inode->i_lock);
+                spin_lock(&ci->i_ceph_lock);
                if (ci->i_xattrs.prealloc_blob)
                        ceph_buffer_put(ci->i_xattrs.prealloc_blob);
                ci->i_xattrs.prealloc_blob = blob;
@@ -770,13 +770,13 @@ retry:
        dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
        ci->i_xattrs.dirty = true;
        inode->i_ctime = CURRENT_TIME;
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        if (dirty)
                __mark_inode_dirty(inode, dirty);
        return err;
 do_sync:
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        err = ceph_sync_setxattr(dentry, name, value, size, flags);
 out:
        kfree(newname);
@@ -833,7 +833,7 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
                        return -EOPNOTSUPP;
        }
-        spin_lock(&inode->i_lock);
+        spin_lock(&ci->i_ceph_lock);
        __build_xattrs(inode);
        issued = __ceph_caps_issued(ci, NULL);
        dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued));
@@ -846,12 +846,12 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
        ci->i_xattrs.dirty = true;
        inode->i_ctime = CURRENT_TIME;
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        if (dirty)
                __mark_inode_dirty(inode, dirty);
        return err;
 do_sync:
-        spin_unlock(&inode->i_lock);
+        spin_unlock(&ci->i_ceph_lock);
        err = ceph_send_removexattr(dentry, name);
        return err;
 }
diff --git a/fs/char_dev.c b/fs/char_dev.c
index dca9e5e0f73b..3f152b92a94a 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -272,7 +272,7 @@ int __register_chrdev(unsigned int major, unsigned int baseminor,
        cd = __register_chrdev_region(major, baseminor, count, name);
        if (IS_ERR(cd))
                return PTR_ERR(cd);
-        
        cdev = cdev_alloc();
        if (!cdev)
                goto out2;
@@ -280,7 +280,7 @@ int __register_chrdev(unsigned int major, unsigned int baseminor,
        cdev->owner = fops->owner;
        cdev->ops = fops;
        kobject_set_name(&cdev->kobj, "%s", name);
-                
        err = cdev_add(cdev, MKDEV(cd->major, baseminor), count);
        if (err)
                goto out;
@@ -405,7 +405,7 @@ static int chrdev_open(struct inode *inode, struct file *filp)
                goto out_cdev_put;
        if (filp->f_op->open) {
-                ret = filp->f_op->open(inode,filp);
+                ret = filp->f_op->open(inode, filp);
                if (ret)
                        goto out_cdev_put;
        }
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 500d65859279..c865bfdfe819 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -59,8 +59,8 @@ struct cifs_sb_info {
        gid_t   mnt_gid;
        uid_t   mnt_backupuid;
        gid_t   mnt_backupgid;
-        mode_t  mnt_file_mode;
+        umode_t mnt_file_mode;
-        mode_t  mnt_dir_mode;
+        umode_t mnt_dir_mode;
        unsigned int mnt_cifs_flags;
        char   *mountdata; /* options received at mount time or via DFS refs */
        struct backing_dev_info bdi;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 8f1fe324162b..b1fd382d1952 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -343,9 +343,9 @@ cifs_show_security(struct seq_file *s, struct TCP_Server_Info *server)
 * ones are.
 */
 static int
-cifs_show_options(struct seq_file *s, struct vfsmount *m)
+cifs_show_options(struct seq_file *s, struct dentry *root)
 {
-        struct cifs_sb_info *cifs_sb = CIFS_SB(m->mnt_sb);
+        struct cifs_sb_info *cifs_sb = CIFS_SB(root->d_sb);
        struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
        struct sockaddr *srcaddr;
        srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr;
@@ -393,7 +393,7 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
        cifs_show_address(s, tcon->ses->server);
        if (!tcon->unix_ext)
-                seq_printf(s, ",file_mode=0%o,dir_mode=0%o",
+                seq_printf(s, ",file_mode=0%ho,dir_mode=0%ho",
                                           cifs_sb->mnt_file_mode,
                                           cifs_sb->mnt_dir_mode);
        if (tcon->seal)
@@ -430,7 +430,7 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
                seq_printf(s, ",cifsacl");
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)
                seq_printf(s, ",dynperm");
-        if (m->mnt_sb->s_flags & MS_POSIXACL)
+        if (root->d_sb->s_flags & MS_POSIXACL)
                seq_printf(s, ",acl");
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS)
                seq_printf(s, ",mfsymlinks");
@@ -488,7 +488,7 @@ static void cifs_umount_begin(struct super_block *sb)
 }
 #ifdef CONFIG_CIFS_STATS2
-static int cifs_show_stats(struct seq_file *s, struct vfsmount *mnt)
+static int cifs_show_stats(struct seq_file *s, struct dentry *root)
 {
        /* BB FIXME */
        return 0;
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 30ff56005d8f..fe5ecf1b422a 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -44,14 +44,14 @@ extern const struct address_space_operations cifs_addr_ops_smallbuf;
 /* Functions related to inodes */
 extern const struct inode_operations cifs_dir_inode_ops;
 extern struct inode *cifs_root_iget(struct super_block *);
-extern int cifs_create(struct inode *, struct dentry *, int,
+extern int cifs_create(struct inode *, struct dentry *, umode_t,
                       struct nameidata *);
 extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
                                  struct nameidata *);
 extern int cifs_unlink(struct inode *dir, struct dentry *dentry);
 extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *);
-extern int cifs_mknod(struct inode *, struct dentry *, int, dev_t);
+extern int cifs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
-extern int cifs_mkdir(struct inode *, struct dentry *, int);
+extern int cifs_mkdir(struct inode *, struct dentry *, umode_t);
 extern int cifs_rmdir(struct inode *, struct dentry *);
 extern int cifs_rename(struct inode *, struct dentry *, struct inode *,
                       struct dentry *);
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 8238aa13e01c..ba53c1c6c6cc 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -169,8 +169,8 @@ struct smb_vol {
        gid_t linux_gid;
        uid_t backupuid;
        gid_t backupgid;
-        mode_t file_mode;
+        umode_t file_mode;
-        mode_t dir_mode;
+        umode_t dir_mode;
        unsigned secFlg;
        bool retry:1;
        bool intr:1;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 8cd4b52d4217..4666780f315d 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -282,7 +282,7 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
        byte_count = be32_to_cpu(pTargetSMB->smb_buf_length);
        byte_count += total_in_buf2;
        /* don't allow buffer to overflow */
-        if (byte_count > CIFSMaxBufSize)
+        if (byte_count > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4)
                return -ENOBUFS;
        pTargetSMB->smb_buf_length = cpu_to_be32(byte_count);
@@ -2122,7 +2122,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
                warned_on_ntlm = true;
                cERROR(1, "default security mechanism requested.  The default "
                        "security mechanism will be upgraded from ntlm to "
-                        "ntlmv2 in kernel release 3.2");
+                        "ntlmv2 in kernel release 3.3");
        }
        ses->overrideSecFlg = volume_info->secFlg;
@@ -2819,7 +2819,7 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
                cifs_sb->mnt_backupgid = pvolume_info->backupgid;
        cifs_sb->mnt_file_mode = pvolume_info->file_mode;
        cifs_sb->mnt_dir_mode = pvolume_info->dir_mode;
-        cFYI(1, "file mode: 0x%x  dir mode: 0x%x",
+        cFYI(1, "file mode: 0x%hx  dir mode: 0x%hx",
                cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode);
        cifs_sb->actimeo = pvolume_info->actimeo;
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index d7eeb9d3ed6f..df8fecb5b993 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -136,7 +136,7 @@ cifs_bp_rename_retry:
 /* Inode operations in similar order to how they appear in Linux file fs.h */
 int
-cifs_create(struct inode *inode, struct dentry *direntry, int mode,
+cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
                struct nameidata *nd)
 {
        int rc = -ENOENT;
@@ -355,7 +355,7 @@ cifs_create_out:
        return rc;
 }
-int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
+int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode,
                dev_t device_number)
 {
        int rc = -EPERM;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index e851d5b8931e..a5f54b7d9822 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1264,7 +1264,7 @@ unlink_out:
        return rc;
 }
-int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
+int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode)
 {
        int rc = 0, tmprc;
        int xid;
@@ -1275,7 +1275,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
        struct inode *newinode = NULL;
        struct cifs_fattr fattr;
-        cFYI(1, "In cifs_mkdir, mode = 0x%x inode = 0x%p", mode, inode);
+        cFYI(1, "In cifs_mkdir, mode = 0x%hx inode = 0x%p", mode, inode);
        cifs_sb = CIFS_SB(inode->i_sb);
        tlink = cifs_sb_tlink(cifs_sb);
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 28e7e135cfab..83d2fd8ec24b 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -30,14 +30,14 @@
 #include "coda_int.h"
 /* dir inode-ops */
-static int coda_create(struct inode *dir, struct dentry *new, int mode, struct nameidata *nd);
+static int coda_create(struct inode *dir, struct dentry *new, umode_t mode, struct nameidata *nd);
 static struct dentry *coda_lookup(struct inode *dir, struct dentry *target, struct nameidata *nd);
 static int coda_link(struct dentry *old_dentry, struct inode *dir_inode, 
                     struct dentry *entry);
 static int coda_unlink(struct inode *dir_inode, struct dentry *entry);
 static int coda_symlink(struct inode *dir_inode, struct dentry *entry,
                        const char *symname);
-static int coda_mkdir(struct inode *dir_inode, struct dentry *entry, int mode);
+static int coda_mkdir(struct inode *dir_inode, struct dentry *entry, umode_t mode);
 static int coda_rmdir(struct inode *dir_inode, struct dentry *entry);
 static int coda_rename(struct inode *old_inode, struct dentry *old_dentry, 
                       struct inode *new_inode, struct dentry *new_dentry);
@@ -191,7 +191,7 @@ static inline void coda_dir_drop_nlink(struct inode *dir)
 }
 /* creation routines: create, mknod, mkdir, link, symlink */
-static int coda_create(struct inode *dir, struct dentry *de, int mode, struct nameidata *nd)
+static int coda_create(struct inode *dir, struct dentry *de, umode_t mode, struct nameidata *nd)
 {
        int error;
        const char *name=de->d_name.name;
@@ -223,7 +223,7 @@ err_out:
        return error;
 }
-static int coda_mkdir(struct inode *dir, struct dentry *de, int mode)
+static int coda_mkdir(struct inode *dir, struct dentry *de, umode_t mode)
 {
        struct inode *inode;
        struct coda_vattr attrs;
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 871b27715465..1c08a8cd673a 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -58,7 +58,6 @@ static struct inode *coda_alloc_inode(struct super_block *sb)
 static void coda_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(coda_inode_cachep, ITOC(inode));
 }
diff --git a/fs/compat.c b/fs/compat.c
index c98787536bb8..fa9d721ecfee 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -342,16 +342,9 @@ asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct c
 */
 asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u)
 {
-        struct super_block *sb;
        struct compat_ustat tmp;
        struct kstatfs sbuf;
-        int err;
+        int err = vfs_ustat(new_decode_dev(dev), &sbuf);
-        sb = user_get_super(new_decode_dev(dev));
-        if (!sb)
-                return -EINVAL;
-        err = statfs_by_dentry(sb->s_root, &sbuf);
-        drop_super(sb);
        if (err)
                return err;
@@ -1288,7 +1281,7 @@ compat_sys_vmsplice(int fd, const struct compat_iovec __user *iov32,
 * O_LARGEFILE flag.
 */
 asmlinkage long
-compat_sys_open(const char __user *filename, int flags, int mode)
+compat_sys_open(const char __user *filename, int flags, umode_t mode)
 {
        return do_sys_open(AT_FDCWD, filename, flags, mode);
 }
@@ -1298,7 +1291,7 @@ compat_sys_open(const char __user *filename, int flags, int mode)
 * O_LARGEFILE flag.
 */
 asmlinkage long
-compat_sys_openat(unsigned int dfd, const char __user *filename, int flags, int mode)
+compat_sys_openat(unsigned int dfd, const char __user *filename, int flags, umode_t mode)
 {
        return do_sys_open(dfd, filename, flags, mode);
 }
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 51352de88ef1..a10e428b32b4 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1506,35 +1506,6 @@ static long do_ioctl_trans(int fd, unsigned int cmd,
        return -ENOIOCTLCMD;
 }
-static void compat_ioctl_error(struct file *filp, unsigned int fd,
-                unsigned int cmd, unsigned long arg)
-{
-        char buf[10];
-        char *fn = "?";
-        char *path;
-        /* find the name of the device. */
-        path = (char *)__get_free_page(GFP_KERNEL);
-        if (path) {
-                fn = d_path(&filp->f_path, path, PAGE_SIZE);
-                if (IS_ERR(fn))
-                        fn = "?";
-        }
-         sprintf(buf,"'%c'", (cmd>>_IOC_TYPESHIFT) & _IOC_TYPEMASK);
-        if (!isprint(buf[1]))
-                sprintf(buf, "%02x", buf[1]);
-        compat_printk("ioctl32(%s:%d): Unknown cmd fd(%d) "
-                        "cmd(%08x){t:%s;sz:%u} arg(%08x) on %s\n",
-                        current->comm, current->pid,
-                        (int)fd, (unsigned int)cmd, buf,
-                        (cmd >> _IOC_SIZESHIFT) & _IOC_SIZEMASK,
-                        (unsigned int)arg, fn);
-        if (path)
-                free_page((unsigned long)path);
-}
 static int compat_ioctl_check_table(unsigned int xcmd)
 {
        int i;
@@ -1621,13 +1592,8 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
                goto found_handler;
        error = do_ioctl_trans(fd, cmd, arg, filp);
-        if (error == -ENOIOCTLCMD) {
+        if (error == -ENOIOCTLCMD)
-                static int count;
+                error = -ENOTTY;
-                if (++count <= 50)
-                        compat_ioctl_error(filp, fd, cmd, arg);
-                error = -EINVAL;
-        }
        goto out_fput;
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index 82bda8fdfc1c..ede857d20a04 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -63,8 +63,8 @@ extern struct kmem_cache *configfs_dir_cachep;
 extern int configfs_is_root(struct config_item *item);
-extern struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent *);
+extern struct inode * configfs_new_inode(umode_t mode, struct configfs_dirent *);
-extern int configfs_create(struct dentry *, int mode, int (*init)(struct inode *));
+extern int configfs_create(struct dentry *, umode_t mode, int (*init)(struct inode *));
 extern int configfs_inode_init(void);
 extern void configfs_inode_exit(void);
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 9a37a9b6de3a..5ddd7ebd9dcd 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -311,8 +311,8 @@ static int configfs_create_dir(struct config_item * item, struct dentry *dentry)
        if (item->ci_parent)
                parent = item->ci_parent->ci_dentry;
-        else if (configfs_mount && configfs_mount->mnt_sb)
+        else if (configfs_mount)
-                parent = configfs_mount->mnt_sb->s_root;
+                parent = configfs_mount->mnt_root;
        else
                return -EFAULT;
@@ -1170,7 +1170,7 @@ void configfs_undepend_item(struct configfs_subsystem *subsys,
 }
 EXPORT_SYMBOL(configfs_undepend_item);
-static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int configfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
        int ret = 0;
        int module_got = 0;
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index ca418aaf6352..3ee36d418863 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -116,7 +116,7 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
        return error;
 }
-static inline void set_default_inode_attr(struct inode * inode, mode_t mode)
+static inline void set_default_inode_attr(struct inode * inode, umode_t mode)
 {
        inode->i_mode = mode;
        inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
@@ -132,7 +132,7 @@ static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
        inode->i_ctime = iattr->ia_ctime;
 }
-struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent * sd)
+struct inode *configfs_new_inode(umode_t mode, struct configfs_dirent * sd)
 {
        struct inode * inode = new_inode(configfs_sb);
        if (inode) {
@@ -185,7 +185,7 @@ static void configfs_set_inode_lock_class(struct configfs_dirent *sd,
 #endif /* CONFIG_LOCKDEP */
-int configfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *))
+int configfs_create(struct dentry * dentry, umode_t mode, int (*init)(struct inode *))
 {
        int error = 0;
        struct inode * inode = NULL;
@@ -292,7 +292,7 @@ int __init configfs_inode_init(void)
        return bdi_init(&configfs_backing_dev_info);
 }
-void __exit configfs_inode_exit(void)
+void configfs_inode_exit(void)
 {
        bdi_destroy(&configfs_backing_dev_info);
 }
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index ecc62178beda..276e15cafd58 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -143,28 +143,26 @@ static int __init configfs_init(void)
                goto out;
        config_kobj = kobject_create_and_add("config", kernel_kobj);
-        if (!config_kobj) {
+        if (!config_kobj)
-                kmem_cache_destroy(configfs_dir_cachep);
+                goto out2;
-                configfs_dir_cachep = NULL;
-                goto out;
+        err = configfs_inode_init();
-        }
+        if (err)
+                goto out3;
        err = register_filesystem(&configfs_fs_type);
-        if (err) {
+        if (err)
-                printk(KERN_ERR "configfs: Unable to register filesystem!\n");
+                goto out4;
-                kobject_put(config_kobj);
-                kmem_cache_destroy(configfs_dir_cachep);
-                configfs_dir_cachep = NULL;
-                goto out;
-        }
-        err = configfs_inode_init();
+        return 0;
-        if (err) {
+out4:
-                unregister_filesystem(&configfs_fs_type);
+        printk(KERN_ERR "configfs: Unable to register filesystem!\n");
-                kobject_put(config_kobj);
+        configfs_inode_exit();
-                kmem_cache_destroy(configfs_dir_cachep);
+out3:
-                configfs_dir_cachep = NULL;
+        kobject_put(config_kobj);
-        }
+out2:
+        kmem_cache_destroy(configfs_dir_cachep);
+        configfs_dir_cachep = NULL;
 out:
        return err;
 }
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 739fb59bcdc2..a2ee8f9f5a38 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -20,7 +20,6 @@
 #include <linux/cramfs_fs.h>
 #include <linux/slab.h>
 #include <linux/cramfs_fs_sb.h>
-#include <linux/buffer_head.h>
 #include <linux/vfs.h>
 #include <linux/mutex.h>
@@ -378,7 +377,7 @@ static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
                unsigned long nextoffset;
                char *name;
                ino_t ino;
-                mode_t mode;
+                umode_t mode;
                int namelen, error;
                mutex_lock(&read_mutex);
diff --git a/fs/dcache.c b/fs/dcache.c
index 89509b5a090e..9791b1e7eee4 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -38,6 +38,7 @@
 #include <linux/prefetch.h>
 #include <linux/ratelimit.h>
 #include "internal.h"
+#include "mount.h"
 /*
 * Usage:
@@ -2451,6 +2452,7 @@ static int prepend_path(const struct path *path,
 {
        struct dentry *dentry = path->dentry;
        struct vfsmount *vfsmnt = path->mnt;
+        struct mount *mnt = real_mount(vfsmnt);
        bool slash = false;
        int error = 0;
@@ -2460,11 +2462,11 @@ static int prepend_path(const struct path *path,
                if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
                        /* Global root? */
-                        if (vfsmnt->mnt_parent == vfsmnt) {
+                        if (!mnt_has_parent(mnt))
                                goto global_root;
-                        }
+                        dentry = mnt->mnt_mountpoint;
-                        dentry = vfsmnt->mnt_mountpoint;
+                        mnt = mnt->mnt_parent;
-                        vfsmnt = vfsmnt->mnt_parent;
+                        vfsmnt = &mnt->mnt;
                        continue;
                }
                parent = dentry->d_parent;
@@ -2501,7 +2503,7 @@ global_root:
        if (!slash)
                error = prepend(buffer, buflen, "/", 1);
        if (!error)
-                error = vfsmnt->mnt_ns ? 1 : 2;
+                error = real_mount(vfsmnt)->mnt_ns ? 1 : 2;
        goto out;
 }
@@ -2853,31 +2855,6 @@ int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
        return result;
 }
-int path_is_under(struct path *path1, struct path *path2)
-{
-        struct vfsmount *mnt = path1->mnt;
-        struct dentry *dentry = path1->dentry;
-        int res;
-        br_read_lock(vfsmount_lock);
-        if (mnt != path2->mnt) {
-                for (;;) {
-                        if (mnt->mnt_parent == mnt) {
-                                br_read_unlock(vfsmount_lock);
-                                return 0;
-                        }
-                        if (mnt->mnt_parent == path2->mnt)
-                                break;
-                        mnt = mnt->mnt_parent;
-                }
-                dentry = mnt->mnt_mountpoint;
-        }
-        res = is_subdir(dentry, path2->dentry);
-        br_read_unlock(vfsmount_lock);
-        return res;
-}
-EXPORT_SYMBOL(path_is_under);
 void d_genocide(struct dentry *root)
 {
        struct dentry *this_parent;
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 90f76575c056..f65d4455c5e5 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -15,9 +15,11 @@
 #include <linux/module.h>
 #include <linux/fs.h>
+#include <linux/seq_file.h>
 #include <linux/pagemap.h>
 #include <linux/namei.h>
 #include <linux/debugfs.h>
+#include <linux/io.h>
 static ssize_t default_read_file(struct file *file, char __user *buf,
                                 size_t count, loff_t *ppos)
@@ -95,7 +97,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u8_wo, NULL, debugfs_u8_set, "%llu\n");
 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
 * code.
 */
-struct dentry *debugfs_create_u8(const char *name, mode_t mode,
+struct dentry *debugfs_create_u8(const char *name, umode_t mode,
                                 struct dentry *parent, u8 *value)
 {
        /* if there are no write bits set, make read only */
@@ -147,7 +149,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u16_wo, NULL, debugfs_u16_set, "%llu\n");
 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
 * code.
 */
-struct dentry *debugfs_create_u16(const char *name, mode_t mode,
+struct dentry *debugfs_create_u16(const char *name, umode_t mode,
                                  struct dentry *parent, u16 *value)
 {
        /* if there are no write bits set, make read only */
@@ -199,7 +201,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u32_wo, NULL, debugfs_u32_set, "%llu\n");
 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
 * code.
 */
-struct dentry *debugfs_create_u32(const char *name, mode_t mode,
+struct dentry *debugfs_create_u32(const char *name, umode_t mode,
                                 struct dentry *parent, u32 *value)
 {
        /* if there are no write bits set, make read only */
@@ -252,7 +254,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
 * code.
 */
-struct dentry *debugfs_create_u64(const char *name, mode_t mode,
+struct dentry *debugfs_create_u64(const char *name, umode_t mode,
                                 struct dentry *parent, u64 *value)
 {
        /* if there are no write bits set, make read only */
@@ -298,7 +300,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_x64, debugfs_u64_get, debugfs_u64_set, "0x%016llx\n
 * @value: a pointer to the variable that the file should read to and write
 *         from.
 */
-struct dentry *debugfs_create_x8(const char *name, mode_t mode,
+struct dentry *debugfs_create_x8(const char *name, umode_t mode,
                                 struct dentry *parent, u8 *value)
 {
        /* if there are no write bits set, make read only */
@@ -322,7 +324,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_x8);
 * @value: a pointer to the variable that the file should read to and write
 *         from.
 */
-struct dentry *debugfs_create_x16(const char *name, mode_t mode,
+struct dentry *debugfs_create_x16(const char *name, umode_t mode,
                                 struct dentry *parent, u16 *value)
 {
        /* if there are no write bits set, make read only */
@@ -346,7 +348,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_x16);
 * @value: a pointer to the variable that the file should read to and write
 *         from.
 */
-struct dentry *debugfs_create_x32(const char *name, mode_t mode,
+struct dentry *debugfs_create_x32(const char *name, umode_t mode,
                                 struct dentry *parent, u32 *value)
 {
        /* if there are no write bits set, make read only */
@@ -370,7 +372,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_x32);
 * @value: a pointer to the variable that the file should read to and write
 *         from.
 */
-struct dentry *debugfs_create_x64(const char *name, mode_t mode,
+struct dentry *debugfs_create_x64(const char *name, umode_t mode,
                                 struct dentry *parent, u64 *value)
 {
        return debugfs_create_file(name, mode, parent, value, &fops_x64);
@@ -401,7 +403,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_size_t, debugfs_size_t_get, debugfs_size_t_set,
 * @value: a pointer to the variable that the file should read to and write
 *         from.
 */
-struct dentry *debugfs_create_size_t(const char *name, mode_t mode,
+struct dentry *debugfs_create_size_t(const char *name, umode_t mode,
                                     struct dentry *parent, size_t *value)
 {
        return debugfs_create_file(name, mode, parent, value, &fops_size_t);
@@ -473,7 +475,7 @@ static const struct file_operations fops_bool = {
 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
 * code.
 */
-struct dentry *debugfs_create_bool(const char *name, mode_t mode,
+struct dentry *debugfs_create_bool(const char *name, umode_t mode,
                                   struct dentry *parent, u32 *value)
 {
        return debugfs_create_file(name, mode, parent, value, &fops_bool);
@@ -518,10 +520,103 @@ static const struct file_operations fops_blob = {
 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
 * code.
 */
-struct dentry *debugfs_create_blob(const char *name, mode_t mode,
+struct dentry *debugfs_create_blob(const char *name, umode_t mode,
                                   struct dentry *parent,
                                   struct debugfs_blob_wrapper *blob)
 {
        return debugfs_create_file(name, mode, parent, blob, &fops_blob);
 }
 EXPORT_SYMBOL_GPL(debugfs_create_blob);
+#ifdef CONFIG_HAS_IOMEM
+/*
+ * The regset32 stuff is used to print 32-bit registers using the
+ * seq_file utilities. We offer printing a register set in an already-opened
+ * sequential file or creating a debugfs file that only prints a regset32.
+ */
+/**
+ * debugfs_print_regs32 - use seq_printf to describe a set of registers
+ * @s: the seq_file structure being used to generate output
+ * @regs: an array of struct debugfs_reg32 structures
+ * @nregs: the length of the above array
+ * @base: the base address to be used in reading the registers
+ * @prefix: a string to be prefixed to every output line
+ *
+ * This function outputs a text block describing the current values of
+ * some 32-bit hardware registers. It is meant to be used within debugfs
+ * files based on seq_file that need to show registers, intermixed with other
+ * information. The prefix argument may be used to specify a leading string,
+ * because some peripherals have several blocks of identical registers,
+ * for example configuration of DMA channels.
+ */
+int debugfs_print_regs32(struct seq_file *s, const struct debugfs_reg32 *regs,
+                           int nregs, void __iomem *base, char *prefix)
+{
+        int i, ret = 0;
+        for (i = 0; i < nregs; i++, regs++) {
+                if (prefix)
+                        ret += seq_printf(s, "%s", prefix);
+                ret += seq_printf(s, "%s = 0x%08x\n", regs->name,
+                                  readl(base + regs->offset));
+        }
+        return ret;
+}
+EXPORT_SYMBOL_GPL(debugfs_print_regs32);
+static int debugfs_show_regset32(struct seq_file *s, void *data)
+{
+        struct debugfs_regset32 *regset = s->private;
+        debugfs_print_regs32(s, regset->regs, regset->nregs, regset->base, "");
+        return 0;
+}
+static int debugfs_open_regset32(struct inode *inode, struct file *file)
+{
+        return single_open(file, debugfs_show_regset32, inode->i_private);
+}
+static const struct file_operations fops_regset32 = {
+        .open =         debugfs_open_regset32,
+        .read =         seq_read,
+        .llseek =       seq_lseek,
+        .release =      single_release,
+};
+/**
+ * debugfs_create_regset32 - create a debugfs file that returns register values
+ * @name: a pointer to a string containing the name of the file to create.
+ * @mode: the permission that the file should have
+ * @parent: a pointer to the parent dentry for this file.  This should be a
+ *          directory dentry if set.  If this parameter is %NULL, then the
+ *          file will be created in the root of the debugfs filesystem.
+ * @regset: a pointer to a struct debugfs_regset32, which contains a pointer
+ *          to an array of register definitions, the array size and the base
+ *          address where the register bank is to be found.
+ *
+ * This function creates a file in debugfs with the given name that reports
+ * the names and values of a set of 32-bit registers. If the @mode variable
+ * permits it, the file can be read from. Writing is not supported.
+ *
+ * This function will return a pointer to a dentry if it succeeds.  This
+ * pointer must be passed to the debugfs_remove() function when the file is
+ * to be removed (no automatic cleanup happens if your module is unloaded,
+ * you are responsible here.)  If an error occurs, %NULL will be returned.
+ *
+ * If debugfs is not enabled in the kernel, the value -%ENODEV will be
+ * returned.  It is not wise to check for this value, but rather, check for
+ * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
+ * code.
+ */
+struct dentry *debugfs_create_regset32(const char *name, mode_t mode,
+                                       struct dentry *parent,
+                                       struct debugfs_regset32 *regset)
+{
+        return debugfs_create_file(name, mode, parent, regset, &fops_regset32);
+}
+EXPORT_SYMBOL_GPL(debugfs_create_regset32);
+#endif /* CONFIG_HAS_IOMEM */
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index f3a257d7a985..956d5ddddf6e 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -30,7 +30,7 @@ static struct vfsmount *debugfs_mount;
 static int debugfs_mount_count;
 static bool debugfs_registered;
-static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t dev,
+static struct inode *debugfs_get_inode(struct super_block *sb, umode_t mode, dev_t dev,
                                       void *data, const struct file_operations *fops)
 {
@@ -69,7 +69,7 @@ static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t d
 /* SMP-safe */
 static int debugfs_mknod(struct inode *dir, struct dentry *dentry,
-                         int mode, dev_t dev, void *data,
+                         umode_t mode, dev_t dev, void *data,
                         const struct file_operations *fops)
 {
        struct inode *inode;
@@ -87,7 +87,7 @@ static int debugfs_mknod(struct inode *dir, struct dentry *dentry,
        return error;
 }
-static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, int mode,
+static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode,
                         void *data, const struct file_operations *fops)
 {
        int res;
@@ -101,14 +101,14 @@ static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, int mode,
        return res;
 }
-static int debugfs_link(struct inode *dir, struct dentry *dentry, int mode,
+static int debugfs_link(struct inode *dir, struct dentry *dentry, umode_t mode,
                        void *data, const struct file_operations *fops)
 {
        mode = (mode & S_IALLUGO) | S_IFLNK;
        return debugfs_mknod(dir, dentry, mode, 0, data, fops);
 }
-static int debugfs_create(struct inode *dir, struct dentry *dentry, int mode,
+static int debugfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
                          void *data, const struct file_operations *fops)
 {
        int res;
@@ -146,7 +146,7 @@ static struct file_system_type debug_fs_type = {
        .kill_sb =      kill_litter_super,
 };
-static int debugfs_create_by_name(const char *name, mode_t mode,
+static int debugfs_create_by_name(const char *name, umode_t mode,
                                  struct dentry *parent,
                                  struct dentry **dentry,
                                  void *data,
@@ -160,7 +160,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
         * have around.
         */
        if (!parent)
-                parent = debugfs_mount->mnt_sb->s_root;
+                parent = debugfs_mount->mnt_root;
        *dentry = NULL;
        mutex_lock(&parent->d_inode->i_mutex);
@@ -214,7 +214,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
 * If debugfs is not enabled in the kernel, the value -%ENODEV will be
 * returned.
 */
-struct dentry *debugfs_create_file(const char *name, mode_t mode,
+struct dentry *debugfs_create_file(const char *name, umode_t mode,
                                   struct dentry *parent, void *data,
                                   const struct file_operations *fops)
 {
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index d5d5297efe97..c4e2a58a2e82 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -246,9 +246,9 @@ static int devpts_remount(struct super_block *sb, int *flags, char *data)
        return err;
 }
-static int devpts_show_options(struct seq_file *seq, struct vfsmount *vfs)
+static int devpts_show_options(struct seq_file *seq, struct dentry *root)
 {
-        struct pts_fs_info *fsi = DEVPTS_SB(vfs->mnt_sb);
+        struct pts_fs_info *fsi = DEVPTS_SB(root->d_sb);
        struct pts_mount_opts *opts = &fsi->mount_opts;
        if (opts->setuid)
@@ -301,7 +301,7 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
        inode = new_inode(s);
        if (!inode)
-                goto free_fsi;
+                goto fail;
        inode->i_ino = 1;
        inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
        inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
@@ -316,8 +316,6 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
        printk(KERN_ERR "devpts: get root dentry failed\n");
        iput(inode);
-free_fsi:
-        kfree(s->s_fs_info);
 fail:
        return -ENOMEM;
 }
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 990626e7da80..0b3109ee4257 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -281,7 +281,7 @@ static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
        } else {
                struct sockaddr_in6 *in6  = (struct sockaddr_in6 *) &addr;
                struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr;
-                ipv6_addr_copy(&ret6->sin6_addr, &in6->sin6_addr);
+                ret6->sin6_addr = in6->sin6_addr;
        }
        return 0;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 32f90a3ae63e..19a8ca4ab1dd 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -144,24 +144,6 @@ static int ecryptfs_interpose(struct dentry *lower_dentry,
 }
 /**
- * ecryptfs_create_underlying_file
- * @lower_dir_inode: inode of the parent in the lower fs of the new file
- * @dentry: New file's dentry
- * @mode: The mode of the new file
- *
- * Creates the file in the lower file system.
- *
- * Returns zero on success; non-zero on error condition
- */
-static int
-ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
-                                struct dentry *dentry, int mode)
-{
-        struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
-        return vfs_create(lower_dir_inode, lower_dentry, mode, NULL);
-}
-/**
 * ecryptfs_do_create
 * @directory_inode: inode of the new file's dentry's parent in ecryptfs
 * @ecryptfs_dentry: New file's dentry in ecryptfs
@@ -176,7 +158,7 @@ ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
 */
 static struct inode *
 ecryptfs_do_create(struct inode *directory_inode,
-                   struct dentry *ecryptfs_dentry, int mode)
+                   struct dentry *ecryptfs_dentry, umode_t mode)
 {
        int rc;
        struct dentry *lower_dentry;
@@ -191,8 +173,7 @@ ecryptfs_do_create(struct inode *directory_inode,
                inode = ERR_CAST(lower_dir_dentry);
                goto out;
        }
-        rc = ecryptfs_create_underlying_file(lower_dir_dentry->d_inode,
+        rc = vfs_create(lower_dir_dentry->d_inode, lower_dentry, mode, NULL);
-                                             ecryptfs_dentry, mode);
        if (rc) {
                printk(KERN_ERR "%s: Failure to create dentry in lower fs; "
                       "rc = [%d]\n", __func__, rc);
@@ -267,7 +248,7 @@ out:
 */
 static int
 ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
-                int mode, struct nameidata *nd)
+                umode_t mode, struct nameidata *nd)
 {
        struct inode *ecryptfs_inode;
        int rc;
@@ -559,7 +540,7 @@ out_lock:
        return rc;
 }
-static int ecryptfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int ecryptfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
        int rc;
        struct dentry *lower_dentry;
@@ -607,7 +588,7 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
 }
 static int
-ecryptfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
+ecryptfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
 {
        int rc;
        struct dentry *lower_dentry;
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index dbd52d40df4c..9df7fd6e0c39 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -69,7 +69,6 @@ static void ecryptfs_i_callback(struct rcu_head *head)
        struct ecryptfs_inode_info *inode_info;
        inode_info = ecryptfs_inode_to_private(inode);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(ecryptfs_inode_info_cache, inode_info);
 }
@@ -132,9 +131,9 @@ static void ecryptfs_evict_inode(struct inode *inode)
 * Prints the mount options for a given superblock.
 * Returns zero; does not fail.
 */
-static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt)
+static int ecryptfs_show_options(struct seq_file *m, struct dentry *root)
 {
-        struct super_block *sb = mnt->mnt_sb;
+        struct super_block *sb = root->d_sb;
        struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
                &ecryptfs_superblock_to_private(sb)->mount_crypt_stat;
        struct ecryptfs_global_auth_tok *walker;
diff --git a/fs/efs/super.c b/fs/efs/super.c
index 0f31acb0131c..981106429a9f 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -68,7 +68,6 @@ static struct inode *efs_alloc_inode(struct super_block *sb)
 static void efs_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(efs_inode_cachep, INODE_INFO(inode));
 }
diff --git a/fs/exec.c b/fs/exec.c
index 36254645b7cc..3f64b9f26e7d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1225,7 +1225,7 @@ EXPORT_SYMBOL(install_exec_creds);
 * - the caller must hold ->cred_guard_mutex to protect against
 *   PTRACE_ATTACH
 */
-int check_unsafe_exec(struct linux_binprm *bprm)
+static int check_unsafe_exec(struct linux_binprm *bprm)
 {
        struct task_struct *p = current, *t;
        unsigned n_fs;
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index d0941c6a1f72..80405836ba6e 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -234,7 +234,7 @@ static unsigned char exofs_type_by_mode[S_IFMT >> S_SHIFT] = {
 static inline
 void exofs_set_de_type(struct exofs_dir_entry *de, struct inode *inode)
 {
-        mode_t mode = inode->i_mode;
+        umode_t mode = inode->i_mode;
        de->file_type = exofs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
 }
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 51f4b4c40f09..ca9d49665ef6 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -154,7 +154,7 @@ int exofs_write_begin(struct file *file, struct address_space *mapping,
                loff_t pos, unsigned len, unsigned flags,
                struct page **pagep, void **fsdata);
 extern struct inode *exofs_iget(struct super_block *, unsigned long);
-struct inode *exofs_new_inode(struct inode *, int);
+struct inode *exofs_new_inode(struct inode *, umode_t);
 extern int exofs_write_inode(struct inode *, struct writeback_control *wbc);
 extern void exofs_evict_inode(struct inode *);
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index f6dbf7768ce6..ea5e1f97806a 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1276,7 +1276,7 @@ static void create_done(struct ore_io_state *ios, void *p)
 /*
 * Set up a new inode and create an object for it on the OSD
 */
-struct inode *exofs_new_inode(struct inode *dir, int mode)
+struct inode *exofs_new_inode(struct inode *dir, umode_t mode)
 {
        struct super_block *sb = dir->i_sb;
        struct exofs_sb_info *sbi = sb->s_fs_info;
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index b54c43775f17..9dbf0c301030 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -59,7 +59,7 @@ static struct dentry *exofs_lookup(struct inode *dir, struct dentry *dentry,
        return d_splice_alias(inode, dentry);
 }
-static int exofs_create(struct inode *dir, struct dentry *dentry, int mode,
+static int exofs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
                         struct nameidata *nd)
 {
        struct inode *inode = exofs_new_inode(dir, mode);
@@ -74,7 +74,7 @@ static int exofs_create(struct inode *dir, struct dentry *dentry, int mode,
        return err;
 }
-static int exofs_mknod(struct inode *dir, struct dentry *dentry, int mode,
+static int exofs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
                       dev_t rdev)
 {
        struct inode *inode;
@@ -153,7 +153,7 @@ static int exofs_link(struct dentry *old_dentry, struct inode *dir,
        return exofs_add_nondir(dentry, inode);
 }
-static int exofs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int exofs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
        struct inode *inode;
        int err = -EMLINK;
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index e6085ec192d6..d22cd168c6ee 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -166,7 +166,6 @@ static struct inode *exofs_alloc_inode(struct super_block *sb)
 static void exofs_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(exofs_inode_cachep, exofs_i(inode));
 }
@@ -839,6 +838,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
        ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY);
        if (ret) {
                EXOFS_DBGMSG("Failed to bdi_setup_and_register\n");
+                dput(sb->s_root);
+                sb->s_root = NULL;
                goto free_sbi;
        }
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 47cda410b548..d37df352d324 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -279,7 +279,7 @@ static unsigned char ext2_type_by_mode[S_IFMT >> S_SHIFT] = {
 static inline void ext2_set_de_type(ext2_dirent *de, struct inode *inode)
 {
-        mode_t mode = inode->i_mode;
+        umode_t mode = inode->i_mode;
        if (EXT2_HAS_INCOMPAT_FEATURE(inode->i_sb, EXT2_FEATURE_INCOMPAT_FILETYPE))
                de->file_type = ext2_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
        else
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 9a4e5e206d08..75ad433c6691 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -110,7 +110,7 @@ extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **);
 extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *, int);
 /* ialloc.c */
-extern struct inode * ext2_new_inode (struct inode *, int, const struct qstr *);
+extern struct inode * ext2_new_inode (struct inode *, umode_t, const struct qstr *);
 extern void ext2_free_inode (struct inode *);
 extern unsigned long ext2_count_free_inodes (struct super_block *);
 extern void ext2_check_inodes_bitmap (struct super_block *);
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index c4e81dfb74ba..cd7f5f424a75 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -429,7 +429,7 @@ found:
        return group;
 }
-struct inode *ext2_new_inode(struct inode *dir, int mode,
+struct inode *ext2_new_inode(struct inode *dir, umode_t mode,
                             const struct qstr *qstr)
 {
        struct super_block *sb;
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index f81e250ac5c4..1089f760c847 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -35,7 +35,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
        case EXT2_IOC_SETFLAGS: {
                unsigned int oldflags;
-                ret = mnt_want_write(filp->f_path.mnt);
+                ret = mnt_want_write_file(filp);
                if (ret)
                        return ret;
@@ -83,7 +83,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                inode->i_ctime = CURRENT_TIME_SEC;
                mark_inode_dirty(inode);
 setflags_out:
-                mnt_drop_write(filp->f_path.mnt);
+                mnt_drop_write_file(filp);
                return ret;
        }
        case EXT2_IOC_GETVERSION:
@@ -91,7 +91,7 @@ setflags_out:
        case EXT2_IOC_SETVERSION:
                if (!inode_owner_or_capable(inode))
                        return -EPERM;
-                ret = mnt_want_write(filp->f_path.mnt);
+                ret = mnt_want_write_file(filp);
                if (ret)
                        return ret;
                if (get_user(inode->i_generation, (int __user *) arg)) {
@@ -100,7 +100,7 @@ setflags_out:
                        inode->i_ctime = CURRENT_TIME_SEC;
                        mark_inode_dirty(inode);
                }
-                mnt_drop_write(filp->f_path.mnt);
+                mnt_drop_write_file(filp);
                return ret;
        case EXT2_IOC_GETRSVSZ:
                if (test_opt(inode->i_sb, RESERVATION)
@@ -121,7 +121,7 @@ setflags_out:
                if (get_user(rsv_window_size, (int __user *)arg))
                        return -EFAULT;
-                ret = mnt_want_write(filp->f_path.mnt);
+                ret = mnt_want_write_file(filp);
                if (ret)
                        return ret;
@@ -145,7 +145,7 @@ setflags_out:
                        rsv->rsv_goal_size = rsv_window_size;
                }
                mutex_unlock(&ei->truncate_mutex);
-                mnt_drop_write(filp->f_path.mnt);
+                mnt_drop_write_file(filp);
                return 0;
        }
        default:
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 761fde807fc9..080419814bae 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -94,7 +94,7 @@ struct dentry *ext2_get_parent(struct dentry *child)
 * If the create succeeds, we fill in the inode information
 * with d_instantiate(). 
 */
-static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, struct nameidata *nd)
+static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode, struct nameidata *nd)
 {
        struct inode *inode;
@@ -119,7 +119,7 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, st
        return ext2_add_nondir(dentry, inode);
 }
-static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t rdev)
+static int ext2_mknod (struct inode * dir, struct dentry *dentry, umode_t mode, dev_t rdev)
 {
        struct inode * inode;
        int err;
@@ -214,7 +214,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
        return err;
 }
-static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode)
+static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
 {
        struct inode * inode;
        int err = -EMLINK;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index bd8ac164a3bf..9b403f064ce0 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -173,7 +173,6 @@ static struct inode *ext2_alloc_inode(struct super_block *sb)
 static void ext2_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(ext2_inode_cachep, EXT2_I(inode));
 }
@@ -211,9 +210,9 @@ static void destroy_inodecache(void)
        kmem_cache_destroy(ext2_inode_cachep);
 }
-static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs)
+static int ext2_show_options(struct seq_file *seq, struct dentry *root)
 {
-        struct super_block *sb = vfs->mnt_sb;
+        struct super_block *sb = root->d_sb;
        struct ext2_sb_info *sbi = EXT2_SB(sb);
        struct ext2_super_block *es = sbi->s_es;
        unsigned long def_mount_opts;
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 5c866e06e7ab..92cc86dfa23d 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -371,7 +371,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent)
 * group to find a free inode.
 */
 struct inode *ext3_new_inode(handle_t *handle, struct inode * dir,
-                             const struct qstr *qstr, int mode)
+                             const struct qstr *qstr, umode_t mode)
 {
        struct super_block *sb;
        struct buffer_head *bitmap_bh = NULL;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 85fe655fe3e0..15cb47088aac 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2490,7 +2490,7 @@ int ext3_can_truncate(struct inode *inode)
 * transaction, and VFS/VM ensures that ext3_truncate() cannot run
 * simultaneously on behalf of the same inode.
 *
- * As we work through the truncate and commmit bits of it to the journal there
+ * As we work through the truncate and commit bits of it to the journal there
 * is one core, guiding principle: the file's tree must always be consistent on
 * disk.  We must be able to restart the truncate after a crash.
 *
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index ba1b54e23cae..8e37c41a071b 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -44,7 +44,7 @@ long ext3_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                if (get_user(flags, (int __user *) arg))
                        return -EFAULT;
-                err = mnt_want_write(filp->f_path.mnt);
+                err = mnt_want_write_file(filp);
                if (err)
                        return err;
@@ -110,7 +110,7 @@ flags_err:
                        err = ext3_change_inode_journal_flag(inode, jflag);
 flags_out:
                mutex_unlock(&inode->i_mutex);
-                mnt_drop_write(filp->f_path.mnt);
+                mnt_drop_write_file(filp);
                return err;
        }
        case EXT3_IOC_GETVERSION:
@@ -126,7 +126,7 @@ flags_out:
                if (!inode_owner_or_capable(inode))
                        return -EPERM;
-                err = mnt_want_write(filp->f_path.mnt);
+                err = mnt_want_write_file(filp);
                if (err)
                        return err;
                if (get_user(generation, (int __user *) arg)) {
@@ -147,7 +147,7 @@ flags_out:
                }
                ext3_journal_stop(handle);
 setversion_out:
-                mnt_drop_write(filp->f_path.mnt);
+                mnt_drop_write_file(filp);
                return err;
        }
        case EXT3_IOC_GETRSVSZ:
@@ -164,7 +164,7 @@ setversion_out:
                if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
                        return -ENOTTY;
-                err = mnt_want_write(filp->f_path.mnt);
+                err = mnt_want_write_file(filp);
                if (err)
                        return err;
@@ -195,7 +195,7 @@ setversion_out:
                }
                mutex_unlock(&ei->truncate_mutex);
 setrsvsz_out:
-                mnt_drop_write(filp->f_path.mnt);
+                mnt_drop_write_file(filp);
                return err;
        }
        case EXT3_IOC_GROUP_EXTEND: {
@@ -206,7 +206,7 @@ setrsvsz_out:
                if (!capable(CAP_SYS_RESOURCE))
                        return -EPERM;
-                err = mnt_want_write(filp->f_path.mnt);
+                err = mnt_want_write_file(filp);
                if (err)
                        return err;
@@ -221,7 +221,7 @@ setrsvsz_out:
                if (err == 0)
                        err = err2;
 group_extend_out:
-                mnt_drop_write(filp->f_path.mnt);
+                mnt_drop_write_file(filp);
                return err;
        }
        case EXT3_IOC_GROUP_ADD: {
@@ -232,7 +232,7 @@ group_extend_out:
                if (!capable(CAP_SYS_RESOURCE))
                        return -EPERM;
-                err = mnt_want_write(filp->f_path.mnt);
+                err = mnt_want_write_file(filp);
                if (err)
                        return err;
@@ -249,7 +249,7 @@ group_extend_out:
                if (err == 0)
                        err = err2;
 group_add_out:
-                mnt_drop_write(filp->f_path.mnt);
+                mnt_drop_write_file(filp);
                return err;
        }
        case FITRIM: {
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 642dc6d66dfd..d269821203fd 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1698,7 +1698,7 @@ static int ext3_add_nondir(handle_t *handle,
 * If the create succeeds, we fill in the inode information
 * with d_instantiate().
 */
-static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
+static int ext3_create (struct inode * dir, struct dentry * dentry, umode_t mode,
                struct nameidata *nd)
 {
        handle_t *handle;
@@ -1732,7 +1732,7 @@ retry:
 }
 static int ext3_mknod (struct inode * dir, struct dentry *dentry,
-                        int mode, dev_t rdev)
+                        umode_t mode, dev_t rdev)
 {
        handle_t *handle;
        struct inode *inode;
@@ -1768,7 +1768,7 @@ retry:
        return err;
 }
-static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode)
+static int ext3_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
 {
        handle_t *handle;
        struct inode * inode;
@@ -2272,7 +2272,7 @@ retry:
                        err = PTR_ERR(handle);
                        goto err_drop_inode;
                }
-                inc_nlink(inode);
+                set_nlink(inode, 1);
                err = ext3_orphan_del(handle, inode);
                if (err) {
                        ext3_journal_stop(handle);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 922d289aeeb3..3a10b884e1be 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -511,7 +511,6 @@ static int ext3_drop_inode(struct inode *inode)
 static void ext3_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
 }
@@ -611,9 +610,9 @@ static char *data_mode_string(unsigned long mode)
 *  - it's set to a non-default value OR
 *  - if the per-sb default is different from the global default
 */
-static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
+static int ext3_show_options(struct seq_file *seq, struct dentry *root)
 {
-        struct super_block *sb = vfs->mnt_sb;
+        struct super_block *sb = root->d_sb;
        struct ext3_sb_info *sbi = EXT3_SB(sb);
        struct ext3_super_block *es = sbi->s_es;
        unsigned long def_mount_opts;
@@ -2910,7 +2909,7 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
                return -EINVAL;
        /* Quotafile not on the same filesystem? */
-        if (path->mnt->mnt_sb != sb)
+        if (path->dentry->d_sb != sb)
                return -EXDEV;
        /* Journaling quota? */
        if (EXT3_SB(sb)->s_qf_names[type]) {
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 5b0e26a1272d..1554b15f91bc 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1819,7 +1819,7 @@ extern int ext4fs_dirhash(const char *name, int len, struct
                          dx_hash_info *hinfo);
 /* ialloc.c */
-extern struct inode *ext4_new_inode(handle_t *, struct inode *, int,
+extern struct inode *ext4_new_inode(handle_t *, struct inode *, umode_t,
                                    const struct qstr *qstr, __u32 goal,
                                    uid_t *owner);
 extern void ext4_free_inode(handle_t *, struct inode *);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 61fa9e1614af..607b1557d292 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1095,7 +1095,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
                  le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
                  ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
-        neh->eh_depth = cpu_to_le16(neh->eh_depth + 1);
+        neh->eh_depth = cpu_to_le16(le16_to_cpu(neh->eh_depth) + 1);
        ext4_mark_inode_dirty(handle, inode);
 out:
        brelse(bh);
@@ -2955,7 +2955,6 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
        /* Pre-conditions */
        BUG_ON(!ext4_ext_is_uninitialized(ex));
        BUG_ON(!in_range(map->m_lblk, ee_block, ee_len));
-        BUG_ON(map->m_lblk + map->m_len > ee_block + ee_len);
        /*
         * Attempt to transfer newly initialized blocks from the currently
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 00beb4f9cc4f..4637af036d9c 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -351,7 +351,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
 */
 static int find_group_orlov(struct super_block *sb, struct inode *parent,
-                            ext4_group_t *group, int mode,
+                            ext4_group_t *group, umode_t mode,
                            const struct qstr *qstr)
 {
        ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
@@ -497,7 +497,7 @@ fallback_retry:
 }
 static int find_group_other(struct super_block *sb, struct inode *parent,
-                            ext4_group_t *group, int mode)
+                            ext4_group_t *group, umode_t mode)
 {
        ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
        ext4_group_t i, last, ngroups = ext4_get_groups_count(sb);
@@ -602,7 +602,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
 */
 static int ext4_claim_inode(struct super_block *sb,
                        struct buffer_head *inode_bitmap_bh,
-                        unsigned long ino, ext4_group_t group, int mode)
+                        unsigned long ino, ext4_group_t group, umode_t mode)
 {
        int free = 0, retval = 0, count;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -690,7 +690,7 @@ err_ret:
 * For other inodes, search forward from the parent directory's block
 * group to find a free inode.
 */
-struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
+struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode,
                             const struct qstr *qstr, __u32 goal, uid_t *owner)
 {
        struct super_block *sb;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 848f436df29f..7dbcc3e84570 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1339,8 +1339,11 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
                                        clear_buffer_unwritten(bh);
                                }
-                                /* skip page if block allocation undone */
+                                /*
-                                if (buffer_delay(bh) || buffer_unwritten(bh))
+                                 * skip page if block allocation undone and
+                                 * block is dirty
+                                 */
+                                if (ext4_bh_delay_or_unwritten(NULL, bh))
                                        skip_page = 1;
                                bh = bh->b_this_page;
                                block_start += bh->b_size;
@@ -1878,7 +1881,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
 * a[0] = 'a';
 * truncate(f, 4096);
 * we have in the page first buffer_head mapped via page_mkwrite call back
- * but other bufer_heads would be unmapped but dirty(dirty done via the
+ * but other buffer_heads would be unmapped but dirty (dirty done via the
 * do_wp_page). So writepage should write the first block. If we modify
 * the mmap area beyond 1024 we will again get a page_fault and the
 * page_mkwrite callback will do the block allocation and mark the
@@ -2387,7 +2390,6 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
        pgoff_t index;
        struct inode *inode = mapping->host;
        handle_t *handle;
-        loff_t page_len;
        index = pos >> PAGE_CACHE_SHIFT;
@@ -2434,13 +2436,6 @@ retry:
                 */
                if (pos + len > inode->i_size)
                        ext4_truncate_failed_write(inode);
-        } else {
-                page_len = pos & (PAGE_CACHE_SIZE - 1);
-                if (page_len > 0) {
-                        ret = ext4_discard_partial_page_buffers_no_lock(handle,
-                                inode, page, pos - page_len, page_len,
-                                EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED);
-                }
        }
        if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -2483,7 +2478,6 @@ static int ext4_da_write_end(struct file *file,
        loff_t new_i_size;
        unsigned long start, end;
        int write_mode = (int)(unsigned long)fsdata;
-        loff_t page_len;
        if (write_mode == FALL_BACK_TO_NONDELALLOC) {
                if (ext4_should_order_data(inode)) {
@@ -2508,7 +2502,7 @@ static int ext4_da_write_end(struct file *file,
         */
        new_i_size = pos + copied;
-        if (new_i_size > EXT4_I(inode)->i_disksize) {
+        if (copied && new_i_size > EXT4_I(inode)->i_disksize) {
                if (ext4_da_should_update_i_disksize(page, end)) {
                        down_write(&EXT4_I(inode)->i_data_sem);
                        if (new_i_size > EXT4_I(inode)->i_disksize) {
@@ -2532,16 +2526,6 @@ static int ext4_da_write_end(struct file *file,
        }
        ret2 = generic_write_end(file, mapping, pos, len, copied,
                                                        page, fsdata);
-        page_len = PAGE_CACHE_SIZE -
-                        ((pos + copied - 1) & (PAGE_CACHE_SIZE - 1));
-        if (page_len > 0) {
-                ret = ext4_discard_partial_page_buffers_no_lock(handle,
-                        inode, page, pos + copied - 1, page_len,
-                        EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED);
-        }
        copied = ret2;
        if (ret2 < 0)
                ret = ret2;
@@ -2781,10 +2765,11 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
                  iocb->private, io_end->inode->i_ino, iocb, offset,
                  size);
+        iocb->private = NULL;
        /* if not aio dio with unwritten extents, just free io and return */
        if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
                ext4_free_io_end(io_end);
-                iocb->private = NULL;
 out:
                if (is_async)
                        aio_complete(iocb, ret, 0);
@@ -2807,7 +2792,6 @@ out:
        spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
        /* queue the work to convert unwritten extents to written */
-        iocb->private = NULL;
        queue_work(wq, &io_end->work);
        /* XXX: probably should move into the real I/O completion handler */
@@ -3203,26 +3187,8 @@ int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
        iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
-        if (!page_has_buffers(page)) {
+        if (!page_has_buffers(page))
-                /*
+                create_empty_buffers(page, blocksize, 0);
-                 * If the range to be discarded covers a partial block
-                 * we need to get the page buffers.  This is because
-                 * partial blocks cannot be released and the page needs
-                 * to be updated with the contents of the block before
-                 * we write the zeros on top of it.
-                 */
-                if ((from & (blocksize - 1)) ||
-                    ((from + length) & (blocksize - 1))) {
-                        create_empty_buffers(page, blocksize, 0);
-                } else {
-                        /*
-                         * If there are no partial blocks,
-                         * there is nothing to update,
-                         * so we can return now
-                         */
-                        return 0;
-                }
-        }
        /* Find the buffer that contains "offset" */
        bh = page_buffers(page);
@@ -3503,7 +3469,7 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
 * transaction, and VFS/VM ensures that ext4_truncate() cannot run
 * simultaneously on behalf of the same inode.
 *
- * As we work through the truncate and commmit bits of it to the journal there
+ * As we work through the truncate and commit bits of it to the journal there
 * is one core, guiding principle: the file's tree must always be consistent on
 * disk.  We must be able to restart the truncate after a crash.
 *
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index a56796814d6a..d37b3bb2a3b8 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -45,7 +45,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                if (get_user(flags, (int __user *) arg))
                        return -EFAULT;
-                err = mnt_want_write(filp->f_path.mnt);
+                err = mnt_want_write_file(filp);
                if (err)
                        return err;
@@ -134,7 +134,7 @@ flags_err:
                        err = ext4_ext_migrate(inode);
 flags_out:
                mutex_unlock(&inode->i_mutex);
-                mnt_drop_write(filp->f_path.mnt);
+                mnt_drop_write_file(filp);
                return err;
        }
        case EXT4_IOC_GETVERSION:
@@ -150,7 +150,7 @@ flags_out:
                if (!inode_owner_or_capable(inode))
                        return -EPERM;
-                err = mnt_want_write(filp->f_path.mnt);
+                err = mnt_want_write_file(filp);
                if (err)
                        return err;
                if (get_user(generation, (int __user *) arg)) {
@@ -171,7 +171,7 @@ flags_out:
                }
                ext4_journal_stop(handle);
 setversion_out:
-                mnt_drop_write(filp->f_path.mnt);
+                mnt_drop_write_file(filp);
                return err;
        }
        case EXT4_IOC_GROUP_EXTEND: {
@@ -192,7 +192,7 @@ setversion_out:
                        return -EOPNOTSUPP;
                }
-                err = mnt_want_write(filp->f_path.mnt);
+                err = mnt_want_write_file(filp);
                if (err)
                        return err;
@@ -204,7 +204,7 @@ setversion_out:
                }
                if (err == 0)
                        err = err2;
-                mnt_drop_write(filp->f_path.mnt);
+                mnt_drop_write_file(filp);
                ext4_resize_end(sb);
                return err;
@@ -240,13 +240,13 @@ setversion_out:
                        return -EOPNOTSUPP;
                }
-                err = mnt_want_write(filp->f_path.mnt);
+                err = mnt_want_write_file(filp);
                if (err)
                        goto mext_out;
                err = ext4_move_extents(filp, donor_filp, me.orig_start,
                                        me.donor_start, me.len, &me.moved_len);
-                mnt_drop_write(filp->f_path.mnt);
+                mnt_drop_write_file(filp);
                if (me.moved_len > 0)
                        file_remove_suid(donor_filp);
@@ -277,7 +277,7 @@ mext_out:
                        return -EOPNOTSUPP;
                }
-                err = mnt_want_write(filp->f_path.mnt);
+                err = mnt_want_write_file(filp);
                if (err)
                        return err;
@@ -289,7 +289,7 @@ mext_out:
                }
                if (err == 0)
                        err = err2;
-                mnt_drop_write(filp->f_path.mnt);
+                mnt_drop_write_file(filp);
                ext4_resize_end(sb);
                return err;
@@ -301,7 +301,7 @@ mext_out:
                if (!inode_owner_or_capable(inode))
                        return -EACCES;
-                err = mnt_want_write(filp->f_path.mnt);
+                err = mnt_want_write_file(filp);
                if (err)
                        return err;
                /*
@@ -313,7 +313,7 @@ mext_out:
                mutex_lock(&(inode->i_mutex));
                err = ext4_ext_migrate(inode);
                mutex_unlock(&(inode->i_mutex));
-                mnt_drop_write(filp->f_path.mnt);
+                mnt_drop_write_file(filp);
                return err;
        }
@@ -323,11 +323,11 @@ mext_out:
                if (!inode_owner_or_capable(inode))
                        return -EACCES;
-                err = mnt_want_write(filp->f_path.mnt);
+                err = mnt_want_write_file(filp);
                if (err)
                        return err;
                err = ext4_alloc_da_blocks(inode);
-                mnt_drop_write(filp->f_path.mnt);
+                mnt_drop_write_file(filp);
                return err;
        }
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index aa4c782c9dd7..2043f482375d 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1736,7 +1736,7 @@ static int ext4_add_nondir(handle_t *handle,
 * If the create succeeds, we fill in the inode information
 * with d_instantiate().
 */
-static int ext4_create(struct inode *dir, struct dentry *dentry, int mode,
+static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
                       struct nameidata *nd)
 {
        handle_t *handle;
@@ -1770,7 +1770,7 @@ retry:
 }
 static int ext4_mknod(struct inode *dir, struct dentry *dentry,
-                      int mode, dev_t rdev)
+                      umode_t mode, dev_t rdev)
 {
        handle_t *handle;
        struct inode *inode;
@@ -1806,7 +1806,7 @@ retry:
        return err;
 }
-static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
        handle_t *handle;
        struct inode *inode;
@@ -2315,7 +2315,7 @@ retry:
                        err = PTR_ERR(handle);
                        goto err_drop_inode;
                }
-                inc_nlink(inode);
+                set_nlink(inode, 1);
                err = ext4_orphan_del(handle, inode);
                if (err) {
                        ext4_journal_stop(handle);
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 7ce1d0b19c94..7e106c810c62 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -385,6 +385,18 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
                block_end = block_start + blocksize;
                if (block_start >= len) {
+                        /*
+                         * Comments copied from block_write_full_page_endio:
+                         *
+                         * The page straddles i_size.  It must be zeroed out on
+                         * each and every writepage invocation because it may
+                         * be mmapped.  "A file is mapped in multiples of the
+                         * page size.  For a file that is not a multiple of
+                         * the  page size, the remaining memory is zeroed when
+                         * mapped, and writes to that region are not written
+                         * out to the file."
+                         */
+                        zero_user_segment(page, block_start, block_end);
                        clear_buffer_dirty(bh);
                        set_buffer_uptodate(bh);
                        continue;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 3858767ec672..64e2529ae9bb 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -930,7 +930,6 @@ static int ext4_drop_inode(struct inode *inode)
 static void ext4_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
 }
@@ -1033,11 +1032,11 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
 *  - it's set to a non-default value OR
 *  - if the per-sb default is different from the global default
 */
-static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
+static int ext4_show_options(struct seq_file *seq, struct dentry *root)
 {
        int def_errors;
        unsigned long def_mount_opts;
-        struct super_block *sb = vfs->mnt_sb;
+        struct super_block *sb = root->d_sb;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_super_block *es = sbi->s_es;
@@ -1155,9 +1154,9 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
                seq_puts(seq, ",block_validity");
        if (!test_opt(sb, INIT_INODE_TABLE))
-                seq_puts(seq, ",noinit_inode_table");
+                seq_puts(seq, ",noinit_itable");
        else if (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)
-                seq_printf(seq, ",init_inode_table=%u",
+                seq_printf(seq, ",init_itable=%u",
                           (unsigned) sbi->s_li_wait_mult);
        ext4_show_quota_options(seq, sb);
@@ -1333,8 +1332,7 @@ enum {
        Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
        Opt_inode_readahead_blks, Opt_journal_ioprio,
        Opt_dioread_nolock, Opt_dioread_lock,
-        Opt_discard, Opt_nodiscard,
+        Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
-        Opt_init_inode_table, Opt_noinit_inode_table,
 };
 static const match_table_t tokens = {
@@ -1407,9 +1405,9 @@ static const match_table_t tokens = {
        {Opt_dioread_lock, "dioread_lock"},
        {Opt_discard, "discard"},
        {Opt_nodiscard, "nodiscard"},
-        {Opt_init_inode_table, "init_itable=%u"},
+        {Opt_init_itable, "init_itable=%u"},
-        {Opt_init_inode_table, "init_itable"},
+        {Opt_init_itable, "init_itable"},
-        {Opt_noinit_inode_table, "noinit_itable"},
+        {Opt_noinit_itable, "noinit_itable"},
        {Opt_err, NULL},
 };
@@ -1892,7 +1890,7 @@ set_qf_format:
                case Opt_dioread_lock:
                        clear_opt(sb, DIOREAD_NOLOCK);
                        break;
-                case Opt_init_inode_table:
+                case Opt_init_itable:
                        set_opt(sb, INIT_INODE_TABLE);
                        if (args[0].from) {
                                if (match_int(&args[0], &option))
@@ -1903,7 +1901,7 @@ set_qf_format:
                                return 0;
                        sbi->s_li_wait_mult = option;
                        break;
-                case Opt_noinit_inode_table:
+                case Opt_noinit_itable:
                        clear_opt(sb, INIT_INODE_TABLE);
                        break;
                default:
@@ -2884,8 +2882,7 @@ cont_thread:
                }
                mutex_unlock(&eli->li_list_mtx);
-                if (freezing(current))
+                try_to_freeze();
-                        refrigerator();
                cur = jiffies;
                if ((time_after_eq(cur, next_wakeup)) ||
@@ -4783,7 +4780,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
                return -EINVAL;
        /* Quotafile not on the same filesystem? */
-        if (path->mnt->mnt_sb != sb)
+        if (path->dentry->d_sb != sb)
                return -EXDEV;
        /* Journaling quota? */
        if (EXT4_SB(sb)->s_qf_names[type]) {
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 1510a4d51990..66994f316e18 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -141,7 +141,7 @@ static inline struct msdos_inode_info *MSDOS_I(struct inode *inode)
 static inline int fat_mode_can_hold_ro(struct inode *inode)
 {
        struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
-        mode_t mask;
+        umode_t mask;
        if (S_ISDIR(inode->i_mode)) {
                if (!sbi->options.rodir)
@@ -156,8 +156,8 @@ static inline int fat_mode_can_hold_ro(struct inode *inode)
 }
 /* Convert attribute bits and a mask to the UNIX mode. */
-static inline mode_t fat_make_mode(struct msdos_sb_info *sbi,
+static inline umode_t fat_make_mode(struct msdos_sb_info *sbi,
-                                   u8 attrs, mode_t mode)
+                                   u8 attrs, umode_t mode)
 {
        if (attrs & ATTR_RO && !((attrs & ATTR_DIR) && !sbi->options.rodir))
                mode &= ~S_IWUGO;
diff --git a/fs/fat/file.c b/fs/fat/file.c
index c118acf16e43..a71fe3715ee8 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -44,7 +44,7 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
                goto out;
        mutex_lock(&inode->i_mutex);
-        err = mnt_want_write(file->f_path.mnt);
+        err = mnt_want_write_file(file);
        if (err)
                goto out_unlock_inode;
@@ -108,7 +108,7 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
        fat_save_attrs(inode, attr);
        mark_inode_dirty(inode);
 out_drop_write:
-        mnt_drop_write(file->f_path.mnt);
+        mnt_drop_write_file(file);
 out_unlock_inode:
        mutex_unlock(&inode->i_mutex);
 out:
@@ -314,7 +314,7 @@ EXPORT_SYMBOL_GPL(fat_getattr);
 static int fat_sanitize_mode(const struct msdos_sb_info *sbi,
                             struct inode *inode, umode_t *mode_ptr)
 {
-        mode_t mask, perm;
+        umode_t mask, perm;
        /*
         * Note, the basic check is already done by a caller of
@@ -351,7 +351,7 @@ static int fat_sanitize_mode(const struct msdos_sb_info *sbi,
 static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode)
 {
-        mode_t allow_utime = sbi->options.allow_utime;
+        umode_t allow_utime = sbi->options.allow_utime;
        if (current_fsuid() != inode->i_uid) {
                if (in_group_p(inode->i_gid))
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 808cac7edcfb..3ab841054d53 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -518,7 +518,6 @@ static struct inode *fat_alloc_inode(struct super_block *sb)
 static void fat_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(fat_inode_cachep, MSDOS_I(inode));
 }
@@ -672,7 +671,7 @@ int fat_sync_inode(struct inode *inode)
 EXPORT_SYMBOL_GPL(fat_sync_inode);
-static int fat_show_options(struct seq_file *m, struct vfsmount *mnt);
+static int fat_show_options(struct seq_file *m, struct dentry *root);
 static const struct super_operations fat_sops = {
        .alloc_inode    = fat_alloc_inode,
        .destroy_inode  = fat_destroy_inode,
@@ -811,9 +810,9 @@ static const struct export_operations fat_export_ops = {
        .get_parent     = fat_get_parent,
 };
-static int fat_show_options(struct seq_file *m, struct vfsmount *mnt)
+static int fat_show_options(struct seq_file *m, struct dentry *root)
 {
-        struct msdos_sb_info *sbi = MSDOS_SB(mnt->mnt_sb);
+        struct msdos_sb_info *sbi = MSDOS_SB(root->d_sb);
        struct fat_mount_options *opts = &sbi->options;
        int isvfat = opts->isvfat;
@@ -898,7 +897,7 @@ enum {
        Opt_charset, Opt_shortname_lower, Opt_shortname_win95,
        Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
        Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
-        Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont,
+        Opt_obsolete, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont,
        Opt_err_panic, Opt_err_ro, Opt_discard, Opt_err,
 };
@@ -928,17 +927,17 @@ static const match_table_t fat_tokens = {
        {Opt_err_panic, "errors=panic"},
        {Opt_err_ro, "errors=remount-ro"},
        {Opt_discard, "discard"},
-        {Opt_obsolate, "conv=binary"},
+        {Opt_obsolete, "conv=binary"},
-        {Opt_obsolate, "conv=text"},
+        {Opt_obsolete, "conv=text"},
-        {Opt_obsolate, "conv=auto"},
+        {Opt_obsolete, "conv=auto"},
-        {Opt_obsolate, "conv=b"},
+        {Opt_obsolete, "conv=b"},
-        {Opt_obsolate, "conv=t"},
+        {Opt_obsolete, "conv=t"},
-        {Opt_obsolate, "conv=a"},
+        {Opt_obsolete, "conv=a"},
-        {Opt_obsolate, "fat=%u"},
+        {Opt_obsolete, "fat=%u"},
-        {Opt_obsolate, "blocksize=%u"},
+        {Opt_obsolete, "blocksize=%u"},
-        {Opt_obsolate, "cvf_format=%20s"},
+        {Opt_obsolete, "cvf_format=%20s"},
-        {Opt_obsolate, "cvf_options=%100s"},
+        {Opt_obsolete, "cvf_options=%100s"},
-        {Opt_obsolate, "posix"},
+        {Opt_obsolete, "posix"},
        {Opt_err, NULL},
 };
 static const match_table_t msdos_tokens = {
@@ -1170,7 +1169,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
                        break;
                /* obsolete mount options */
-                case Opt_obsolate:
+                case Opt_obsolete:
                        fat_msg(sb, KERN_INFO, "\"%s\" option is obsolete, "
                               "not supported now", p);
                        break;
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 216b419f30e2..c5938c9084b9 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -264,7 +264,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
 }
 /***** Create a file */
-static int msdos_create(struct inode *dir, struct dentry *dentry, int mode,
+static int msdos_create(struct inode *dir, struct dentry *dentry, umode_t mode,
                        struct nameidata *nd)
 {
        struct super_block *sb = dir->i_sb;
@@ -346,7 +346,7 @@ out:
 }
 /***** Make a directory */
-static int msdos_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int msdos_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
        struct super_block *sb = dir->i_sb;
        struct fat_slot_info sinfo;
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index c25cf151b84b..a81eb2367d39 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -782,7 +782,7 @@ error:
        return ERR_PTR(err);
 }
-static int vfat_create(struct inode *dir, struct dentry *dentry, int mode,
+static int vfat_create(struct inode *dir, struct dentry *dentry, umode_t mode,
                       struct nameidata *nd)
 {
        struct super_block *sb = dir->i_sb;
@@ -871,7 +871,7 @@ out:
        return err;
 }
-static int vfat_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int vfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
        struct super_block *sb = dir->i_sb;
        struct inode *inode;
diff --git a/fs/fhandle.c b/fs/fhandle.c
index 6b088641f5bf..a48e4a139be1 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -10,6 +10,7 @@
 #include <linux/personality.h>
 #include <asm/uaccess.h>
 #include "internal.h"
+#include "mount.h"
 static long do_sys_name_to_handle(struct path *path,
                                  struct file_handle __user *ufh,
@@ -24,8 +25,8 @@ static long do_sys_name_to_handle(struct path *path,
         * We need t make sure wether the file system
         * support decoding of the file handle
         */
-        if (!path->mnt->mnt_sb->s_export_op ||
+        if (!path->dentry->d_sb->s_export_op ||
-            !path->mnt->mnt_sb->s_export_op->fh_to_dentry)
+            !path->dentry->d_sb->s_export_op->fh_to_dentry)
                return -EOPNOTSUPP;
        if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle)))
@@ -66,7 +67,8 @@ static long do_sys_name_to_handle(struct path *path,
        } else
                retval = 0;
        /* copy the mount id */
-        if (copy_to_user(mnt_id, &path->mnt->mnt_id, sizeof(*mnt_id)) ||
+        if (copy_to_user(mnt_id, &real_mount(path->mnt)->mnt_id,
+                         sizeof(*mnt_id)) ||
            copy_to_user(ufh, handle,
                         sizeof(struct file_handle) + handle_bytes))
                retval = -EFAULT;
diff --git a/fs/file_table.c b/fs/file_table.c
index c322794f7360..20002e39754d 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -474,29 +474,6 @@ void file_sb_list_del(struct file *file)
 #endif
-int fs_may_remount_ro(struct super_block *sb)
-{
-        struct file *file;
-        /* Check that no files are currently opened for writing. */
-        lg_global_lock(files_lglock);
-        do_file_list_for_each_entry(sb, file) {
-                struct inode *inode = file->f_path.dentry->d_inode;
-                /* File with pending delete? */
-                if (inode->i_nlink == 0)
-                        goto too_bad;
-                /* Writeable file? */
-                if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
-                        goto too_bad;
-        } while_file_list_for_each_entry;
-        lg_global_unlock(files_lglock);
-        return 1; /* Tis' cool bro. */
-too_bad:
-        lg_global_unlock(files_lglock);
-        return 0;
-}
 /**
 *      mark_files_ro - mark all files read-only
 *      @sb: superblock in question
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 0845f84f2a5f..96f24286667a 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -74,7 +74,6 @@ int register_filesystem(struct file_system_type * fs)
        BUG_ON(strchr(fs->name, '.'));
        if (fs->next)
                return -EBUSY;
-        INIT_LIST_HEAD(&fs->fs_supers);
        write_lock(&file_systems_lock);
        p = find_filesystem(fs->name, strlen(fs->name));
        if (*p)
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 7b2af5abe2fa..cf9ef918a2a9 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -187,10 +187,10 @@ vxfs_stiget(struct super_block *sbp, ino_t ino)
 *  vxfs_transmod returns a Linux mode_t for a given
 *  VxFS inode structure.
 */
-static __inline__ mode_t
+static __inline__ umode_t
 vxfs_transmod(struct vxfs_inode_info *vip)
 {
-        mode_t                  ret = vip->vii_mode & ~VXFS_TYPE_MASK;
+        umode_t                 ret = vip->vii_mode & ~VXFS_TYPE_MASK;
        if (VXFS_ISFIFO(vip))
                ret |= S_IFIFO;
@@ -340,7 +340,6 @@ vxfs_iget(struct super_block *sbp, ino_t ino)
 static void vxfs_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(vxfs_inode_cachep, inode->i_private);
 }
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 73c3992b2bb4..e2951506434d 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -25,7 +25,6 @@
 #include <linux/writeback.h>
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
-#include <linux/buffer_head.h>
 #include <linux/tracepoint.h>
 #include "internal.h"
@@ -47,17 +46,6 @@ struct wb_writeback_work {
        struct completion *done;        /* set if the caller waits */
 };
-const char *wb_reason_name[] = {
-        [WB_REASON_BACKGROUND]          = "background",
-        [WB_REASON_TRY_TO_FREE_PAGES]   = "try_to_free_pages",
-        [WB_REASON_SYNC]                = "sync",
-        [WB_REASON_PERIODIC]            = "periodic",
-        [WB_REASON_LAPTOP_TIMER]        = "laptop_timer",
-        [WB_REASON_FREE_MORE_MEM]       = "free_more_memory",
-        [WB_REASON_FS_FREE_SPACE]       = "fs_free_space",
-        [WB_REASON_FORKER_THREAD]       = "forker_thread"
-};
 /*
 * Include the creation of the trace points after defining the
 * wb_writeback_work structure so that the definition remains local to this
@@ -156,6 +144,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
 * bdi_start_writeback - start writeback
 * @bdi: the backing device to write from
 * @nr_pages: the number of pages to write
+ * @reason: reason why some writeback work was initiated
 *
 * Description:
 *   This does WB_SYNC_NONE opportunistic writeback. The IO is only
@@ -947,7 +936,7 @@ int bdi_writeback_thread(void *data)
        trace_writeback_thread_start(bdi);
-        while (!kthread_should_stop()) {
+        while (!kthread_freezable_should_stop(NULL)) {
                /*
                 * Remove own delayed wake-up timer, since we are already awake
                 * and we'll take care of the preriodic write-back.
@@ -977,8 +966,6 @@ int bdi_writeback_thread(void *data)
                         */
                        schedule();
                }
-                try_to_freeze();
        }
        /* Flush any work that raced with us exiting */
@@ -1223,6 +1210,7 @@ static void wait_sb_inodes(struct super_block *sb)
 * writeback_inodes_sb_nr -     writeback dirty inodes from given super_block
 * @sb: the superblock
 * @nr: the number of pages to write
+ * @reason: reason why some writeback work initiated
 *
 * Start writeback on some inodes on this super_block. No guarantees are made
 * on how many (if any) will be written, and this function does not wait
@@ -1251,6 +1239,7 @@ EXPORT_SYMBOL(writeback_inodes_sb_nr);
 /**
 * writeback_inodes_sb  -       writeback dirty inodes from given super_block
 * @sb: the superblock
+ * @reason: reason why some writeback work was initiated
 *
 * Start writeback on some inodes on this super_block. No guarantees are made
 * on how many (if any) will be written, and this function does not wait
@@ -1265,6 +1254,7 @@ EXPORT_SYMBOL(writeback_inodes_sb);
 /**
 * writeback_inodes_sb_if_idle  -       start writeback if none underway
 * @sb: the superblock
+ * @reason: reason why some writeback work was initiated
 *
 * Invoke writeback_inodes_sb if no writeback is currently underway.
 * Returns 1 if writeback was started, 0 if not.
@@ -1285,6 +1275,7 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
 * writeback_inodes_sb_if_idle  -       start writeback if none underway
 * @sb: the superblock
 * @nr: the number of pages to write
+ * @reason: reason why some writeback work was initiated
 *
 * Invoke writeback_inodes_sb if no writeback is currently underway.
 * Returns 1 if writeback was started, 0 if not.
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 5cb8614508c3..2aaf3eaaf13d 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1512,7 +1512,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
        else if (outarg->offset + num > file_size)
                num = file_size - outarg->offset;
-        while (num) {
+        while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) {
                struct page *page;
                unsigned int this_num;
@@ -1526,6 +1526,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
                num -= this_num;
                total_len += this_num;
+                index++;
        }
        req->misc.retrieve_in.offset = outarg->offset;
        req->misc.retrieve_in.size = total_len;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 9f63e493a9b6..5ddd6ea8f839 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -369,8 +369,8 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
 * If the filesystem doesn't support this, then fall back to separate
 * 'mknod' + 'open' requests.
 */
-static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
+static int fuse_create_open(struct inode *dir, struct dentry *entry,
-                            struct nameidata *nd)
+                            umode_t mode, struct nameidata *nd)
 {
        int err;
        struct inode *inode;
@@ -480,7 +480,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
 */
 static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
                            struct inode *dir, struct dentry *entry,
-                            int mode)
+                            umode_t mode)
 {
        struct fuse_entry_out outarg;
        struct inode *inode;
@@ -547,7 +547,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
        return err;
 }
-static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode,
+static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
                      dev_t rdev)
 {
        struct fuse_mknod_in inarg;
@@ -573,7 +573,7 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode,
        return create_new_entry(fc, req, dir, entry, mode);
 }
-static int fuse_create(struct inode *dir, struct dentry *entry, int mode,
+static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
                       struct nameidata *nd)
 {
        if (nd) {
@@ -585,7 +585,7 @@ static int fuse_create(struct inode *dir, struct dentry *entry, int mode,
        return fuse_mknod(dir, entry, mode, 0);
 }
-static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode)
+static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
 {
        struct fuse_mkdir_in inarg;
        struct fuse_conn *fc = get_fuse_conn(dir);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 594f07a81c28..0c84100acd44 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1556,7 +1556,7 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
        struct inode *inode = file->f_path.dentry->d_inode;
        mutex_lock(&inode->i_mutex);
-        if (origin != SEEK_CUR || origin != SEEK_SET) {
+        if (origin != SEEK_CUR && origin != SEEK_SET) {
                retval = fuse_update_attributes(inode, NULL, file, NULL);
                if (retval)
                        goto exit;
@@ -1567,6 +1567,10 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
                offset += i_size_read(inode);
                break;
        case SEEK_CUR:
+                if (offset == 0) {
+                        retval = file->f_pos;
+                        goto exit;
+                }
                offset += file->f_pos;
                break;
        case SEEK_DATA:
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index cf6db0a93219..1964da0257d9 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -80,7 +80,7 @@ struct fuse_inode {
        /** The sticky bit in inode->i_mode may have been removed, so
            preserve the original mode */
-        mode_t orig_i_mode;
+        umode_t orig_i_mode;
        /** Version of last attribute change */
        u64 attr_version;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 3e6d72756479..64cf8d07393e 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -107,7 +107,6 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
 static void fuse_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(fuse_inode_cachep, inode);
 }
@@ -498,9 +497,10 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
        return 1;
 }
-static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
+static int fuse_show_options(struct seq_file *m, struct dentry *root)
 {
-        struct fuse_conn *fc = get_fuse_conn_super(mnt->mnt_sb);
+        struct super_block *sb = root->d_sb;
+        struct fuse_conn *fc = get_fuse_conn_super(sb);
        seq_printf(m, ",user_id=%u", fc->user_id);
        seq_printf(m, ",group_id=%u", fc->group_id);
@@ -510,9 +510,8 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
                seq_puts(m, ",allow_other");
        if (fc->max_read != ~0)
                seq_printf(m, ",max_read=%u", fc->max_read);
-        if (mnt->mnt_sb->s_bdev &&
+        if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE)
-            mnt->mnt_sb->s_blocksize != FUSE_DEFAULT_BLKSIZE)
+                seq_printf(m, ",blksize=%lu", sb->s_blocksize);
-                seq_printf(m, ",blksize=%lu", mnt->mnt_sb->s_blocksize);
        return 0;
 }
@@ -1138,28 +1137,28 @@ static int __init fuse_fs_init(void)
 {
        int err;
-        err = register_filesystem(&fuse_fs_type);
-        if (err)
-                goto out;
-        err = register_fuseblk();
-        if (err)
-                goto out_unreg;
        fuse_inode_cachep = kmem_cache_create("fuse_inode",
                                              sizeof(struct fuse_inode),
                                              0, SLAB_HWCACHE_ALIGN,
                                              fuse_inode_init_once);
        err = -ENOMEM;
        if (!fuse_inode_cachep)
-                goto out_unreg2;
+                goto out;
+        err = register_fuseblk();
+        if (err)
+                goto out2;
+        err = register_filesystem(&fuse_fs_type);
+        if (err)
+                goto out3;
        return 0;
- out_unreg2:
+ out3:
        unregister_fuseblk();
- out_unreg:
+ out2:
-        unregister_filesystem(&fuse_fs_type);
+        kmem_cache_destroy(fuse_inode_cachep);
 out:
        return err;
 }
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 65978d7885c8..230eb0f005b6 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -38,8 +38,9 @@ static const char *gfs2_acl_name(int type)
        return NULL;
 }
-static struct posix_acl *gfs2_acl_get(struct gfs2_inode *ip, int type)
+struct posix_acl *gfs2_get_acl(struct inode *inode, int type)
 {
+        struct gfs2_inode *ip = GFS2_I(inode);
        struct posix_acl *acl;
        const char *name;
        char *data;
@@ -67,11 +68,6 @@ static struct posix_acl *gfs2_acl_get(struct gfs2_inode *ip, int type)
        return acl;
 }
-struct posix_acl *gfs2_get_acl(struct inode *inode, int type)
-{
-        return gfs2_acl_get(GFS2_I(inode), type);
-}
 static int gfs2_set_mode(struct inode *inode, umode_t mode)
 {
        int error = 0;
@@ -125,7 +121,7 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode)
        if (S_ISLNK(inode->i_mode))
                return 0;
-        acl = gfs2_acl_get(dip, ACL_TYPE_DEFAULT);
+        acl = gfs2_get_acl(&dip->i_inode, ACL_TYPE_DEFAULT);
        if (IS_ERR(acl))
                return PTR_ERR(acl);
        if (!acl) {
@@ -166,7 +162,7 @@ int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr)
        unsigned int len;
        int error;
-        acl = gfs2_acl_get(ip, ACL_TYPE_ACCESS);
+        acl = gfs2_get_acl(&ip->i_inode, ACL_TYPE_ACCESS);
        if (IS_ERR(acl))
                return PTR_ERR(acl);
        if (!acl)
@@ -216,7 +212,7 @@ static int gfs2_xattr_system_get(struct dentry *dentry, const char *name,
        if (type < 0)
                return type;
-        acl = gfs2_acl_get(GFS2_I(inode), type);
+        acl = gfs2_get_acl(inode, type);
        if (IS_ERR(acl))
                return PTR_ERR(acl);
        if (acl == NULL)
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 4858e1fed8b1..501e5cba09b3 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -615,7 +615,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
        unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
        int alloc_required;
        int error = 0;
-        struct gfs2_alloc *al = NULL;
+        struct gfs2_qadata *qa = NULL;
        pgoff_t index = pos >> PAGE_CACHE_SHIFT;
        unsigned from = pos & (PAGE_CACHE_SIZE - 1);
        struct page *page;
@@ -639,8 +639,8 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
                gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks);
        if (alloc_required) {
-                al = gfs2_alloc_get(ip);
+                qa = gfs2_qadata_get(ip);
-                if (!al) {
+                if (!qa) {
                        error = -ENOMEM;
                        goto out_unlock;
                }
@@ -649,8 +649,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
                if (error)
                        goto out_alloc_put;
-                al->al_requested = data_blocks + ind_blocks;
+                error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks);
-                error = gfs2_inplace_reserve(ip);
                if (error)
                        goto out_qunlock;
        }
@@ -711,7 +710,7 @@ out_trans_fail:
 out_qunlock:
                gfs2_quota_unlock(ip);
 out_alloc_put:
-                gfs2_alloc_put(ip);
+                gfs2_qadata_put(ip);
        }
 out_unlock:
        if (&ip->i_inode == sdp->sd_rindex) {
@@ -848,7 +847,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
        struct buffer_head *dibh;
-        struct gfs2_alloc *al = ip->i_alloc;
+        struct gfs2_qadata *qa = ip->i_qadata;
        unsigned int from = pos & (PAGE_CACHE_SIZE - 1);
        unsigned int to = from + len;
        int ret;
@@ -880,10 +879,11 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
        brelse(dibh);
 failed:
        gfs2_trans_end(sdp);
-        if (al) {
+        if (ip->i_res)
                gfs2_inplace_release(ip);
+        if (qa) {
                gfs2_quota_unlock(ip);
-                gfs2_alloc_put(ip);
+                gfs2_qadata_put(ip);
        }
        if (inode == sdp->sd_rindex) {
                gfs2_glock_dq(&m_ip->i_gh);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 41d494d79709..14a704015970 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -133,7 +133,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
                   and write it out to disk */
                unsigned int n = 1;
-                error = gfs2_alloc_block(ip, &block, &n);
+                error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
                if (error)
                        goto out_brelse;
                if (isdir) {
@@ -503,7 +503,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
        do {
                int error;
                n = blks - alloced;
-                error = gfs2_alloc_block(ip, &bn, &n);
+                error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
                if (error)
                        return error;
                alloced += n;
@@ -743,9 +743,6 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
        else if (ip->i_depth)
                revokes = sdp->sd_inptrs;
-        if (error)
-                return error;
        memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
        bstart = 0;
        blen = 0;
@@ -1044,7 +1041,7 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
                lblock = (size - 1) >> sdp->sd_sb.sb_bsize_shift;
        find_metapath(sdp, lblock, &mp, ip->i_height);
-        if (!gfs2_alloc_get(ip))
+        if (!gfs2_qadata_get(ip))
                return -ENOMEM;
        error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
@@ -1064,7 +1061,7 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
        gfs2_quota_unhold(ip);
 out:
-        gfs2_alloc_put(ip);
+        gfs2_qadata_put(ip);
        return error;
 }
@@ -1166,21 +1163,20 @@ static int do_grow(struct inode *inode, u64 size)
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct buffer_head *dibh;
-        struct gfs2_alloc *al = NULL;
+        struct gfs2_qadata *qa = NULL;
        int error;
        if (gfs2_is_stuffed(ip) &&
            (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) {
-                al = gfs2_alloc_get(ip);
+                qa = gfs2_qadata_get(ip);
-                if (al == NULL)
+                if (qa == NULL)
                        return -ENOMEM;
                error = gfs2_quota_lock_check(ip);
                if (error)
                        goto do_grow_alloc_put;
-                al->al_requested = 1;
+                error = gfs2_inplace_reserve(ip, 1);
-                error = gfs2_inplace_reserve(ip);
                if (error)
                        goto do_grow_qunlock;
        }
@@ -1189,7 +1185,7 @@ static int do_grow(struct inode *inode, u64 size)
        if (error)
                goto do_grow_release;
-        if (al) {
+        if (qa) {
                error = gfs2_unstuff_dinode(ip, NULL);
                if (error)
                        goto do_end_trans;
@@ -1208,12 +1204,12 @@ static int do_grow(struct inode *inode, u64 size)
 do_end_trans:
        gfs2_trans_end(sdp);
 do_grow_release:
-        if (al) {
+        if (qa) {
                gfs2_inplace_release(ip);
 do_grow_qunlock:
                gfs2_quota_unlock(ip);
 do_grow_alloc_put:
-                gfs2_alloc_put(ip);
+                gfs2_qadata_put(ip);
        }
        return error;
 }
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 8ccad2467cb6..c35573abd371 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -76,6 +76,8 @@
 #define IS_LEAF     1 /* Hashed (leaf) directory */
 #define IS_DINODE   2 /* Linear (stuffed dinode block) directory */
+#define MAX_RA_BLOCKS 32 /* max read-ahead blocks */
 #define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1)
 #define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1))
@@ -821,7 +823,7 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh,
        struct gfs2_dirent *dent;
        struct qstr name = { .name = "", .len = 0, .hash = 0 };
-        error = gfs2_alloc_block(ip, &bn, &n);
+        error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
        if (error)
                return NULL;
        bh = gfs2_meta_new(ip->i_gl, bn);
@@ -1376,6 +1378,52 @@ out:
        return error;
 }
+/**
+ * gfs2_dir_readahead - Issue read-ahead requests for leaf blocks.
+ * @inode: directory inode; its gfs2 inode supplies the glock (i_gl) and the
+ *         cached hash table (i_hash_cache) that is walked here
+ * @hsize: exclusive upper bound for hash table indices; the walk stops as
+ *         soon as f_ra->start reaches it
+ * @index: hash table index the caller is currently positioned at
+ * @f_ra: read-ahead state; ->start is used as the next hash table index to
+ *        read ahead from and is advanced once per entry examined
+ *
+ * Does nothing if @index + MAX_RA_BLOCKS is still below f_ra->start.
+ * Otherwise resumes at max(@index, f_ra->start) and examines at most
+ * MAX_RA_BLOCKS hash table entries. An entry holding the same block number
+ * as the entry examined just before it is skipped. For every other entry
+ * the buffer is looked up with gfs2_getbuf(); if trylock_buffer() succeeds
+ * and the buffer is not yet up to date, a READA | REQ_META read is submitted
+ * with end_buffer_read_sync as completion handler. Buffers that are already
+ * up to date, or whose lock could not be taken, are simply released.
+ *
+ * NOTE(review): on the submit path the buffer is neither unlocked nor
+ * released in this function; presumably end_buffer_read_sync does both when
+ * the I/O completes - confirm.
+ *
+ * Note: we can't calculate each index like dir_e_read can because we don't
+ * have the leaf, and therefore we don't have the depth, and therefore we
+ * don't have the length. So we have to just read enough ahead to make up
+ * for the loss of information.
+ */
+static void gfs2_dir_readahead(struct inode *inode, unsigned hsize, u32 index,
+                               struct file_ra_state *f_ra)
+{
+        struct gfs2_inode *ip = GFS2_I(inode);
+        struct gfs2_glock *gl = ip->i_gl;
+        struct buffer_head *bh;
+        u64 blocknr = 0, last;
+        unsigned count;
+        /* First check if we've already read-ahead for the whole range. */
+        if (index + MAX_RA_BLOCKS < f_ra->start)
+                return;
+        f_ra->start = max((pgoff_t)index, f_ra->start); /* never go back behind earlier read-ahead */
+        for (count = 0; count < MAX_RA_BLOCKS; count++) {
+                if (f_ra->start >= hsize) /* if exceeded the hash table */
+                        break;
+                last = blocknr;
+                blocknr = be64_to_cpu(ip->i_hash_cache[f_ra->start]);
+                f_ra->start++;
+                if (blocknr == last) /* same block as the previous entry: nothing new to read */
+                        continue;
+                bh = gfs2_getbuf(gl, blocknr, 1);
+                if (trylock_buffer(bh)) {
+                        if (buffer_uptodate(bh)) { /* contents already valid: no I/O needed */
+                                unlock_buffer(bh);
+                                brelse(bh);
+                                continue;
+                        }
+                        bh->b_end_io = end_buffer_read_sync;
+                        submit_bh(READA | REQ_META, bh);
+                        continue;
+                }
+                brelse(bh); /* lock not obtained: drop our reference and move on */
+        }
+}
 /**
 * dir_e_read - Reads the entries from a directory into a filldir buffer
@@ -1388,7 +1436,7 @@ out:
 */
 static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
-                      filldir_t filldir)
+                      filldir_t filldir, struct file_ra_state *f_ra)
 {
        struct gfs2_inode *dip = GFS2_I(inode);
        u32 hsize, len = 0;
@@ -1402,10 +1450,14 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
        hash = gfs2_dir_offset2hash(*offset);
        index = hash >> (32 - dip->i_depth);
+        if (dip->i_hash_cache == NULL)
+                f_ra->start = 0;
        lp = gfs2_dir_get_hash_table(dip);
        if (IS_ERR(lp))
                return PTR_ERR(lp);
+        gfs2_dir_readahead(inode, hsize, index, f_ra);
        while (index < hsize) {
                error = gfs2_dir_read_leaf(inode, offset, opaque, filldir,
                                           &copied, &depth,
@@ -1423,7 +1475,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
 }
 int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
-                  filldir_t filldir)
+                  filldir_t filldir, struct file_ra_state *f_ra)
 {
        struct gfs2_inode *dip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -1437,7 +1489,7 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
                return 0;
        if (dip->i_diskflags & GFS2_DIF_EXHASH)
-                return dir_e_read(inode, offset, opaque, filldir);
+                return dir_e_read(inode, offset, opaque, filldir, f_ra);
        if (!gfs2_is_stuffed(dip)) {
                gfs2_consist_inode(dip);
@@ -1798,7 +1850,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
        if (!ht)
                return -ENOMEM;
-        if (!gfs2_alloc_get(dip)) {
+        if (!gfs2_qadata_get(dip)) {
                error = -ENOMEM;
                goto out;
        }
@@ -1887,7 +1939,7 @@ out_rlist:
        gfs2_rlist_free(&rlist);
        gfs2_quota_unhold(dip);
 out_put:
-        gfs2_alloc_put(dip);
+        gfs2_qadata_put(dip);
 out:
        kfree(ht);
        return error;
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index ff5772fbf024..98c960beab35 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -25,7 +25,7 @@ extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
                        const struct gfs2_inode *ip);
 extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry);
 extern int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
-                         filldir_t filldir);
+                         filldir_t filldir, struct file_ra_state *f_ra);
 extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
                          const struct gfs2_inode *nip, unsigned int new_type);
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index fe9945f2ff72..70ba891654f8 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -99,6 +99,7 @@ static int gfs2_get_name(struct dentry *parent, char *name,
        struct gfs2_holder gh;
        u64 offset = 0;
        int error;
+        struct file_ra_state f_ra = { .start = 0 };
        if (!dir)
                return -EINVAL;
@@ -118,7 +119,7 @@ static int gfs2_get_name(struct dentry *parent, char *name,
        if (error)
                return error;
-        error = gfs2_dir_read(dir, &offset, &gnfd, get_name_filldir);
+        error = gfs2_dir_read(dir, &offset, &gnfd, get_name_filldir, &f_ra);
        gfs2_glock_dq_uninit(&gh);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index ce36a56dfeac..c5fb3597f696 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -105,7 +105,7 @@ static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir)
                return error;
        }
-        error = gfs2_dir_read(dir, &offset, dirent, filldir);
+        error = gfs2_dir_read(dir, &offset, dirent, filldir, &file->f_ra);
        gfs2_glock_dq_uninit(&d_gh);
@@ -223,7 +223,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
        int error;
        u32 new_flags, flags;
-        error = mnt_want_write(filp->f_path.mnt);
+        error = mnt_want_write_file(filp);
        if (error)
                return error;
@@ -285,7 +285,7 @@ out_trans_end:
 out:
        gfs2_glock_dq_uninit(&gh);
 out_drop_write:
-        mnt_drop_write(filp->f_path.mnt);
+        mnt_drop_write_file(filp);
        return error;
 }
@@ -365,7 +365,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        u64 pos = page->index << PAGE_CACHE_SHIFT;
        unsigned int data_blocks, ind_blocks, rblocks;
        struct gfs2_holder gh;
-        struct gfs2_alloc *al;
+        struct gfs2_qadata *qa;
        loff_t size;
        int ret;
@@ -393,16 +393,15 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        }
        ret = -ENOMEM;
-        al = gfs2_alloc_get(ip);
+        qa = gfs2_qadata_get(ip);
-        if (al == NULL)
+        if (qa == NULL)
                goto out_unlock;
        ret = gfs2_quota_lock_check(ip);
        if (ret)
                goto out_alloc_put;
        gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
-        al->al_requested = data_blocks + ind_blocks;
+        ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks);
-        ret = gfs2_inplace_reserve(ip);
        if (ret)
                goto out_quota_unlock;
@@ -448,7 +447,7 @@ out_trans_fail:
 out_quota_unlock:
        gfs2_quota_unlock(ip);
 out_alloc_put:
-        gfs2_alloc_put(ip);
+        gfs2_qadata_put(ip);
 out_unlock:
        gfs2_glock_dq(&gh);
 out:
@@ -609,7 +608,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
        struct inode *inode = mapping->host;
        int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC);
        struct gfs2_inode *ip = GFS2_I(inode);
-        int ret, ret1 = 0;
+        int ret = 0, ret1 = 0;
        if (mapping->nrpages) {
                ret1 = filemap_fdatawrite_range(mapping, start, end);
@@ -750,8 +749,10 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
        struct gfs2_inode *ip = GFS2_I(inode);
        unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
        loff_t bytes, max_bytes;
-        struct gfs2_alloc *al;
+        struct gfs2_qadata *qa;
        int error;
+        const loff_t pos = offset;
+        const loff_t count = len;
        loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1);
        loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
        loff_t max_chunk_size = UINT_MAX & bsize_mask;
@@ -782,8 +783,8 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
        while (len > 0) {
                if (len < bytes)
                        bytes = len;
-                al = gfs2_alloc_get(ip);
+                qa = gfs2_qadata_get(ip);
-                if (!al) {
+                if (!qa) {
                        error = -ENOMEM;
                        goto out_unlock;
                }
@@ -795,8 +796,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
 retry:
                gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
-                al->al_requested = data_blocks + ind_blocks;
+                error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks);
-                error = gfs2_inplace_reserve(ip);
                if (error) {
                        if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
                                bytes >>= 1;
@@ -810,7 +810,6 @@ retry:
                max_bytes = bytes;
                calc_max_reserv(ip, (len > max_chunk_size)? max_chunk_size: len,
                                &max_bytes, &data_blocks, &ind_blocks);
-                al->al_requested = data_blocks + ind_blocks;
                rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
                          RES_RG_HDR + gfs2_rg_blocks(ip);
@@ -832,8 +831,11 @@ retry:
                offset += max_bytes;
                gfs2_inplace_release(ip);
                gfs2_quota_unlock(ip);
-                gfs2_alloc_put(ip);
+                gfs2_qadata_put(ip);
        }
+        if (error == 0)
+                error = generic_write_sync(file, pos, count);
        goto out_unlock;
 out_trans_fail:
@@ -841,7 +843,7 @@ out_trans_fail:
 out_qunlock:
        gfs2_quota_unlock(ip);
 out_alloc_put:
-        gfs2_alloc_put(ip);
+        gfs2_qadata_put(ip);
 out_unlock:
        gfs2_glock_dq(&ip->i_gh);
 out_uninit:
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 7389dfdcc9ef..e1d3bb59945c 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -244,17 +244,16 @@ struct gfs2_glock {
 #define GFS2_MIN_LVB_SIZE 32    /* Min size of LVB that gfs2 supports */
-struct gfs2_alloc {
+struct gfs2_qadata { /* quota allocation data */
        /* Quota stuff */
-        struct gfs2_quota_data *al_qd[2*MAXQUOTAS];
+        struct gfs2_quota_data *qa_qd[2*MAXQUOTAS];
-        struct gfs2_holder al_qd_ghs[2*MAXQUOTAS];
+        struct gfs2_holder qa_qd_ghs[2*MAXQUOTAS];
-        unsigned int al_qd_num;
+        unsigned int qa_qd_num;
+};
-        u32 al_requested; /* Filled in by caller of gfs2_inplace_reserve() */
-        u32 al_alloced; /* Filled in by gfs2_alloc_*() */
-        /* Filled in by gfs2_inplace_reserve() */
+struct gfs2_blkreserv {
-        struct gfs2_holder al_rgd_gh;
+        u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */
+        struct gfs2_holder rs_rgd_gh; /* Filled in by gfs2_inplace_reserve() */
 };
 enum {
@@ -275,7 +274,8 @@ struct gfs2_inode {
        struct gfs2_glock *i_gl; /* Move into i_gh? */
        struct gfs2_holder i_iopen_gh;
        struct gfs2_holder i_gh; /* for prepare/commit_write only */
-        struct gfs2_alloc *i_alloc;
+        struct gfs2_qadata *i_qadata; /* quota allocation data */
+        struct gfs2_blkreserv *i_res; /* resource group block reservation */
        struct gfs2_rgrpd *i_rgd;
        u64 i_goal;     /* goal block for allocations */
        struct rw_semaphore i_rw_mutex;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index cfd4959b218c..017960cf1d7a 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -333,7 +333,7 @@ out:
 */
 static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
-                     unsigned int mode)
+                     umode_t mode)
 {
        int error;
@@ -364,7 +364,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
        return 0;
 }
-static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode,
+static void munge_mode_uid_gid(struct gfs2_inode *dip, umode_t *mode,
                               unsigned int *uid, unsigned int *gid)
 {
        if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir &&
@@ -389,12 +389,13 @@ static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
        int error;
+        int dblocks = 1;
-        if (gfs2_alloc_get(dip) == NULL)
+        error = gfs2_rindex_update(sdp);
-                return -ENOMEM;
+        if (error)
+                fs_warn(sdp, "rindex update returns %d\n", error);
-        dip->i_alloc->al_requested = RES_DINODE;
+        error = gfs2_inplace_reserve(dip, RES_DINODE);
-        error = gfs2_inplace_reserve(dip);
        if (error)
                goto out;
@@ -402,14 +403,13 @@ static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation)
        if (error)
                goto out_ipreserv;
-        error = gfs2_alloc_di(dip, no_addr, generation);
+        error = gfs2_alloc_blocks(dip, no_addr, &dblocks, 1, generation);
        gfs2_trans_end(sdp);
 out_ipreserv:
        gfs2_inplace_release(dip);
 out:
-        gfs2_alloc_put(dip);
        return error;
 }
@@ -447,7 +447,7 @@ static void gfs2_init_dir(struct buffer_head *dibh,
 */
 static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
-                        const struct gfs2_inum_host *inum, unsigned int mode,
+                        const struct gfs2_inum_host *inum, umode_t mode,
                        unsigned int uid, unsigned int gid,
                        const u64 *generation, dev_t dev, const char *symname,
                        unsigned size, struct buffer_head **bhp)
@@ -516,7 +516,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 }
 static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
-                       unsigned int mode, const struct gfs2_inum_host *inum,
+                       umode_t mode, const struct gfs2_inum_host *inum,
                       const u64 *generation, dev_t dev, const char *symname,
                       unsigned int size, struct buffer_head **bhp)
 {
@@ -525,7 +525,7 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
        int error;
        munge_mode_uid_gid(dip, &mode, &uid, &gid);
-        if (!gfs2_alloc_get(dip))
+        if (!gfs2_qadata_get(dip))
                return -ENOMEM;
        error = gfs2_quota_lock(dip, uid, gid);
@@ -547,7 +547,7 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
 out_quota:
        gfs2_quota_unlock(dip);
 out:
-        gfs2_alloc_put(dip);
+        gfs2_qadata_put(dip);
        return error;
 }
@@ -555,13 +555,13 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
                       struct gfs2_inode *ip)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
-        struct gfs2_alloc *al;
+        struct gfs2_qadata *qa;
        int alloc_required;
        struct buffer_head *dibh;
        int error;
-        al = gfs2_alloc_get(dip);
+        qa = gfs2_qadata_get(dip);
-        if (!al)
+        if (!qa)
                return -ENOMEM;
        error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
@@ -576,9 +576,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
                if (error)
                        goto fail_quota_locks;
-                al->al_requested = sdp->sd_max_dirres;
+                error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres);
-                error = gfs2_inplace_reserve(dip);
                if (error)
                        goto fail_quota_locks;
@@ -619,11 +617,11 @@ fail_quota_locks:
        gfs2_quota_unlock(dip);
 fail:
-        gfs2_alloc_put(dip);
+        gfs2_qadata_put(dip);
        return error;
 }
-int gfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array,
+static int gfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array,
                    void *fs_info)
 {
        const struct xattr *xattr;
@@ -659,7 +657,7 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip,
 */
 static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
-                             unsigned int mode, dev_t dev, const char *symname,
+                             umode_t mode, dev_t dev, const char *symname,
                             unsigned int size, int excl)
 {
        const struct qstr *name = &dentry->d_name;
@@ -728,9 +726,12 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
                brelse(bh);
        gfs2_trans_end(sdp);
-        gfs2_inplace_release(dip);
+        /* Check if we reserved space in the rgrp. Function link_dinode may
+           not, depending on whether alloc is required. */
+        if (dip->i_res)
+                gfs2_inplace_release(dip);
        gfs2_quota_unlock(dip);
-        gfs2_alloc_put(dip);
+        gfs2_qadata_put(dip);
        mark_inode_dirty(inode);
        gfs2_glock_dq_uninit_m(2, ghs);
        d_instantiate(dentry, inode);
@@ -760,7 +761,7 @@ fail:
 */
 static int gfs2_create(struct inode *dir, struct dentry *dentry,
-                       int mode, struct nameidata *nd)
+                       umode_t mode, struct nameidata *nd)
 {
        int excl = 0;
        if (nd && (nd->flags & LOOKUP_EXCL))
@@ -875,8 +876,9 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
        error = 0;
        if (alloc_required) {
-                struct gfs2_alloc *al = gfs2_alloc_get(dip);
+                struct gfs2_qadata *qa = gfs2_qadata_get(dip);
-                if (!al) {
+                if (!qa) {
                        error = -ENOMEM;
                        goto out_gunlock;
                }
@@ -885,9 +887,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
                if (error)
                        goto out_alloc;
-                al->al_requested = sdp->sd_max_dirres;
+                error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres);
-                error = gfs2_inplace_reserve(dip);
                if (error)
                        goto out_gunlock_q;
@@ -930,7 +930,7 @@ out_gunlock_q:
                gfs2_quota_unlock(dip);
 out_alloc:
        if (alloc_required)
-                gfs2_alloc_put(dip);
+                gfs2_qadata_put(dip);
 out_gunlock:
        gfs2_glock_dq(ghs + 1);
 out_child:
@@ -1037,12 +1037,14 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
        struct buffer_head *bh;
        struct gfs2_holder ghs[3];
        struct gfs2_rgrpd *rgd;
-        int error;
+        int error = -EROFS;
        gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
        gfs2_holder_init(ip->i_gl,  LM_ST_EXCLUSIVE, 0, ghs + 1);
        rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
+        if (!rgd)
+                goto out_inodes;
        gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
@@ -1088,12 +1090,13 @@ out_end_trans:
 out_gunlock:
        gfs2_glock_dq(ghs + 2);
 out_rgrp:
-        gfs2_holder_uninit(ghs + 2);
        gfs2_glock_dq(ghs + 1);
 out_child:
-        gfs2_holder_uninit(ghs + 1);
        gfs2_glock_dq(ghs);
 out_parent:
+        gfs2_holder_uninit(ghs + 2);
+out_inodes:
+        gfs2_holder_uninit(ghs + 1);
        gfs2_holder_uninit(ghs);
        return error;
 }
@@ -1129,7 +1132,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
 * Returns: errno
 */
-static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
        return gfs2_create_inode(dir, dentry, S_IFDIR | mode, 0, NULL, 0, 0);
 }
@@ -1143,7 +1146,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 *
 */
-static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
+static int gfs2_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
                      dev_t dev)
 {
        return gfs2_create_inode(dir, dentry, mode, dev, NULL, 0, 0);
@@ -1350,8 +1353,9 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
        error = 0;
        if (alloc_required) {
-                struct gfs2_alloc *al = gfs2_alloc_get(ndip);
+                struct gfs2_qadata *qa = gfs2_qadata_get(ndip);
-                if (!al) {
+                if (!qa) {
                        error = -ENOMEM;
                        goto out_gunlock;
                }
@@ -1360,9 +1364,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
                if (error)
                        goto out_alloc;
-                al->al_requested = sdp->sd_max_dirres;
+                error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres);
-                error = gfs2_inplace_reserve(ndip);
                if (error)
                        goto out_gunlock_q;
@@ -1423,7 +1425,7 @@ out_gunlock_q:
                gfs2_quota_unlock(ndip);
 out_alloc:
        if (alloc_required)
-                gfs2_alloc_put(ndip);
+                gfs2_qadata_put(ndip);
 out_gunlock:
        while (x--) {
                gfs2_glock_dq(ghs + x);
@@ -1584,7 +1586,7 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
        if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
                ogid = ngid = NO_QUOTA_CHANGE;
-        if (!gfs2_alloc_get(ip))
+        if (!gfs2_qadata_get(ip))
                return -ENOMEM;
        error = gfs2_quota_lock(ip, nuid, ngid);
@@ -1616,7 +1618,7 @@ out_end_trans:
 out_gunlock_q:
        gfs2_quota_unlock(ip);
 out_alloc:
-        gfs2_alloc_put(ip);
+        gfs2_qadata_put(ip);
        return error;
 }
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 598646434362..756fae9eaf8f 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -626,7 +626,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
        if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
                submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh);
        else
-                submit_bh(WRITE_FLUSH_FUA | REQ_META | REQ_PRIO, bh);
+                submit_bh(WRITE_FLUSH_FUA | REQ_META, bh);
        wait_on_buffer(bh);
        if (!buffer_uptodate(bh))
@@ -951,8 +951,8 @@ int gfs2_logd(void *data)
                        wake_up(&sdp->sd_log_waitq);
                t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
-                if (freezing(current))
-                        refrigerator();
+                try_to_freeze();
                do {
                        prepare_to_wait(&sdp->sd_logd_waitq, &wait,
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 8a139ff1919f..c150298e2d8e 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -40,7 +40,8 @@ static void gfs2_init_inode_once(void *foo)
        inode_init_once(&ip->i_inode);
        init_rwsem(&ip->i_rw_mutex);
        INIT_LIST_HEAD(&ip->i_trunc_list);
-        ip->i_alloc = NULL;
+        ip->i_qadata = NULL;
+        ip->i_res = NULL;
        ip->i_hash_cache = NULL;
 }
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index be29858900f6..181586e673f9 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -435,7 +435,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
        if (buffer_uptodate(first_bh))
                goto out;
        if (!buffer_locked(first_bh))
-                ll_rw_block(READ_SYNC | REQ_META | REQ_PRIO, 1, &first_bh);
+                ll_rw_block(READ_SYNC | REQ_META, 1, &first_bh);
        dblock++;
        extlen--;
@@ -444,7 +444,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
                bh = gfs2_getbuf(gl, dblock, CREATE);
                if (!buffer_uptodate(bh) && !buffer_locked(bh))
-                        ll_rw_block(READA, 1, &bh);
+                        ll_rw_block(READA | REQ_META, 1, &bh);
                brelse(bh);
                dblock++;
                extlen--;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index cb23c2be731a..fe72e79e6ff9 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -224,7 +224,7 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent)
        bio->bi_end_io = end_bio_io_page;
        bio->bi_private = page;
-        submit_bio(READ_SYNC | REQ_META | REQ_PRIO, bio);
+        submit_bio(READ_SYNC | REQ_META, bio);
        wait_on_page_locked(page);
        bio_put(bio);
        if (!PageUptodate(page)) {
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 7e528dc14f85..a45b21b03915 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -494,11 +494,11 @@ static void qdsb_put(struct gfs2_quota_data *qd)
 int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-        struct gfs2_alloc *al = ip->i_alloc;
+        struct gfs2_qadata *qa = ip->i_qadata;
-        struct gfs2_quota_data **qd = al->al_qd;
+        struct gfs2_quota_data **qd = qa->qa_qd;
        int error;
-        if (gfs2_assert_warn(sdp, !al->al_qd_num) ||
+        if (gfs2_assert_warn(sdp, !qa->qa_qd_num) ||
            gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)))
                return -EIO;
@@ -508,20 +508,20 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
        error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, qd);
        if (error)
                goto out;
-        al->al_qd_num++;
+        qa->qa_qd_num++;
        qd++;
        error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, qd);
        if (error)
                goto out;
-        al->al_qd_num++;
+        qa->qa_qd_num++;
        qd++;
        if (uid != NO_QUOTA_CHANGE && uid != ip->i_inode.i_uid) {
                error = qdsb_get(sdp, QUOTA_USER, uid, qd);
                if (error)
                        goto out;
-                al->al_qd_num++;
+                qa->qa_qd_num++;
                qd++;
        }
@@ -529,7 +529,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
                error = qdsb_get(sdp, QUOTA_GROUP, gid, qd);
                if (error)
                        goto out;
-                al->al_qd_num++;
+                qa->qa_qd_num++;
                qd++;
        }
@@ -542,16 +542,16 @@ out:
 void gfs2_quota_unhold(struct gfs2_inode *ip)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-        struct gfs2_alloc *al = ip->i_alloc;
+        struct gfs2_qadata *qa = ip->i_qadata;
        unsigned int x;
        gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags));
-        for (x = 0; x < al->al_qd_num; x++) {
+        for (x = 0; x < qa->qa_qd_num; x++) {
-                qdsb_put(al->al_qd[x]);
+                qdsb_put(qa->qa_qd[x]);
-                al->al_qd[x] = NULL;
+                qa->qa_qd[x] = NULL;
        }
-        al->al_qd_num = 0;
+        qa->qa_qd_num = 0;
 }
 static int sort_qd(const void *a, const void *b)
@@ -712,7 +712,7 @@ get_a_page:
                set_buffer_uptodate(bh);
        if (!buffer_uptodate(bh)) {
-                ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
+                ll_rw_block(READ | REQ_META, 1, &bh);
                wait_on_buffer(bh);
                if (!buffer_uptodate(bh))
                        goto unlock_out;
@@ -762,7 +762,6 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
        struct gfs2_quota_data *qd;
        loff_t offset;
        unsigned int nalloc = 0, blocks;
-        struct gfs2_alloc *al = NULL;
        int error;
        gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
@@ -792,26 +791,19 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
                        nalloc++;
        }
-        al = gfs2_alloc_get(ip);
-        if (!al) {
-                error = -ENOMEM;
-                goto out_gunlock;
-        }
        /* 
         * 1 blk for unstuffing inode if stuffed. We add this extra
         * block to the reservation unconditionally. If the inode
         * doesn't need unstuffing, the block will be released to the 
         * rgrp since it won't be allocated during the transaction
         */
-        al->al_requested = 1;
        /* +3 in the end for unstuffing block, inode size update block
         * and another block in case quota straddles page boundary and 
         * two blocks need to be updated instead of 1 */
        blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3;
-        if (nalloc)
+        error = gfs2_inplace_reserve(ip, 1 +
-                al->al_requested += nalloc * (data_blocks + ind_blocks);                
+                                     (nalloc * (data_blocks + ind_blocks)));
-        error = gfs2_inplace_reserve(ip);
        if (error)
                goto out_alloc;
@@ -840,8 +832,6 @@ out_end_trans:
 out_ipres:
        gfs2_inplace_release(ip);
 out_alloc:
-        gfs2_alloc_put(ip);
-out_gunlock:
        gfs2_glock_dq_uninit(&i_gh);
 out:
        while (qx--)
@@ -925,7 +915,7 @@ fail:
 int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-        struct gfs2_alloc *al = ip->i_alloc;
+        struct gfs2_qadata *qa = ip->i_qadata;
        struct gfs2_quota_data *qd;
        unsigned int x;
        int error = 0;
@@ -938,15 +928,15 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
            sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
                return 0;
-        sort(al->al_qd, al->al_qd_num, sizeof(struct gfs2_quota_data *),
+        sort(qa->qa_qd, qa->qa_qd_num, sizeof(struct gfs2_quota_data *),
             sort_qd, NULL);
-        for (x = 0; x < al->al_qd_num; x++) {
+        for (x = 0; x < qa->qa_qd_num; x++) {
                int force = NO_FORCE;
-                qd = al->al_qd[x];
+                qd = qa->qa_qd[x];
                if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags))
                        force = FORCE;
-                error = do_glock(qd, force, &al->al_qd_ghs[x]);
+                error = do_glock(qd, force, &qa->qa_qd_ghs[x]);
                if (error)
                        break;
        }
@@ -955,7 +945,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
                set_bit(GIF_QD_LOCKED, &ip->i_flags);
        else {
                while (x--)
-                        gfs2_glock_dq_uninit(&al->al_qd_ghs[x]);
+                        gfs2_glock_dq_uninit(&qa->qa_qd_ghs[x]);
                gfs2_quota_unhold(ip);
        }
@@ -1000,7 +990,7 @@ static int need_sync(struct gfs2_quota_data *qd)
 void gfs2_quota_unlock(struct gfs2_inode *ip)
 {
-        struct gfs2_alloc *al = ip->i_alloc;
+        struct gfs2_qadata *qa = ip->i_qadata;
        struct gfs2_quota_data *qda[4];
        unsigned int count = 0;
        unsigned int x;
@@ -1008,14 +998,14 @@ void gfs2_quota_unlock(struct gfs2_inode *ip)
        if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags))
                goto out;
-        for (x = 0; x < al->al_qd_num; x++) {
+        for (x = 0; x < qa->qa_qd_num; x++) {
                struct gfs2_quota_data *qd;
                int sync;
-                qd = al->al_qd[x];
+                qd = qa->qa_qd[x];
                sync = need_sync(qd);
-                gfs2_glock_dq_uninit(&al->al_qd_ghs[x]);
+                gfs2_glock_dq_uninit(&qa->qa_qd_ghs[x]);
                if (sync && qd_trylock(qd))
                        qda[count++] = qd;
@@ -1048,7 +1038,7 @@ static int print_message(struct gfs2_quota_data *qd, char *type)
 int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-        struct gfs2_alloc *al = ip->i_alloc;
+        struct gfs2_qadata *qa = ip->i_qadata;
        struct gfs2_quota_data *qd;
        s64 value;
        unsigned int x;
@@ -1060,8 +1050,8 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
        if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
                return 0;
-        for (x = 0; x < al->al_qd_num; x++) {
+        for (x = 0; x < qa->qa_qd_num; x++) {
-                qd = al->al_qd[x];
+                qd = qa->qa_qd[x];
                if (!((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
                      (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))))
@@ -1099,7 +1089,7 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
 void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
                       u32 uid, u32 gid)
 {
-        struct gfs2_alloc *al = ip->i_alloc;
+        struct gfs2_qadata *qa = ip->i_qadata;
        struct gfs2_quota_data *qd;
        unsigned int x;
@@ -1108,8 +1098,8 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
        if (ip->i_diskflags & GFS2_DIF_SYSTEM)
                return;
-        for (x = 0; x < al->al_qd_num; x++) {
+        for (x = 0; x < qa->qa_qd_num; x++) {
-                qd = al->al_qd[x];
+                qd = qa->qa_qd[x];
                if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
                    (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) {
@@ -1427,8 +1417,8 @@ int gfs2_quotad(void *data)
                /* Check for & recover partially truncated inodes */
                quotad_check_trunc_list(sdp);
-                if (freezing(current))
+                try_to_freeze();
-                        refrigerator();
                t = min(quotad_timeo, statfs_timeo);
                prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_INTERRUPTIBLE);
@@ -1529,7 +1519,6 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
        unsigned int data_blocks, ind_blocks;
        unsigned int blocks = 0;
        int alloc_required;
-        struct gfs2_alloc *al;
        loff_t offset;
        int error;
@@ -1594,15 +1583,12 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
        if (gfs2_is_stuffed(ip))
                alloc_required = 1;
        if (alloc_required) {
-                al = gfs2_alloc_get(ip);
-                if (al == NULL)
-                        goto out_i;
                gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
                                       &data_blocks, &ind_blocks);
-                blocks = al->al_requested = 1 + data_blocks + ind_blocks;
+                blocks = 1 + data_blocks + ind_blocks;
-                error = gfs2_inplace_reserve(ip);
+                error = gfs2_inplace_reserve(ip, blocks);
                if (error)
-                        goto out_alloc;
+                        goto out_i;
                blocks += gfs2_rg_blocks(ip);
        }
@@ -1617,11 +1603,8 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
        gfs2_trans_end(sdp);
 out_release:
-        if (alloc_required) {
+        if (alloc_required)
                gfs2_inplace_release(ip);
-out_alloc:
-                gfs2_alloc_put(ip);
-        }
 out_i:
        gfs2_glock_dq_uninit(&i_gh);
 out_q:
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 96bd6d759f29..22234627f684 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -65,8 +65,8 @@ static const char valid_change[16] = {
 };
 static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
-                        unsigned char old_state, unsigned char new_state,
+                        unsigned char old_state,
-                        unsigned int *n);
+                        struct gfs2_bitmap **rbi);
 /**
 * gfs2_setbit - Set a bit in the bitmaps
@@ -860,22 +860,36 @@ fail:
 }
 /**
- * gfs2_alloc_get - get the struct gfs2_alloc structure for an inode
+ * gfs2_qadata_get - get the struct gfs2_qadata structure for an inode
 * @ip: the incore GFS2 inode structure
 *
- * Returns: the struct gfs2_alloc
+ * Returns: the struct gfs2_qadata
 */
-struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip)
+struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        int error;
-        BUG_ON(ip->i_alloc != NULL);
+        BUG_ON(ip->i_qadata != NULL);
-        ip->i_alloc = kzalloc(sizeof(struct gfs2_alloc), GFP_NOFS);
+        ip->i_qadata = kzalloc(sizeof(struct gfs2_qadata), GFP_NOFS);
        error = gfs2_rindex_update(sdp);
        if (error)
                fs_warn(sdp, "rindex update returns %d\n", error);
-        return ip->i_alloc;
+        return ip->i_qadata;
+}
+/**
+ * gfs2_blkrsv_get - allocate the struct gfs2_blkreserv structure for an inode
+ * @ip: the incore GFS2 inode structure
+ *
+ * The inode must not already have a reservation attached (ip->i_res must be
+ * NULL). The newly allocated, zeroed structure is stored in ip->i_res.
+ *
+ * Returns: the struct gfs2_blkreserv, or NULL if the allocation failed
+ */
+static struct gfs2_blkreserv *gfs2_blkrsv_get(struct gfs2_inode *ip)
+{
+        BUG_ON(ip->i_res != NULL);
+        ip->i_res = kzalloc(sizeof(struct gfs2_blkreserv), GFP_NOFS);
+        return ip->i_res;
 }
 /**
@@ -890,15 +904,20 @@ struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip)
 static int try_rgrp_fit(const struct gfs2_rgrpd *rgd, const struct gfs2_inode *ip)
 {
-        const struct gfs2_alloc *al = ip->i_alloc;
+        const struct gfs2_blkreserv *rs = ip->i_res;
        if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR))
                return 0;
-        if (rgd->rd_free_clone >= al->al_requested)
+        if (rgd->rd_free_clone >= rs->rs_requested)
                return 1;
        return 0;
 }
+/**
+ * gfs2_bi2rgd_blk - convert a bitmap-relative block number to rgrp scope
+ * @bi: the bitmap that @blk is relative to
+ * @blk: the block number within @bi
+ *
+ * Returns: (bi->bi_start * GFS2_NBBY) + @blk
+ */
+static inline u32 gfs2_bi2rgd_blk(struct gfs2_bitmap *bi, u32 blk)
+{
+        return (bi->bi_start * GFS2_NBBY) + blk;
+}
 /**
 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
 * @rgd: The rgrp
@@ -912,20 +931,20 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
        u32 goal = 0, block;
        u64 no_addr;
        struct gfs2_sbd *sdp = rgd->rd_sbd;
-        unsigned int n;
        struct gfs2_glock *gl;
        struct gfs2_inode *ip;
        int error;
        int found = 0;
+        struct gfs2_bitmap *bi;
        while (goal < rgd->rd_data) {
                down_write(&sdp->sd_log_flush_lock);
-                n = 1;
+                block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, &bi);
-                block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED,
-                                     GFS2_BLKST_UNLINKED, &n);
                up_write(&sdp->sd_log_flush_lock);
                if (block == BFITNOENT)
                        break;
+                block = gfs2_bi2rgd_blk(bi, block);
                /* rgblk_search can return a block < goal, so we need to
                   keep it marching forward. */
                no_addr = block + rgd->rd_data0;
@@ -977,8 +996,8 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        struct gfs2_rgrpd *rgd, *begin = NULL;
-        struct gfs2_alloc *al = ip->i_alloc;
+        struct gfs2_blkreserv *rs = ip->i_res;
-        int error, rg_locked;
+        int error, rg_locked, flags = LM_FLAG_TRY;
        int loops = 0;
        if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal))
@@ -997,7 +1016,7 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
                        error = 0;
                } else {
                        error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
-                                                   LM_FLAG_TRY, &al->al_rgd_gh);
+                                                   flags, &rs->rs_rgd_gh);
                }
                switch (error) {
                case 0:
@@ -1008,12 +1027,14 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
                        if (rgd->rd_flags & GFS2_RDF_CHECK)
                                try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
                        if (!rg_locked)
-                                gfs2_glock_dq_uninit(&al->al_rgd_gh);
+                                gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
                        /* fall through */
                case GLR_TRYFAILED:
                        rgd = gfs2_rgrpd_get_next(rgd);
-                        if (rgd == begin)
+                        if (rgd == begin) {
+                                flags = 0;
                                loops++;
+                        }
                        break;
                default:
                        return error;
@@ -1023,6 +1044,13 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
        return -ENOSPC;
 }
+/**
+ * gfs2_blkrsv_put - free the struct gfs2_blkreserv structure of an inode
+ * @ip: the incore GFS2 inode structure
+ *
+ * The inode must have a reservation attached (ip->i_res must not be NULL).
+ * The structure is freed and ip->i_res is reset to NULL, so any pointer to
+ * the old reservation is invalid once this returns.
+ */
+static void gfs2_blkrsv_put(struct gfs2_inode *ip)
+{
+        BUG_ON(ip->i_res == NULL);
+        kfree(ip->i_res);
+        ip->i_res = NULL;
+}
 /**
 * gfs2_inplace_reserve - Reserve space in the filesystem
 * @ip: the inode to reserve space for
@@ -1030,16 +1058,23 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
 * Returns: errno
 */
-int gfs2_inplace_reserve(struct gfs2_inode *ip)
+int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-        struct gfs2_alloc *al = ip->i_alloc;
+        struct gfs2_blkreserv *rs;
        int error = 0;
        u64 last_unlinked = NO_BLOCK;
        int tries = 0;
-        if (gfs2_assert_warn(sdp, al->al_requested))
+        rs = gfs2_blkrsv_get(ip);
-                return -EINVAL;
+        if (!rs)
+                return -ENOMEM;
+        rs->rs_requested = requested;
+        if (gfs2_assert_warn(sdp, requested)) {
+                error = -EINVAL;
+                goto out;
+        }
        do {
                error = get_local_rgrp(ip, &last_unlinked);
@@ -1056,6 +1091,9 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip)
                gfs2_log_flush(sdp, NULL);
        } while (tries++ < 3);
+out:
+        if (error)
+                gfs2_blkrsv_put(ip);
        return error;
 }
@@ -1068,10 +1106,11 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip)
 void gfs2_inplace_release(struct gfs2_inode *ip)
 {
-        struct gfs2_alloc *al = ip->i_alloc;
+        struct gfs2_blkreserv *rs = ip->i_res;
-        if (al->al_rgd_gh.gh_gl)
+        /*
+         * The glock holder rs_rgd_gh is embedded in *rs, so the glock must
+         * be dropped before gfs2_blkrsv_put() frees the reservation;
+         * doing it the other way round is a use-after-free.
+         */
+        if (rs->rs_rgd_gh.gh_gl)
-                gfs2_glock_dq_uninit(&al->al_rgd_gh);
+                gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
+        gfs2_blkrsv_put(ip);
 }
 /**
@@ -1108,39 +1147,35 @@ static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
 }
 /**
- * rgblk_search - find a block in @old_state, change allocation
+ * rgblk_search - find a block in @state
- *           state to @new_state
 * @rgd: the resource group descriptor
 * @goal: the goal block within the RG (start here to search for avail block)
- * @old_state: GFS2_BLKST_XXX the before-allocation state to find
+ * @state: GFS2_BLKST_XXX the before-allocation state to find
- * @new_state: GFS2_BLKST_XXX the after-allocation block state
+ * @dinode: TRUE if the first block we allocate is for a dinode
- * @n: The extent length
+ * @rbi: address of the pointer to the bitmap containing the block found
 *
- * Walk rgrp's bitmap to find bits that represent a block in @old_state.
+ * Walk rgrp's bitmap to find bits that represent a block in @state.
- * Add the found bitmap buffer to the transaction.
- * Set the found bits to @new_state to change block's allocation state.
 *
 * This function never fails, because we wouldn't call it unless we
 * know (from reservation results, etc.) that a block is available.
 *
- * Scope of @goal and returned block is just within rgrp, not the whole
+ * Scope of @goal is just within rgrp, not the whole filesystem.
- * filesystem.
+ * Scope of the returned block is just within the bitmap, not the whole
+ * filesystem.
 *
- * Returns:  the block number allocated
+ * Returns: the block number found relative to the bitmap rbi
 */
 static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
-                        unsigned char old_state, unsigned char new_state,
+                        unsigned char state,
-                        unsigned int *n)
+                        struct gfs2_bitmap **rbi)
 {
        struct gfs2_bitmap *bi = NULL;
        const u32 length = rgd->rd_length;
        u32 blk = BFITNOENT;
        unsigned int buf, x;
-        const unsigned int elen = *n;
        const u8 *buffer = NULL;
-        *n = 0;
+        *rbi = NULL;
        /* Find bitmap block that contains bits for goal block */
        for (buf = 0; buf < length; buf++) {
                bi = rgd->rd_bits + buf;
@@ -1163,21 +1198,21 @@ do_search:
                bi = rgd->rd_bits + buf;
                if (test_bit(GBF_FULL, &bi->bi_flags) &&
-                    (old_state == GFS2_BLKST_FREE))
+                    (state == GFS2_BLKST_FREE))
                        goto skip;
                /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone
                   bitmaps, so we must search the originals for that. */
                buffer = bi->bi_bh->b_data + bi->bi_offset;
                WARN_ON(!buffer_uptodate(bi->bi_bh));
-                if (old_state != GFS2_BLKST_UNLINKED && bi->bi_clone)
+                if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
                        buffer = bi->bi_clone + bi->bi_offset;
-                blk = gfs2_bitfit(buffer, bi->bi_len, goal, old_state);
+                blk = gfs2_bitfit(buffer, bi->bi_len, goal, state);
                if (blk != BFITNOENT)
                        break;
-                if ((goal == 0) && (old_state == GFS2_BLKST_FREE))
+                if ((goal == 0) && (state == GFS2_BLKST_FREE))
                        set_bit(GBF_FULL, &bi->bi_flags);
                /* Try next bitmap block (wrap back to rgrp header if at end) */
@@ -1187,16 +1222,37 @@ skip:
                goal = 0;
        }
-        if (blk == BFITNOENT)
+        if (blk != BFITNOENT)
-                return blk;
+                *rbi = bi;
-        *n = 1;
+        return blk;
-        if (old_state == new_state)
+}
-                goto out;
+/**
+ * gfs2_alloc_extent - allocate an extent from a given bitmap
+ * @rgd: the resource group descriptor
+ * @bi: the bitmap within the rgrp
+ * @blk: the block within the bitmap
+ * @dinode: TRUE if the first block we allocate is for a dinode
+ * @n: The extent length
+ *
+ * Add the found bitmap buffer to the transaction.
+ * Set the first block to GFS2_BLKST_DINODE if @dinode is set, otherwise to
+ * GFS2_BLKST_USED; any further blocks of the extent are set to GFS2_BLKST_USED.
+ * Returns: starting block number of the extent (fs scope)
+ */
+static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi,
+                             u32 blk, bool dinode, unsigned int *n)
+{
+        const unsigned int elen = *n;
+        u32 goal;
+        const u8 *buffer = NULL;
+        *n = 0;
+        buffer = bi->bi_bh->b_data + bi->bi_offset;
        gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
        gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
-                    bi, blk, new_state);
+                    bi, blk, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
+        (*n)++;
        goal = blk;
        while (*n < elen) {
                goal++;
@@ -1206,11 +1262,12 @@ skip:
                    GFS2_BLKST_FREE)
                        break;
                gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
-                            bi, goal, new_state);
+                            bi, goal, GFS2_BLKST_USED);
                (*n)++;
        }
-out:
+        blk = gfs2_bi2rgd_blk(bi, blk);
-        return (bi->bi_start * GFS2_NBBY) + blk;
+        rgd->rd_last_alloc = blk + *n - 1;
+        return rgd->rd_data0 + blk;
 }
 /**
@@ -1298,121 +1355,93 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
 }
 /**
- * gfs2_alloc_block - Allocate one or more blocks
+ * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode
 * @ip: the inode to allocate the block for
 * @bn: Used to return the starting block number
- * @n: requested number of blocks/extent length (value/result)
+ * @nblocks: requested number of blocks/extent length (value/result)
+ * @dinode: 1 if we're allocating a dinode block, else 0
+ * @generation: the generation number of the inode
 *
 * Returns: 0 or error
 */
-int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n)
+int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
+                      bool dinode, u64 *generation)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        struct buffer_head *dibh;
-        struct gfs2_alloc *al = ip->i_alloc;
        struct gfs2_rgrpd *rgd;
-        u32 goal, blk;
+        unsigned int ndata;
-        u64 block;
+        u32 goal, blk; /* block, within the rgrp scope */
+        u64 block; /* block, within the file system scope */
        int error;
+        struct gfs2_bitmap *bi;
        /* Only happens if there is a bug in gfs2, return something distinctive
         * to ensure that it is noticed.
         */
-        if (al == NULL)
+        if (ip->i_res == NULL)
                return -ECANCELED;
        rgd = ip->i_rgd;
-        if (rgrp_contains_block(rgd, ip->i_goal))
+        if (!dinode && rgrp_contains_block(rgd, ip->i_goal))
                goal = ip->i_goal - rgd->rd_data0;
        else
                goal = rgd->rd_last_alloc;
-        blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED, n);
+        blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi);
        /* Since all blocks are reserved in advance, this shouldn't happen */
        if (blk == BFITNOENT)
                goto rgrp_error;
-        rgd->rd_last_alloc = blk;
+        block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks);
-        block = rgd->rd_data0 + blk;
+        ndata = *nblocks;
-        ip->i_goal = block + *n - 1;
+        if (dinode)
-        error = gfs2_meta_inode_buffer(ip, &dibh);
+                ndata--;
-        if (error == 0) {
-                struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
+        if (!dinode) {
-                gfs2_trans_add_bh(ip->i_gl, dibh, 1);
+                ip->i_goal = block + ndata - 1;
-                di->di_goal_meta = di->di_goal_data = cpu_to_be64(ip->i_goal);
+                error = gfs2_meta_inode_buffer(ip, &dibh);
-                brelse(dibh);
+                if (error == 0) {
+                        struct gfs2_dinode *di =
+                                (struct gfs2_dinode *)dibh->b_data;
+                        gfs2_trans_add_bh(ip->i_gl, dibh, 1);
+                        di->di_goal_meta = di->di_goal_data =
+                                cpu_to_be64(ip->i_goal);
+                        brelse(dibh);
+                }
        }
-        if (rgd->rd_free < *n)
+        if (rgd->rd_free < *nblocks)
                goto rgrp_error;
-        rgd->rd_free -= *n;
+        rgd->rd_free -= *nblocks;
+        if (dinode) {
-        gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
+                rgd->rd_dinodes++;
-        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
-        al->al_alloced += *n;
-        gfs2_statfs_change(sdp, 0, -(s64)*n, 0);
-        gfs2_quota_change(ip, *n, ip->i_inode.i_uid, ip->i_inode.i_gid);
-        rgd->rd_free_clone -= *n;
-        trace_gfs2_block_alloc(ip, block, *n, GFS2_BLKST_USED);
-        *bn = block;
-        return 0;
-rgrp_error:
-        gfs2_rgrp_error(rgd);
-        return -EIO;
-}
-/**
- * gfs2_alloc_di - Allocate a dinode
- * @dip: the directory that the inode is going in
- * @bn: the block number which is allocated
- * @generation: the generation number of the inode
- *
- * Returns: 0 on success or error
- */
-int gfs2_alloc_di(struct gfs2_inode *dip, u64 *bn, u64 *generation)
-{
-        struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
-        struct gfs2_alloc *al = dip->i_alloc;
-        struct gfs2_rgrpd *rgd = dip->i_rgd;
-        u32 blk;
-        u64 block;
-        unsigned int n = 1;
-        blk = rgblk_search(rgd, rgd->rd_last_alloc,
-                           GFS2_BLKST_FREE, GFS2_BLKST_DINODE, &n);
-        /* Since all blocks are reserved in advance, this shouldn't happen */
-        if (blk == BFITNOENT)
-                goto rgrp_error;
-        rgd->rd_last_alloc = blk;
-        block = rgd->rd_data0 + blk;
-        if (rgd->rd_free == 0)
-                goto rgrp_error;
-        rgd->rd_free--;
-        rgd->rd_dinodes++;
-        *generation = rgd->rd_igeneration++;
-        if (*generation == 0)
                *generation = rgd->rd_igeneration++;
+                if (*generation == 0)
+                        *generation = rgd->rd_igeneration++;
+        }
        gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
-        al->al_alloced++;
+        gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
+        if (dinode)
+                gfs2_trans_add_unrevoke(sdp, block, 1);
-        gfs2_statfs_change(sdp, 0, -1, +1);
+        /*
-        gfs2_trans_add_unrevoke(sdp, block, 1);
+         * This needs reviewing to see why we cannot do the quota change
+         * at this point in the dinode case.
+         */
+        if (ndata)
+                gfs2_quota_change(ip, ndata, ip->i_inode.i_uid,
+                                  ip->i_inode.i_gid);
-        rgd->rd_free_clone--;
+        rgd->rd_free_clone -= *nblocks;
-        trace_gfs2_block_alloc(dip, block, 1, GFS2_BLKST_DINODE);
+        trace_gfs2_block_alloc(ip, block, *nblocks,
+                               dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
        *bn = block;
        return 0;
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index cf5c50180192..ceec9106cdf4 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -28,19 +28,19 @@ extern void gfs2_free_clones(struct gfs2_rgrpd *rgd);
 extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh);
 extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh);
-extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
+extern struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip);
-static inline void gfs2_alloc_put(struct gfs2_inode *ip)
+static inline void gfs2_qadata_put(struct gfs2_inode *ip)
 {
-        BUG_ON(ip->i_alloc == NULL);
+        BUG_ON(ip->i_qadata == NULL);
-        kfree(ip->i_alloc);
+        kfree(ip->i_qadata);
-        ip->i_alloc = NULL;
+        ip->i_qadata = NULL;
 }
-extern int gfs2_inplace_reserve(struct gfs2_inode *ip);
+extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested);
 extern void gfs2_inplace_release(struct gfs2_inode *ip);
-extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n);
+extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
-extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation);
+                             bool dinode, u64 *generation);
 extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta);
 extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 71e420989f77..4553ce515f62 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1284,18 +1284,18 @@ static int is_ancestor(const struct dentry *d1, const struct dentry *d2)
 /**
 * gfs2_show_options - Show mount options for /proc/mounts
 * @s: seq_file structure
- * @mnt: vfsmount
+ * @root: root of this (sub)tree
 *
 * Returns: 0 on success or error code
 */
-static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
+static int gfs2_show_options(struct seq_file *s, struct dentry *root)
 {
-        struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info;
+        struct gfs2_sbd *sdp = root->d_sb->s_fs_info;
        struct gfs2_args *args = &sdp->sd_args;
        int val;
-        if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir))
+        if (is_ancestor(root, sdp->sd_master_dir))
                seq_printf(s, ",meta");
        if (args->ar_lockproto[0])
                seq_printf(s, ",lockproto=%s", args->ar_lockproto);
@@ -1399,8 +1399,9 @@ static void gfs2_final_release_pages(struct gfs2_inode *ip)
 static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-        struct gfs2_alloc *al;
+        struct gfs2_qadata *qa;
        struct gfs2_rgrpd *rgd;
+        struct gfs2_holder gh;
        int error;
        if (gfs2_get_inode_blocks(&ip->i_inode) != 1) {
@@ -1408,8 +1409,8 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
                return -EIO;
        }
-        al = gfs2_alloc_get(ip);
+        qa = gfs2_qadata_get(ip);
-        if (!al)
+        if (!qa)
                return -ENOMEM;
        error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
@@ -1423,8 +1424,7 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
                goto out_qs;
        }
-        error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
+        error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
-                                   &al->al_rgd_gh);
        if (error)
                goto out_qs;
@@ -1440,11 +1440,11 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
        gfs2_trans_end(sdp);
 out_rg_gunlock:
-        gfs2_glock_dq_uninit(&al->al_rgd_gh);
+        gfs2_glock_dq_uninit(&gh);
 out_qs:
        gfs2_quota_unhold(ip);
 out:
-        gfs2_alloc_put(ip);
+        gfs2_qadata_put(ip);
        return error;
 }
@@ -1582,7 +1582,6 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb)
 static void gfs2_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(gfs2_inode_cachep, inode);
 }
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
index f8f101ef600c..125d4572e1c0 100644
--- a/fs/gfs2/trans.h
+++ b/fs/gfs2/trans.h
@@ -30,9 +30,9 @@ struct gfs2_glock;
 * block, or all of the blocks in the rg, whichever is smaller */
 static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip)
 {
-        const struct gfs2_alloc *al = ip->i_alloc;
+        const struct gfs2_blkreserv *rs = ip->i_res;
-        if (al->al_requested < ip->i_rgd->rd_length)
+        if (rs->rs_requested < ip->i_rgd->rd_length)
-                return al->al_requested + 1;
+                return rs->rs_requested + 1;
        return ip->i_rgd->rd_length;
 }
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 71d7bf830c09..e9636591b5d5 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -321,11 +321,11 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
                               struct gfs2_ea_header *ea,
                               struct gfs2_ea_header *prev, int leave)
 {
-        struct gfs2_alloc *al;
+        struct gfs2_qadata *qa;
        int error;
-        al = gfs2_alloc_get(ip);
+        qa = gfs2_qadata_get(ip);
-        if (!al)
+        if (!qa)
                return -ENOMEM;
        error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
@@ -336,7 +336,7 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
        gfs2_quota_unhold(ip);
 out_alloc:
-        gfs2_alloc_put(ip);
+        gfs2_qadata_put(ip);
        return error;
 }
@@ -549,9 +549,10 @@ int gfs2_xattr_acl_get(struct gfs2_inode *ip, const char *name, char **ppdata)
                goto out;
        error = gfs2_ea_get_copy(ip, &el, data, len);
-        if (error == 0)
+        if (error < 0)
-                error = len;
+                kfree(data);
-        *ppdata = data;
+        else
+                *ppdata = data;
 out:
        brelse(el.el_bh);
        return error;
@@ -609,7 +610,7 @@ static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp)
        u64 block;
        int error;
-        error = gfs2_alloc_block(ip, &block, &n);
+        error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
        if (error)
                return error;
        gfs2_trans_add_unrevoke(sdp, block, 1);
@@ -671,7 +672,7 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
                        int mh_size = sizeof(struct gfs2_meta_header);
                        unsigned int n = 1;
-                        error = gfs2_alloc_block(ip, &block, &n);
+                        error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
                        if (error)
                                return error;
                        gfs2_trans_add_unrevoke(sdp, block, 1);
@@ -708,21 +709,19 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
                             unsigned int blks,
                             ea_skeleton_call_t skeleton_call, void *private)
 {
-        struct gfs2_alloc *al;
+        struct gfs2_qadata *qa;
        struct buffer_head *dibh;
        int error;
-        al = gfs2_alloc_get(ip);
+        qa = gfs2_qadata_get(ip);
-        if (!al)
+        if (!qa)
                return -ENOMEM;
        error = gfs2_quota_lock_check(ip);
        if (error)
                goto out;
-        al->al_requested = blks;
+        error = gfs2_inplace_reserve(ip, blks);
-        error = gfs2_inplace_reserve(ip);
        if (error)
                goto out_gunlock_q;
@@ -751,7 +750,7 @@ out_ipres:
 out_gunlock_q:
        gfs2_quota_unlock(ip);
 out:
-        gfs2_alloc_put(ip);
+        gfs2_qadata_put(ip);
        return error;
 }
@@ -991,7 +990,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
        } else {
                u64 blk;
                unsigned int n = 1;
-                error = gfs2_alloc_block(ip, &blk, &n);
+                error = gfs2_alloc_blocks(ip, &blk, &n, 0, NULL);
                if (error)
                        return error;
                gfs2_trans_add_unrevoke(sdp, blk, 1);
@@ -1435,9 +1434,9 @@ out:
 static int ea_dealloc_block(struct gfs2_inode *ip)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-        struct gfs2_alloc *al = ip->i_alloc;
        struct gfs2_rgrpd *rgd;
        struct buffer_head *dibh;
+        struct gfs2_holder gh;
        int error;
        rgd = gfs2_blk2rgrpd(sdp, ip->i_eattr);
@@ -1446,8 +1445,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
                return -EIO;
        }
-        error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
+        error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
-                                   &al->al_rgd_gh);
        if (error)
                return error;
@@ -1471,7 +1469,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
        gfs2_trans_end(sdp);
 out_gunlock:
-        gfs2_glock_dq_uninit(&al->al_rgd_gh);
+        gfs2_glock_dq_uninit(&gh);
        return error;
 }
@@ -1484,11 +1482,11 @@ out_gunlock:
 int gfs2_ea_dealloc(struct gfs2_inode *ip)
 {
-        struct gfs2_alloc *al;
+        struct gfs2_qadata *qa;
        int error;
-        al = gfs2_alloc_get(ip);
+        qa = gfs2_qadata_get(ip);
-        if (!al)
+        if (!qa)
                return -ENOMEM;
        error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
@@ -1510,7 +1508,7 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip)
 out_quota:
        gfs2_quota_unhold(ip);
 out_alloc:
-        gfs2_alloc_put(ip);
+        gfs2_qadata_put(ip);
        return error;
 }
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index bce4eef91a06..62fc14ea4b73 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -186,7 +186,7 @@ static int hfs_dir_release(struct inode *inode, struct file *file)
 * a directory and return a corresponding inode, given the inode for
 * the directory and the name (and its length) of the new file.
 */
-static int hfs_create(struct inode *dir, struct dentry *dentry, int mode,
+static int hfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
                      struct nameidata *nd)
 {
        struct inode *inode;
@@ -216,7 +216,7 @@ static int hfs_create(struct inode *dir, struct dentry *dentry, int mode,
 * in a directory, given the inode for the parent directory and the
 * name (and its length) of the new directory.
 */
-static int hfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int hfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
        struct inode *inode;
        int res;
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index ad97c2d58287..1bf967c6bfdc 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -184,7 +184,7 @@ extern int hfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
 extern const struct address_space_operations hfs_aops;
 extern const struct address_space_operations hfs_btree_aops;
-extern struct inode *hfs_new_inode(struct inode *, struct qstr *, int);
+extern struct inode *hfs_new_inode(struct inode *, struct qstr *, umode_t);
 extern void hfs_inode_write_fork(struct inode *, struct hfs_extent *, __be32 *, __be32 *);
 extern int hfs_write_inode(struct inode *, struct writeback_control *);
 extern int hfs_inode_setattr(struct dentry *, struct iattr *);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index a1a9fdcd2a00..737dbeb64320 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -169,7 +169,7 @@ const struct address_space_operations hfs_aops = {
 /*
 * hfs_new_inode
 */
-struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, int mode)
+struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, umode_t mode)
 {
        struct super_block *sb = dir->i_sb;
        struct inode *inode = new_inode(sb);
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 1b55f704fb22..8137fb3e6780 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -133,9 +133,9 @@ static int hfs_remount(struct super_block *sb, int *flags, char *data)
        return 0;
 }
-static int hfs_show_options(struct seq_file *seq, struct vfsmount *mnt)
+static int hfs_show_options(struct seq_file *seq, struct dentry *root)
 {
-        struct hfs_sb_info *sbi = HFS_SB(mnt->mnt_sb);
+        struct hfs_sb_info *sbi = HFS_SB(root->d_sb);
        if (sbi->s_creator != cpu_to_be32(0x3f3f3f3f))
                seq_printf(seq, ",creator=%.4s", (char *)&sbi->s_creator);
@@ -170,7 +170,6 @@ static struct inode *hfs_alloc_inode(struct super_block *sb)
 static void hfs_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(hfs_inode_cachep, HFS_I(inode));
 }
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 4536cd3f15ae..88e155f895c6 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -424,7 +424,7 @@ out:
 }
 static int hfsplus_mknod(struct inode *dir, struct dentry *dentry,
-                         int mode, dev_t rdev)
+                         umode_t mode, dev_t rdev)
 {
        struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb);
        struct inode *inode;
@@ -453,13 +453,13 @@ out:
        return res;
 }
-static int hfsplus_create(struct inode *dir, struct dentry *dentry, int mode,
+static int hfsplus_create(struct inode *dir, struct dentry *dentry, umode_t mode,
                          struct nameidata *nd)
 {
        return hfsplus_mknod(dir, dentry, mode, 0);
 }
-static int hfsplus_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int hfsplus_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
        return hfsplus_mknod(dir, dentry, mode | S_IFDIR, 0);
 }
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index d7674d051f52..21a5b7fc6db4 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -402,7 +402,7 @@ void hfsplus_inode_read_fork(struct inode *, struct hfsplus_fork_raw *);
 void hfsplus_inode_write_fork(struct inode *, struct hfsplus_fork_raw *);
 int hfsplus_cat_read_inode(struct inode *, struct hfs_find_data *);
 int hfsplus_cat_write_inode(struct inode *);
-struct inode *hfsplus_new_inode(struct super_block *, int);
+struct inode *hfsplus_new_inode(struct super_block *, umode_t);
 void hfsplus_delete_inode(struct inode *);
 int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
                       int datasync);
@@ -419,7 +419,7 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size);
 int hfsplus_parse_options(char *, struct hfsplus_sb_info *);
 int hfsplus_parse_options_remount(char *input, int *force);
 void hfsplus_fill_defaults(struct hfsplus_sb_info *);
-int hfsplus_show_options(struct seq_file *, struct vfsmount *);
+int hfsplus_show_options(struct seq_file *, struct dentry *);
 /* super.c */
 struct inode *hfsplus_iget(struct super_block *, unsigned long);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 40e1413be4cf..6643b242bdd7 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -378,7 +378,7 @@ static const struct file_operations hfsplus_file_operations = {
        .unlocked_ioctl = hfsplus_ioctl,
 };
-struct inode *hfsplus_new_inode(struct super_block *sb, int mode)
+struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode)
 {
        struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
        struct inode *inode = new_inode(sb);
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index fbaa6690c8e0..f66c7655b3f7 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -43,7 +43,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
        unsigned int flags;
        int err = 0;
-        err = mnt_want_write(file->f_path.mnt);
+        err = mnt_want_write_file(file);
        if (err)
                goto out;
@@ -94,7 +94,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
 out_unlock_inode:
        mutex_unlock(&inode->i_mutex);
 out_drop_write:
-        mnt_drop_write(file->f_path.mnt);
+        mnt_drop_write_file(file);
 out:
        return err;
 }
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c
index bb62a5882147..06fa5618600c 100644
--- a/fs/hfsplus/options.c
+++ b/fs/hfsplus/options.c
@@ -206,9 +206,9 @@ done:
        return 1;
 }
-int hfsplus_show_options(struct seq_file *seq, struct vfsmount *mnt)
+int hfsplus_show_options(struct seq_file *seq, struct dentry *root)
 {
-        struct hfsplus_sb_info *sbi = HFSPLUS_SB(mnt->mnt_sb);
+        struct hfsplus_sb_info *sbi = HFSPLUS_SB(root->d_sb);
        if (sbi->creator != HFSPLUS_DEF_CR_TYPE)
                seq_printf(seq, ",creator=%.4s", (char *)&sbi->creator);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index d24a9b666a23..edf0a801446b 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -558,7 +558,6 @@ static void hfsplus_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(hfsplus_inode_cachep, HFSPLUS_I(inode));
 }
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index bf15a43016b9..3cbfa93cd782 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -39,7 +39,7 @@
 struct hostfs_iattr {
        unsigned int    ia_valid;
-        mode_t          ia_mode;
+        unsigned short  ia_mode;
        uid_t           ia_uid;
        gid_t           ia_gid;
        loff_t          ia_size;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 2f72da5ae686..e130bd46d671 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -250,7 +250,6 @@ static void hostfs_evict_inode(struct inode *inode)
 static void hostfs_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kfree(HOSTFS_I(inode));
 }
@@ -259,9 +258,9 @@ static void hostfs_destroy_inode(struct inode *inode)
        call_rcu(&inode->i_rcu, hostfs_i_callback);
 }
-static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
+static int hostfs_show_options(struct seq_file *seq, struct dentry *root)
 {
-        const char *root_path = vfs->mnt_sb->s_fs_info;
+        const char *root_path = root->d_sb->s_fs_info;
        size_t offset = strlen(root_ino) + 1;
        if (strlen(root_path) > offset)
@@ -552,7 +551,7 @@ static int read_name(struct inode *ino, char *name)
        return 0;
 }
-int hostfs_create(struct inode *dir, struct dentry *dentry, int mode,
+int hostfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
                  struct nameidata *nd)
 {
        struct inode *inode;
@@ -677,7 +676,7 @@ int hostfs_symlink(struct inode *ino, struct dentry *dentry, const char *to)
        return err;
 }
-int hostfs_mkdir(struct inode *ino, struct dentry *dentry, int mode)
+int hostfs_mkdir(struct inode *ino, struct dentry *dentry, umode_t mode)
 {
        char *file;
        int err;
@@ -701,7 +700,7 @@ int hostfs_rmdir(struct inode *ino, struct dentry *dentry)
        return err;
 }
-int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
+static int hostfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
 {
        struct inode *inode;
        char *name;
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index ea91fcb0ef9b..30dd7b10b507 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -8,7 +8,7 @@
 #include <linux/sched.h>
 #include "hpfs_fn.h"
-static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
        const unsigned char *name = dentry->d_name.name;
        unsigned len = dentry->d_name.len;
@@ -115,7 +115,7 @@ bail:
        return err;
 }
-static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd)
+static int hpfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd)
 {
        const unsigned char *name = dentry->d_name.name;
        unsigned len = dentry->d_name.len;
@@ -201,7 +201,7 @@ bail:
        return err;
 }
-static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
+static int hpfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
 {
        const unsigned char *name = dentry->d_name.name;
        unsigned len = dentry->d_name.len;
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 98580a3b5005..3690467c944e 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -181,7 +181,6 @@ static struct inode *hpfs_alloc_inode(struct super_block *sb)
 static void hpfs_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode));
 }
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index f590b1160c6c..d92f4ce80925 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -622,7 +622,6 @@ void hppfs_evict_inode(struct inode *ino)
 static void hppfs_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kfree(HPPFS_I(inode));
 }
@@ -726,7 +725,7 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent)
        sb->s_fs_info = proc_mnt;
        err = -ENOMEM;
-        root_inode = get_inode(sb, dget(proc_mnt->mnt_sb->s_root));
+        root_inode = get_inode(sb, dget(proc_mnt->mnt_root));
        if (!root_inode)
                goto out_mntput;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 0be5a78598d0..e425ad9d0490 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -447,8 +447,8 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
        return 0;
 }
-static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, 
+static struct inode *hugetlbfs_get_root(struct super_block *sb,
-                                        gid_t gid, int mode, dev_t dev)
+                                        struct hugetlbfs_config *config)
 {
        struct inode *inode;
@@ -456,9 +456,31 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
        if (inode) {
                struct hugetlbfs_inode_info *info;
                inode->i_ino = get_next_ino();
-                inode->i_mode = mode;
+                inode->i_mode = S_IFDIR | config->mode;
-                inode->i_uid = uid;
+                inode->i_uid = config->uid;
-                inode->i_gid = gid;
+                inode->i_gid = config->gid;
+                inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+                info = HUGETLBFS_I(inode);
+                mpol_shared_policy_init(&info->policy, NULL);
+                inode->i_op = &hugetlbfs_dir_inode_operations;
+                inode->i_fop = &simple_dir_operations;
+                /* directory inodes start off with i_nlink == 2 (for "." entry) */
+                inc_nlink(inode);
+        }
+        return inode;
+}
+static struct inode *hugetlbfs_get_inode(struct super_block *sb,
+                                        struct inode *dir,
+                                        umode_t mode, dev_t dev)
+{
+        struct inode *inode;
+        inode = new_inode(sb);
+        if (inode) {
+                struct hugetlbfs_inode_info *info;
+                inode->i_ino = get_next_ino();
+                inode_init_owner(inode, dir, mode);
                inode->i_mapping->a_ops = &hugetlbfs_aops;
                inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info;
                inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
@@ -500,20 +522,12 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
 * File creation. Allocate an inode, and we're done..
 */
 static int hugetlbfs_mknod(struct inode *dir,
-                        struct dentry *dentry, int mode, dev_t dev)
+                        struct dentry *dentry, umode_t mode, dev_t dev)
 {
        struct inode *inode;
        int error = -ENOSPC;
-        gid_t gid;
+        inode = hugetlbfs_get_inode(dir->i_sb, dir, mode, dev);
-        if (dir->i_mode & S_ISGID) {
-                gid = dir->i_gid;
-                if (S_ISDIR(mode))
-                        mode |= S_ISGID;
-        } else {
-                gid = current_fsgid();
-        }
-        inode = hugetlbfs_get_inode(dir->i_sb, current_fsuid(), gid, mode, dev);
        if (inode) {
                dir->i_ctime = dir->i_mtime = CURRENT_TIME;
                d_instantiate(dentry, inode);
@@ -523,7 +537,7 @@ static int hugetlbfs_mknod(struct inode *dir,
        return error;
 }
-static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
        int retval = hugetlbfs_mknod(dir, dentry, mode | S_IFDIR, 0);
        if (!retval)
@@ -531,7 +545,7 @@ static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
        return retval;
 }
-static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd)
+static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd)
 {
        return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0);
 }
@@ -541,15 +555,8 @@ static int hugetlbfs_symlink(struct inode *dir,
 {
        struct inode *inode;
        int error = -ENOSPC;
-        gid_t gid;
-        if (dir->i_mode & S_ISGID)
-                gid = dir->i_gid;
-        else
-                gid = current_fsgid();
-        inode = hugetlbfs_get_inode(dir->i_sb, current_fsuid(),
+        inode = hugetlbfs_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0);
-                                        gid, S_IFLNK|S_IRWXUGO, 0);
        if (inode) {
                int l = strlen(symname)+1;
                error = page_symlink(inode, symname, l);
@@ -666,7 +673,6 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
 static void hugetlbfs_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
 }
@@ -858,8 +864,7 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
        sb->s_magic = HUGETLBFS_MAGIC;
        sb->s_op = &hugetlbfs_ops;
        sb->s_time_gran = 1;
-        inode = hugetlbfs_get_inode(sb, config.uid, config.gid,
+        inode = hugetlbfs_get_root(sb, &config);
-                                        S_IFDIR | config.mode, 0);
        if (!inode)
                goto out_free;
@@ -957,8 +962,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
        path.mnt = mntget(hugetlbfs_vfsmount);
        error = -ENOSPC;
-        inode = hugetlbfs_get_inode(root->d_sb, current_fsuid(),
+        inode = hugetlbfs_get_inode(root->d_sb, NULL, S_IFREG | S_IRWXUGO, 0);
-                                current_fsgid(), S_IFREG | S_IRWXUGO, 0);
        if (!inode)
                goto out_dentry;
diff --git a/fs/inode.c b/fs/inode.c
index ee4e66b998f4..87535753ab04 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -26,6 +26,7 @@
 #include <linux/ima.h>
 #include <linux/cred.h>
 #include <linux/buffer_head.h> /* for inode_has_buffers */
+#include <linux/ratelimit.h>
 #include "internal.h"
 /*
@@ -191,6 +192,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
        }
        inode->i_private = NULL;
        inode->i_mapping = mapping;
+        INIT_LIST_HEAD(&inode->i_dentry);       /* buggered by rcu freeing */
 #ifdef CONFIG_FS_POSIX_ACL
        inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
 #endif
@@ -241,6 +243,11 @@ void __destroy_inode(struct inode *inode)
        BUG_ON(inode_has_buffers(inode));
        security_inode_free(inode);
        fsnotify_inode_delete(inode);
+        if (!inode->i_nlink) {
+                WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0);
+                atomic_long_dec(&inode->i_sb->s_remove_count);
+        }
 #ifdef CONFIG_FS_POSIX_ACL
        if (inode->i_acl && inode->i_acl != ACL_NOT_CACHED)
                posix_acl_release(inode->i_acl);
@@ -254,7 +261,6 @@ EXPORT_SYMBOL(__destroy_inode);
 static void i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(inode_cachep, inode);
 }
@@ -268,6 +274,85 @@ static void destroy_inode(struct inode *inode)
                call_rcu(&inode->i_rcu, i_callback);
 }
+/**
+ * drop_nlink - directly drop an inode's link count
+ * @inode: inode
+ *
+ * This is a low-level filesystem helper to replace any
+ * direct filesystem manipulation of i_nlink.  In cases
+ * where we are attempting to track writes to the
+ * filesystem, a decrement to zero means an imminent
+ * write when the file is truncated and actually unlinked
+ * on the filesystem.
+ */
+void drop_nlink(struct inode *inode)
+{
+        /* Warn if the link count is already zero before decrementing. */
+        WARN_ON(inode->i_nlink == 0);
+        inode->__i_nlink--;
+        /* The count just reached zero: record it in the sb's s_remove_count. */
+        if (!inode->i_nlink)
+                atomic_long_inc(&inode->i_sb->s_remove_count);
+}
+EXPORT_SYMBOL(drop_nlink);
+/**
+ * clear_nlink - directly zero an inode's link count
+ * @inode: inode
+ *
+ * This is a low-level filesystem helper to replace any
+ * direct filesystem manipulation of i_nlink.  See
+ * drop_nlink() for why we care about i_nlink hitting zero.
+ */
+void clear_nlink(struct inode *inode)
+{
+        /*
+         * Act only when the count is currently non-zero, so that
+         * s_remove_count is incremented once per transition to zero.
+         */
+        if (inode->i_nlink) {
+                inode->__i_nlink = 0;
+                atomic_long_inc(&inode->i_sb->s_remove_count);
+        }
+}
+EXPORT_SYMBOL(clear_nlink);
+/**
+ * set_nlink - directly set an inode's link count
+ * @inode: inode
+ * @nlink: new nlink (should be non-zero)
+ *
+ * This is a low-level filesystem helper to replace any
+ * direct filesystem manipulation of i_nlink.
+ *
+ * A zero @nlink is logged (rate-limited) and handed to clear_nlink().
+ */
+void set_nlink(struct inode *inode, unsigned int nlink)
+{
+        if (!nlink) {
+                /*
+                 * NOTE(review): i_ino is printed with %li; if i_ino is an
+                 * unsigned long, %lu would be the matching specifier - confirm.
+                 */
+                printk_ratelimited(KERN_INFO
+                        "set_nlink() clearing i_nlink on %s inode %li\n",
+                        inode->i_sb->s_type->name, inode->i_ino);
+                clear_nlink(inode);
+        } else {
+                /* Yes, some filesystems do change nlink from zero to one */
+                /* Leaving zero: take the inode back out of s_remove_count. */
+                if (inode->i_nlink == 0)
+                        atomic_long_dec(&inode->i_sb->s_remove_count);
+                inode->__i_nlink = nlink;
+        }
+}
+EXPORT_SYMBOL(set_nlink);
+/**
+ * inc_nlink - directly increment an inode's link count
+ * @inode: inode
+ *
+ * This is a low-level filesystem helper to replace any
+ * direct filesystem manipulation of i_nlink.  Currently,
+ * it is only here for parity with drop_nlink().
+ */
+void inc_nlink(struct inode *inode)
+{
+        /*
+         * Incrementing from zero triggers a warning; when it happens,
+         * the inode is also taken back out of s_remove_count.
+         */
+        if (WARN_ON(inode->i_nlink == 0))
+                atomic_long_dec(&inode->i_sb->s_remove_count);
+        inode->__i_nlink++;
+}
+EXPORT_SYMBOL(inc_nlink);
 void address_space_init_once(struct address_space *mapping)
 {
        memset(mapping, 0, sizeof(*mapping));
@@ -290,7 +375,6 @@ void inode_init_once(struct inode *inode)
 {
        memset(inode, 0, sizeof(*inode));
        INIT_HLIST_NODE(&inode->i_hash);
-        INIT_LIST_HEAD(&inode->i_dentry);
        INIT_LIST_HEAD(&inode->i_devices);
        INIT_LIST_HEAD(&inode->i_wb_list);
        INIT_LIST_HEAD(&inode->i_lru);
@@ -1508,7 +1592,7 @@ void file_update_time(struct file *file)
        if (sync_it & S_MTIME)
                inode->i_mtime = now;
        mark_inode_dirty_sync(inode);
-        mnt_drop_write(file->f_path.mnt);
+        mnt_drop_write_file(file);
 }
 EXPORT_SYMBOL(file_update_time);
@@ -1647,7 +1731,7 @@ EXPORT_SYMBOL(init_special_inode);
 * @mode: mode of the new inode
 */
 void inode_init_owner(struct inode *inode, const struct inode *dir,
-                        mode_t mode)
+                        umode_t mode)
 {
        inode->i_uid = current_fsuid();
        if (dir && dir->i_mode & S_ISGID) {
diff --git a/fs/internal.h b/fs/internal.h
index fe327c20af83..9962c59ba280 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -15,19 +15,14 @@ struct super_block;
 struct file_system_type;
 struct linux_binprm;
 struct path;
+struct mount;
 /*
 * block_dev.c
 */
 #ifdef CONFIG_BLOCK
-extern struct super_block *blockdev_superblock;
 extern void __init bdev_cache_init(void);
-static inline int sb_is_blkdev_sb(struct super_block *sb)
-{
-        return sb == blockdev_superblock;
-}
 extern int __sync_blockdev(struct block_device *bdev, int wait);
 #else
@@ -35,11 +30,6 @@ static inline void bdev_cache_init(void)
 {
 }
-static inline int sb_is_blkdev_sb(struct super_block *sb)
-{
-        return 0;
-}
 static inline int __sync_blockdev(struct block_device *bdev, int wait)
 {
        return 0;
@@ -52,28 +42,17 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait)
 extern void __init chrdev_init(void);
 /*
- * exec.c
- */
-extern int check_unsafe_exec(struct linux_binprm *);
-/*
 * namespace.c
 */
 extern int copy_mount_options(const void __user *, unsigned long *);
 extern int copy_mount_string(const void __user *, char **);
-extern unsigned int mnt_get_count(struct vfsmount *mnt);
-extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int);
 extern struct vfsmount *lookup_mnt(struct path *);
-extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *,
-                                struct vfsmount *);
-extern void release_mounts(struct list_head *);
-extern void umount_tree(struct vfsmount *, int, struct list_head *);
-extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
 extern int finish_automount(struct vfsmount *, struct path *);
 extern void mnt_make_longterm(struct vfsmount *);
 extern void mnt_make_shortterm(struct vfsmount *);
+extern int sb_prepare_remount_readonly(struct super_block *);
 extern void __init mnt_init(void);
@@ -98,10 +77,9 @@ extern struct file *get_empty_filp(void);
 */
 extern int do_remount_sb(struct super_block *, int, void *, int);
 extern bool grab_super_passive(struct super_block *sb);
-extern void __put_super(struct super_block *sb);
-extern void put_super(struct super_block *sb);
 extern struct dentry *mount_fs(struct file_system_type *,
                               int, const char *, void *);
+extern struct super_block *user_get_super(dev_t);
 /*
 * open.c
@@ -111,7 +89,7 @@ extern struct file *nameidata_to_filp(struct nameidata *);
 extern void release_open_intent(struct nameidata *);
 struct open_flags {
        int open_flag;
-        int mode;
+        umode_t mode;
        int acc_mode;
        int intent;
 };
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 1d9b9fcb2db4..066836e81848 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -42,7 +42,7 @@ static long vfs_ioctl(struct file *filp, unsigned int cmd,
        error = filp->f_op->unlocked_ioctl(filp, cmd, arg);
        if (error == -ENOIOCTLCMD)
-                error = -EINVAL;
+                error = -ENOTTY;
 out:
        return error;
 }
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index f950059525fc..7b99f5f460be 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -85,7 +85,6 @@ static struct inode *isofs_alloc_inode(struct super_block *sb)
 static void isofs_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode));
 }
@@ -170,8 +169,8 @@ struct iso9660_options{
        unsigned char map;
        unsigned char check;
        unsigned int blocksize;
-        mode_t fmode;
+        umode_t fmode;
-        mode_t dmode;
+        umode_t dmode;
        gid_t gid;
        uid_t uid;
        char *iocharset;
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index 7d33de84f52a..0e73f63d9274 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -50,14 +50,14 @@ struct isofs_sb_info {
        unsigned int  s_uid_set:1;
        unsigned int  s_gid_set:1;
-        mode_t s_fmode;
+        umode_t s_fmode;
-        mode_t s_dmode;
+        umode_t s_dmode;
        gid_t s_gid;
        uid_t s_uid;
        struct nls_table *s_nls_iocharset; /* Native language support table */
 };
-#define ISOFS_INVALID_MODE ((mode_t) -1)
+#define ISOFS_INVALID_MODE ((umode_t) -1)
 static inline struct isofs_sb_info *ISOFS_SB(struct super_block *sb)
 {
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index f94fc48ff3a0..5d1a00a5041b 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -537,7 +537,7 @@ int cleanup_journal_tail(journal_t *journal)
 * them.
 *
 * Called with j_list_lock held.
- * Returns number of bufers reaped (for debug)
+ * Returns number of buffers reaped (for debug)
 */
 static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index fea8dd661d2b..a96cff0c5f1d 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -166,7 +166,7 @@ loop:
                 */
                jbd_debug(1, "Now suspending kjournald\n");
                spin_unlock(&journal->j_state_lock);
-                refrigerator();
+                try_to_freeze();
                spin_lock(&journal->j_state_lock);
        } else {
                /*
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 16a698bd906d..d49d202903fb 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -565,7 +565,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
 *
 * Called with the journal locked.
 * Called with j_list_lock held.
- * Returns number of bufers reaped (for debug)
+ * Returns number of buffers reaped (for debug)
 */
 static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 0fa0123151d3..c0a5f9f1b127 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -173,7 +173,7 @@ loop:
                 */
                jbd_debug(1, "Now suspending kjournald2\n");
                write_unlock(&journal->j_state_lock);
-                refrigerator();
+                try_to_freeze();
                write_lock(&journal->j_state_lock);
        } else {
                /*
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index be6169bd8acd..973ac5822bd7 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -22,16 +22,16 @@
 static int jffs2_readdir (struct file *, void *, filldir_t);
-static int jffs2_create (struct inode *,struct dentry *,int,
+static int jffs2_create (struct inode *,struct dentry *,umode_t,
                         struct nameidata *);
 static struct dentry *jffs2_lookup (struct inode *,struct dentry *,
                                    struct nameidata *);
 static int jffs2_link (struct dentry *,struct inode *,struct dentry *);
 static int jffs2_unlink (struct inode *,struct dentry *);
 static int jffs2_symlink (struct inode *,struct dentry *,const char *);
-static int jffs2_mkdir (struct inode *,struct dentry *,int);
+static int jffs2_mkdir (struct inode *,struct dentry *,umode_t);
 static int jffs2_rmdir (struct inode *,struct dentry *);
-static int jffs2_mknod (struct inode *,struct dentry *,int,dev_t);
+static int jffs2_mknod (struct inode *,struct dentry *,umode_t,dev_t);
 static int jffs2_rename (struct inode *, struct dentry *,
                         struct inode *, struct dentry *);
@@ -169,8 +169,8 @@ static int jffs2_readdir(struct file *filp, void *dirent, filldir_t filldir)
 /***********************************************************************/
-static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
+static int jffs2_create(struct inode *dir_i, struct dentry *dentry,
-                        struct nameidata *nd)
+                        umode_t mode, struct nameidata *nd)
 {
        struct jffs2_raw_inode *ri;
        struct jffs2_inode_info *f, *dir_f;
@@ -450,7 +450,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
 }
-static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
+static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, umode_t mode)
 {
        struct jffs2_inode_info *f, *dir_f;
        struct jffs2_sb_info *c;
@@ -618,7 +618,7 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry)
        return ret;
 }
-static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, dev_t rdev)
+static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, umode_t mode, dev_t rdev)
 {
        struct jffs2_inode_info *f, *dir_f;
        struct jffs2_sb_info *c;
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index e7e974454115..8be4925296cf 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -45,7 +45,6 @@ static struct inode *jffs2_alloc_inode(struct super_block *sb)
 static void jffs2_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode));
 }
@@ -97,9 +96,9 @@ static const char *jffs2_compr_name(unsigned int compr)
        }
 }
-static int jffs2_show_options(struct seq_file *s, struct vfsmount *mnt)
+static int jffs2_show_options(struct seq_file *s, struct dentry *root)
 {
-        struct jffs2_sb_info *c = JFFS2_SB_INFO(mnt->mnt_sb);
+        struct jffs2_sb_info *c = JFFS2_SB_INFO(root->d_sb);
        struct jffs2_mount_opts *opts = &c->mount_opts;
        if (opts->override_compr)
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index 6f98a1866776..f19d1e04a374 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -68,7 +68,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                unsigned int oldflags;
                int err;
-                err = mnt_want_write(filp->f_path.mnt);
+                err = mnt_want_write_file(filp);
                if (err)
                        return err;
@@ -120,7 +120,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                inode->i_ctime = CURRENT_TIME_SEC;
                mark_inode_dirty(inode);
 setflags_out:
-                mnt_drop_write(filp->f_path.mnt);
+                mnt_drop_write_file(filp);
                return err;
        }
        default:
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index cc5f811ed383..2eb952c41a69 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -2349,7 +2349,7 @@ int jfsIOWait(void *arg)
                if (freezing(current)) {
                        spin_unlock_irq(&log_redrive_lock);
-                        refrigerator();
+                        try_to_freeze();
                } else {
                        set_current_state(TASK_INTERRUPTIBLE);
                        spin_unlock_irq(&log_redrive_lock);
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index af9606057dde..bb8b661bcc50 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -2800,7 +2800,7 @@ int jfs_lazycommit(void *arg)
                if (freezing(current)) {
                        LAZY_UNLOCK(flags);
-                        refrigerator();
+                        try_to_freeze();
                } else {
                        DECLARE_WAITQUEUE(wq, current);
@@ -2994,7 +2994,7 @@ int jfs_sync(void *arg)
                if (freezing(current)) {
                        TXN_UNLOCK();
-                        refrigerator();
+                        try_to_freeze();
                } else {
                        set_current_state(TASK_INTERRUPTIBLE);
                        TXN_UNLOCK();
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index a112ad96e474..5f7c160ea64f 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -72,7 +72,7 @@ static inline void free_ea_wmap(struct inode *inode)
 * RETURN:      Errors from subroutines
 *
 */
-static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
+static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode,
                struct nameidata *nd)
 {
        int rc = 0;
@@ -205,7 +205,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
 * note:
 * EACCESS: user needs search+write permission on the parent directory
 */
-static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
+static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
 {
        int rc = 0;
        tid_t tid;              /* transaction id */
@@ -1353,7 +1353,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 * FUNCTION:    Create a special file (device)
 */
 static int jfs_mknod(struct inode *dir, struct dentry *dentry,
-                int mode, dev_t rdev)
+                umode_t mode, dev_t rdev)
 {
        struct jfs_inode_info *jfs_ip;
        struct btstack btstack;
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index a44eff076c17..682bca642f38 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -119,7 +119,6 @@ static void jfs_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
        struct jfs_inode_info *ji = JFS_IP(inode);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(jfs_inode_cachep, ji);
 }
@@ -609,9 +608,9 @@ static int jfs_sync_fs(struct super_block *sb, int wait)
        return 0;
 }
-static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
+static int jfs_show_options(struct seq_file *seq, struct dentry *root)
 {
-        struct jfs_sb_info *sbi = JFS_SBI(vfs->mnt_sb);
+        struct jfs_sb_info *sbi = JFS_SBI(root->d_sb);
        if (sbi->uid != -1)
                seq_printf(seq, ",uid=%d", sbi->uid);
diff --git a/fs/libfs.c b/fs/libfs.c
index f6d411eef1e7..5b2dbb3ba4fc 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -12,7 +12,7 @@
 #include <linux/mutex.h>
 #include <linux/exportfs.h>
 #include <linux/writeback.h>
-#include <linux/buffer_head.h>
+#include <linux/buffer_head.h> /* sync_mapping_buffers */
 #include <asm/uaccess.h>
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 1ca0679c80bf..2240d384d787 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -403,7 +403,7 @@ nlmsvc_match_sb(void *datap, struct nlm_file *file)
 {
        struct super_block *sb = datap;
-        return sb == file->f_file->f_path.mnt->mnt_sb;
+        return sb == file->f_file->f_path.dentry->d_sb;
 }
 /**
diff --git a/fs/locks.c b/fs/locks.c
index 3b0d05dcd7c1..637694bf3a03 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1205,6 +1205,8 @@ int __break_lease(struct inode *inode, unsigned int mode)
        int want_write = (mode & O_ACCMODE) != O_RDONLY;
        new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK);
+        if (IS_ERR(new_fl))
+                return PTR_ERR(new_fl);
        lock_flocks();
@@ -1221,12 +1223,6 @@ int __break_lease(struct inode *inode, unsigned int mode)
                if (fl->fl_owner == current->files)
                        i_have_this_lease = 1;
-        if (IS_ERR(new_fl) && !i_have_this_lease
-                        && ((mode & O_NONBLOCK) == 0)) {
-                error = PTR_ERR(new_fl);
-                goto out;
-        }
        break_time = 0;
        if (lease_break_time > 0) {
                break_time = jiffies + lease_break_time * HZ;
@@ -1284,8 +1280,7 @@ restart:
 out:
        unlock_flocks();
-        if (!IS_ERR(new_fl))
+        locks_free_lock(new_fl);
-                locks_free_lock(new_fl);
        return error;
 }
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index b7d7f67cee5a..501043e8966c 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -482,7 +482,7 @@ out:
        return ret;
 }
-static int logfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int logfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
        struct inode *inode;
@@ -501,7 +501,7 @@ static int logfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
        return __logfs_create(dir, dentry, inode, NULL, 0);
 }
-static int logfs_create(struct inode *dir, struct dentry *dentry, int mode,
+static int logfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
                struct nameidata *nd)
 {
        struct inode *inode;
@@ -517,7 +517,7 @@ static int logfs_create(struct inode *dir, struct dentry *dentry, int mode,
        return __logfs_create(dir, dentry, inode, NULL, 0);
 }
-static int logfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
+static int logfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
                dev_t rdev)
 {
        struct inode *inode;
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index 7e441ad5f792..388df1aa35e5 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -144,7 +144,6 @@ struct inode *logfs_safe_iget(struct super_block *sb, ino_t ino, int *is_cached)
 static void logfs_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(logfs_inode_cache, logfs_inode(inode));
 }
@@ -324,7 +323,7 @@ static void logfs_set_ino_generation(struct super_block *sb,
        mutex_unlock(&super->s_journal_mutex);
 }
-struct inode *logfs_new_inode(struct inode *dir, int mode)
+struct inode *logfs_new_inode(struct inode *dir, umode_t mode)
 {
        struct super_block *sb = dir->i_sb;
        struct inode *inode;
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index 398ecff6e548..926373866a55 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -520,7 +520,7 @@ extern const struct super_operations logfs_super_operations;
 struct inode *logfs_iget(struct super_block *sb, ino_t ino);
 struct inode *logfs_safe_iget(struct super_block *sb, ino_t ino, int *cookie);
 void logfs_safe_iput(struct inode *inode, int cookie);
-struct inode *logfs_new_inode(struct inode *dir, int mode);
+struct inode *logfs_new_inode(struct inode *dir, umode_t mode);
 struct inode *logfs_new_meta_inode(struct super_block *sb, u64 ino);
 struct inode *logfs_read_meta_inode(struct super_block *sb, u64 ino);
 int logfs_init_inode_cache(void);
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c
index ef175cb8cfd8..4bc50dac8e97 100644
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -209,7 +209,7 @@ void minix_free_inode(struct inode * inode)
        mark_buffer_dirty(bh);
 }
-struct inode *minix_new_inode(const struct inode *dir, int mode, int *error)
+struct inode *minix_new_inode(const struct inode *dir, umode_t mode, int *error)
 {
        struct super_block *sb = dir->i_sb;
        struct minix_sb_info *sbi = minix_sb(sb);
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 1d9e33966db0..fa8b612b8ce2 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -71,7 +71,6 @@ static struct inode *minix_alloc_inode(struct super_block *sb)
 static void minix_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(minix_inode_cachep, minix_i(inode));
 }
@@ -263,23 +262,6 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
                goto out_no_root;
        }
-        ret = -ENOMEM;
-        s->s_root = d_alloc_root(root_inode);
-        if (!s->s_root)
-                goto out_iput;
-        if (!(s->s_flags & MS_RDONLY)) {
-                if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */
-                        ms->s_state &= ~MINIX_VALID_FS;
-                mark_buffer_dirty(bh);
-        }
-        if (!(sbi->s_mount_state & MINIX_VALID_FS))
-                printk("MINIX-fs: mounting unchecked file system, "
-                        "running fsck is recommended\n");
-        else if (sbi->s_mount_state & MINIX_ERROR_FS)
-                printk("MINIX-fs: mounting file system with errors, "
-                        "running fsck is recommended\n");
        /* Apparently minix can create filesystems that allocate more blocks for
         * the bitmaps than needed.  We simply ignore that, but verify it didn't
         * create one with not enough blocks and bail out if so.
@@ -300,6 +282,23 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
                goto out_iput;
        }
+        ret = -ENOMEM;
+        s->s_root = d_alloc_root(root_inode);
+        if (!s->s_root)
+                goto out_iput;
+        if (!(s->s_flags & MS_RDONLY)) {
+                if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */
+                        ms->s_state &= ~MINIX_VALID_FS;
+                mark_buffer_dirty(bh);
+        }
+        if (!(sbi->s_mount_state & MINIX_VALID_FS))
+                printk("MINIX-fs: mounting unchecked file system, "
+                        "running fsck is recommended\n");
+        else if (sbi->s_mount_state & MINIX_ERROR_FS)
+                printk("MINIX-fs: mounting file system with errors, "
+                        "running fsck is recommended\n");
        return 0;
 out_iput:
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index 26bbd55e82ea..c889ef0aa571 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -46,7 +46,7 @@ struct minix_sb_info {
 extern struct inode *minix_iget(struct super_block *, unsigned long);
 extern struct minix_inode * minix_V1_raw_inode(struct super_block *, ino_t, struct buffer_head **);
 extern struct minix2_inode * minix_V2_raw_inode(struct super_block *, ino_t, struct buffer_head **);
-extern struct inode * minix_new_inode(const struct inode *, int, int *);
+extern struct inode * minix_new_inode(const struct inode *, umode_t, int *);
 extern void minix_free_inode(struct inode * inode);
 extern unsigned long minix_count_free_inodes(struct super_block *sb);
 extern int minix_new_block(struct inode * inode);
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 6e6777f1b4b2..2f76e38c2065 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -36,7 +36,7 @@ static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, st
        return NULL;
 }
-static int minix_mknod(struct inode * dir, struct dentry *dentry, int mode, dev_t rdev)
+static int minix_mknod(struct inode * dir, struct dentry *dentry, umode_t mode, dev_t rdev)
 {
        int error;
        struct inode *inode;
@@ -54,7 +54,7 @@ static int minix_mknod(struct inode * dir, struct dentry *dentry, int mode, dev_
        return error;
 }
-static int minix_create(struct inode * dir, struct dentry *dentry, int mode,
+static int minix_create(struct inode *dir, struct dentry *dentry, umode_t mode,
                struct nameidata *nd)
 {
        return minix_mknod(dir, dentry, mode, 0);
@@ -103,7 +103,7 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir,
        return add_nondir(dentry, inode);
 }
-static int minix_mkdir(struct inode * dir, struct dentry *dentry, int mode)
+static int minix_mkdir(struct inode * dir, struct dentry *dentry, umode_t mode)
 {
        struct inode * inode;
        int err = -EMLINK;
diff --git a/fs/mount.h b/fs/mount.h
new file mode 100644
index 000000000000..4ef36d93e5a2
--- /dev/null
+++ b/fs/mount.h
@@ -0,0 +1,76 @@
+#include <linux/mount.h>
+#include <linux/seq_file.h>
+#include <linux/poll.h>
+struct mnt_namespace {
+        atomic_t                count;
+        struct mount *  root;
+        struct list_head        list;
+        wait_queue_head_t poll;
+        int event;
+};
+struct mnt_pcp {
+        int mnt_count;
+        int mnt_writers;
+};
+struct mount {
+        struct list_head mnt_hash;
+        struct mount *mnt_parent;
+        struct dentry *mnt_mountpoint;
+        struct vfsmount mnt;
+#ifdef CONFIG_SMP
+        struct mnt_pcp __percpu *mnt_pcp;
+        atomic_t mnt_longterm;          /* how many of the refs are longterm */
+#else
+        int mnt_count;
+        int mnt_writers;
+#endif
+        struct list_head mnt_mounts;    /* list of children, anchored here */
+        struct list_head mnt_child;     /* and going through their mnt_child */
+        struct list_head mnt_instance;  /* mount instance on sb->s_mounts */
+        const char *mnt_devname;        /* Name of device e.g. /dev/dsk/hda1 */
+        struct list_head mnt_list;
+        struct list_head mnt_expire;    /* link in fs-specific expiry list */
+        struct list_head mnt_share;     /* circular list of shared mounts */
+        struct list_head mnt_slave_list;/* list of slave mounts */
+        struct list_head mnt_slave;     /* slave list entry */
+        struct mount *mnt_master;       /* slave is on master->mnt_slave_list */
+        struct mnt_namespace *mnt_ns;   /* containing namespace */
+#ifdef CONFIG_FSNOTIFY
+        struct hlist_head mnt_fsnotify_marks;
+        __u32 mnt_fsnotify_mask;
+#endif
+        int mnt_id;                     /* mount identifier */
+        int mnt_group_id;               /* peer group identifier */
+        int mnt_expiry_mark;            /* true if marked for expiry */
+        int mnt_pinned;
+        int mnt_ghosts;
+};
+static inline struct mount *real_mount(struct vfsmount *mnt)
+{
+        return container_of(mnt, struct mount, mnt);
+}
+static inline int mnt_has_parent(struct mount *mnt)
+{
+        return mnt != mnt->mnt_parent;
+}
+extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *, int);
+static inline void get_mnt_ns(struct mnt_namespace *ns)
+{
+        atomic_inc(&ns->count);
+}
+struct proc_mounts {
+        struct seq_file m; /* must be the first element */
+        struct mnt_namespace *ns;
+        struct path root;
+        int (*show)(struct seq_file *, struct vfsmount *);
+};
+extern const struct seq_operations mounts_op;
diff --git a/fs/namei.c b/fs/namei.c
index 5008f01787f5..c283a1ec008e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -36,6 +36,7 @@
 #include <asm/uaccess.h>
 #include "internal.h"
+#include "mount.h"
 /* [Feb-1997 T. Schoebel-Theuer]
 * Fundamental changes in the pathname lookup mechanisms (namei)
@@ -676,36 +677,38 @@ follow_link(struct path *link, struct nameidata *nd, void **p)
 static int follow_up_rcu(struct path *path)
 {
-        struct vfsmount *parent;
+        struct mount *mnt = real_mount(path->mnt);
+        struct mount *parent;
        struct dentry *mountpoint;
-        parent = path->mnt->mnt_parent;
+        parent = mnt->mnt_parent;
-        if (parent == path->mnt)
+        if (&parent->mnt == path->mnt)
                return 0;
-        mountpoint = path->mnt->mnt_mountpoint;
+        mountpoint = mnt->mnt_mountpoint;
        path->dentry = mountpoint;
-        path->mnt = parent;
+        path->mnt = &parent->mnt;
        return 1;
 }
 int follow_up(struct path *path)
 {
-        struct vfsmount *parent;
+        struct mount *mnt = real_mount(path->mnt);
+        struct mount *parent;
        struct dentry *mountpoint;
        br_read_lock(vfsmount_lock);
-        parent = path->mnt->mnt_parent;
+        parent = mnt->mnt_parent;
-        if (parent == path->mnt) {
+        if (&parent->mnt == path->mnt) {
                br_read_unlock(vfsmount_lock);
                return 0;
        }
-        mntget(parent);
+        mntget(&parent->mnt);
-        mountpoint = dget(path->mnt->mnt_mountpoint);
+        mountpoint = dget(mnt->mnt_mountpoint);
        br_read_unlock(vfsmount_lock);
        dput(path->dentry);
        path->dentry = mountpoint;
        mntput(path->mnt);
-        path->mnt = parent;
+        path->mnt = &parent->mnt;
        return 1;
 }
@@ -884,7 +887,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
                               struct inode **inode)
 {
        for (;;) {
-                struct vfsmount *mounted;
+                struct mount *mounted;
                /*
                 * Don't forget we might have a non-mountpoint managed dentry
                 * that wants to block transit.
@@ -898,8 +901,8 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
                mounted = __lookup_mnt(path->mnt, path->dentry, 1);
                if (!mounted)
                        break;
-                path->mnt = mounted;
+                path->mnt = &mounted->mnt;
-                path->dentry = mounted->mnt_root;
+                path->dentry = mounted->mnt.mnt_root;
                nd->flags |= LOOKUP_JUMPED;
                nd->seq = read_seqcount_begin(&path->dentry->d_seq);
                /*
@@ -915,12 +918,12 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
 static void follow_mount_rcu(struct nameidata *nd)
 {
        while (d_mountpoint(nd->path.dentry)) {
-                struct vfsmount *mounted;
+                struct mount *mounted;
                mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry, 1);
                if (!mounted)
                        break;
-                nd->path.mnt = mounted;
+                nd->path.mnt = &mounted->mnt;
-                nd->path.dentry = mounted->mnt_root;
+                nd->path.dentry = mounted->mnt.mnt_root;
                nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
        }
 }
@@ -1976,7 +1979,7 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
        }
 }
-int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
+int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
                struct nameidata *nd)
 {
        int error = may_create(dir, dentry);
@@ -2177,7 +2180,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
        /* Negative dentry, just create the file */
        if (!dentry->d_inode) {
-                int mode = op->mode;
+                umode_t mode = op->mode;
                if (!IS_POSIXACL(dir->d_inode))
                        mode &= ~current_umask();
                /*
@@ -2444,7 +2447,7 @@ struct dentry *user_path_create(int dfd, const char __user *pathname, struct pat
 }
 EXPORT_SYMBOL(user_path_create);
-int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
+int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
 {
        int error = may_create(dir, dentry);
@@ -2472,7 +2475,7 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
        return error;
 }
-static int may_mknod(mode_t mode)
+static int may_mknod(umode_t mode)
 {
        switch (mode & S_IFMT) {
        case S_IFREG:
@@ -2489,7 +2492,7 @@ static int may_mknod(mode_t mode)
        }
 }
-SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, int, mode,
+SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
                unsigned, dev)
 {
        struct dentry *dentry;
@@ -2536,12 +2539,12 @@ out_dput:
        return error;
 }
-SYSCALL_DEFINE3(mknod, const char __user *, filename, int, mode, unsigned, dev)
+SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev)
 {
        return sys_mknodat(AT_FDCWD, filename, mode, dev);
 }
-int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
        int error = may_create(dir, dentry);
@@ -2562,7 +2565,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
        return error;
 }
-SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, int, mode)
+SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
 {
        struct dentry *dentry;
        struct path path;
@@ -2590,7 +2593,7 @@ out_dput:
        return error;
 }
-SYSCALL_DEFINE2(mkdir, const char __user *, pathname, int, mode)
+SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
 {
        return sys_mkdirat(AT_FDCWD, pathname, mode);
 }
diff --git a/fs/namespace.c b/fs/namespace.c
index cfc6d4448aa5..e6081996c9a2 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -9,30 +9,17 @@
 */
 #include <linux/syscalls.h>
-#include <linux/slab.h>
+#include <linux/export.h>
-#include <linux/sched.h>
-#include <linux/spinlock.h>
-#include <linux/percpu.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/acct.h>
 #include <linux/capability.h>
-#include <linux/cpumask.h>
-#include <linux/module.h>
-#include <linux/sysfs.h>
-#include <linux/seq_file.h>
 #include <linux/mnt_namespace.h>
 #include <linux/namei.h>
-#include <linux/nsproxy.h>
 #include <linux/security.h>
-#include <linux/mount.h>
-#include <linux/ramfs.h>
-#include <linux/log2.h>
 #include <linux/idr.h>
-#include <linux/fs_struct.h>
+#include <linux/acct.h>         /* acct_auto_close_mnt */
-#include <linux/fsnotify.h>
+#include <linux/ramfs.h>        /* init_rootfs */
-#include <asm/uaccess.h>
+#include <linux/fs_struct.h>    /* get_fs_root et.al. */
-#include <asm/unistd.h>
+#include <linux/fsnotify.h>     /* fsnotify_vfsmount_delete */
+#include <linux/uaccess.h>
 #include "pnode.h"
 #include "internal.h"
@@ -78,7 +65,7 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
 * allocation is serialized by namespace_sem, but we need the spinlock to
 * serialize with freeing.
 */
-static int mnt_alloc_id(struct vfsmount *mnt)
+static int mnt_alloc_id(struct mount *mnt)
 {
        int res;
@@ -95,7 +82,7 @@ retry:
        return res;
 }
-static void mnt_free_id(struct vfsmount *mnt)
+static void mnt_free_id(struct mount *mnt)
 {
        int id = mnt->mnt_id;
        spin_lock(&mnt_id_lock);
@@ -110,7 +97,7 @@ static void mnt_free_id(struct vfsmount *mnt)
 *
 * mnt_group_ida is protected by namespace_sem
 */
-static int mnt_alloc_group_id(struct vfsmount *mnt)
+static int mnt_alloc_group_id(struct mount *mnt)
 {
        int res;
@@ -129,7 +116,7 @@ static int mnt_alloc_group_id(struct vfsmount *mnt)
 /*
 * Release a peer group ID
 */
-void mnt_release_group_id(struct vfsmount *mnt)
+void mnt_release_group_id(struct mount *mnt)
 {
        int id = mnt->mnt_group_id;
        ida_remove(&mnt_group_ida, id);
@@ -141,7 +128,7 @@ void mnt_release_group_id(struct vfsmount *mnt)
 /*
 * vfsmount lock must be held for read
 */
-static inline void mnt_add_count(struct vfsmount *mnt, int n)
+static inline void mnt_add_count(struct mount *mnt, int n)
 {
 #ifdef CONFIG_SMP
        this_cpu_add(mnt->mnt_pcp->mnt_count, n);
@@ -152,35 +139,10 @@ static inline void mnt_add_count(struct vfsmount *mnt, int n)
 #endif
 }
-static inline void mnt_set_count(struct vfsmount *mnt, int n)
-{
-#ifdef CONFIG_SMP
-        this_cpu_write(mnt->mnt_pcp->mnt_count, n);
-#else
-        mnt->mnt_count = n;
-#endif
-}
-/*
- * vfsmount lock must be held for read
- */
-static inline void mnt_inc_count(struct vfsmount *mnt)
-{
-        mnt_add_count(mnt, 1);
-}
-/*
- * vfsmount lock must be held for read
- */
-static inline void mnt_dec_count(struct vfsmount *mnt)
-{
-        mnt_add_count(mnt, -1);
-}
 /*
 * vfsmount lock must be held for write
 */
-unsigned int mnt_get_count(struct vfsmount *mnt)
+unsigned int mnt_get_count(struct mount *mnt)
 {
 #ifdef CONFIG_SMP
        unsigned int count = 0;
@@ -196,9 +158,9 @@ unsigned int mnt_get_count(struct vfsmount *mnt)
 #endif
 }
-static struct vfsmount *alloc_vfsmnt(const char *name)
+static struct mount *alloc_vfsmnt(const char *name)
 {
-        struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
+        struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
        if (mnt) {
                int err;
@@ -277,7 +239,7 @@ int __mnt_is_readonly(struct vfsmount *mnt)
 }
 EXPORT_SYMBOL_GPL(__mnt_is_readonly);
-static inline void mnt_inc_writers(struct vfsmount *mnt)
+static inline void mnt_inc_writers(struct mount *mnt)
 {
 #ifdef CONFIG_SMP
        this_cpu_inc(mnt->mnt_pcp->mnt_writers);
@@ -286,7 +248,7 @@ static inline void mnt_inc_writers(struct vfsmount *mnt)
 #endif
 }
-static inline void mnt_dec_writers(struct vfsmount *mnt)
+static inline void mnt_dec_writers(struct mount *mnt)
 {
 #ifdef CONFIG_SMP
        this_cpu_dec(mnt->mnt_pcp->mnt_writers);
@@ -295,7 +257,7 @@ static inline void mnt_dec_writers(struct vfsmount *mnt)
 #endif
 }
-static unsigned int mnt_get_writers(struct vfsmount *mnt)
+static unsigned int mnt_get_writers(struct mount *mnt)
 {
 #ifdef CONFIG_SMP
        unsigned int count = 0;
@@ -311,6 +273,15 @@ static unsigned int mnt_get_writers(struct vfsmount *mnt)
 #endif
 }
+static int mnt_is_readonly(struct vfsmount *mnt)
+{
+        if (mnt->mnt_sb->s_readonly_remount)
+                return 1;
+        /* Order wrt setting s_flags/s_readonly_remount in do_remount() */
+        smp_rmb();
+        return __mnt_is_readonly(mnt);
+}
 /*
 * Most r/o checks on a fs are for operations that take
 * discrete amounts of time, like a write() or unlink().
@@ -321,7 +292,7 @@ static unsigned int mnt_get_writers(struct vfsmount *mnt)
 */
 /**
 * mnt_want_write - get write access to a mount
- * @mnt: the mount on which to take a write
+ * @m: the mount on which to take a write
 *
 * This tells the low-level filesystem that a write is
 * about to be performed to it, and makes sure that
@@ -329,8 +300,9 @@ static unsigned int mnt_get_writers(struct vfsmount *mnt)
 * the write operation is finished, mnt_drop_write()
 * must be called.  This is effectively a refcount.
 */
-int mnt_want_write(struct vfsmount *mnt)
+int mnt_want_write(struct vfsmount *m)
 {
+        struct mount *mnt = real_mount(m);
        int ret = 0;
        preempt_disable();
@@ -341,7 +313,7 @@ int mnt_want_write(struct vfsmount *mnt)
         * incremented count after it has set MNT_WRITE_HOLD.
         */
        smp_mb();
-        while (mnt->mnt_flags & MNT_WRITE_HOLD)
+        while (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
                cpu_relax();
        /*
         * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
@@ -349,12 +321,10 @@ int mnt_want_write(struct vfsmount *mnt)
         * MNT_WRITE_HOLD is cleared.
         */
        smp_rmb();
-        if (__mnt_is_readonly(mnt)) {
+        if (mnt_is_readonly(m)) {
                mnt_dec_writers(mnt);
                ret = -EROFS;
-                goto out;
        }
-out:
        preempt_enable();
        return ret;
 }
@@ -378,7 +348,7 @@ int mnt_clone_write(struct vfsmount *mnt)
        if (__mnt_is_readonly(mnt))
                return -EROFS;
        preempt_disable();
-        mnt_inc_writers(mnt);
+        mnt_inc_writers(real_mount(mnt));
        preempt_enable();
        return 0;
 }
@@ -412,17 +382,23 @@ EXPORT_SYMBOL_GPL(mnt_want_write_file);
 void mnt_drop_write(struct vfsmount *mnt)
 {
        preempt_disable();
-        mnt_dec_writers(mnt);
+        mnt_dec_writers(real_mount(mnt));
        preempt_enable();
 }
 EXPORT_SYMBOL_GPL(mnt_drop_write);
-static int mnt_make_readonly(struct vfsmount *mnt)
+void mnt_drop_write_file(struct file *file)
+{
+        mnt_drop_write(file->f_path.mnt);
+}
+EXPORT_SYMBOL(mnt_drop_write_file);
+static int mnt_make_readonly(struct mount *mnt)
 {
        int ret = 0;
        br_write_lock(vfsmount_lock);
-        mnt->mnt_flags |= MNT_WRITE_HOLD;
+        mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
        /*
         * After storing MNT_WRITE_HOLD, we'll read the counters. This store
         * should be visible before we do.
@@ -448,25 +424,61 @@ static int mnt_make_readonly(struct vfsmount *mnt)
        if (mnt_get_writers(mnt) > 0)
                ret = -EBUSY;
        else
-                mnt->mnt_flags |= MNT_READONLY;
+                mnt->mnt.mnt_flags |= MNT_READONLY;
        /*
         * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers
         * that become unheld will see MNT_READONLY.
         */
        smp_wmb();
-        mnt->mnt_flags &= ~MNT_WRITE_HOLD;
+        mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
        br_write_unlock(vfsmount_lock);
        return ret;
 }
-static void __mnt_unmake_readonly(struct vfsmount *mnt)
+static void __mnt_unmake_readonly(struct mount *mnt)
 {
        br_write_lock(vfsmount_lock);
-        mnt->mnt_flags &= ~MNT_READONLY;
+        mnt->mnt.mnt_flags &= ~MNT_READONLY;
+        br_write_unlock(vfsmount_lock);
+}
+int sb_prepare_remount_readonly(struct super_block *sb)
+{
+        struct mount *mnt;
+        int err = 0;
+        /* Racy optimization.  Recheck the counter under MNT_WRITE_HOLD */
+        if (atomic_long_read(&sb->s_remove_count))
+                return -EBUSY;
+        br_write_lock(vfsmount_lock);
+        list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
+                if (!(mnt->mnt.mnt_flags & MNT_READONLY)) {
+                        mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
+                        smp_mb();
+                        if (mnt_get_writers(mnt) > 0) {
+                                err = -EBUSY;
+                                break;
+                        }
+                }
+        }
+        if (!err && atomic_long_read(&sb->s_remove_count))
+                err = -EBUSY;
+        if (!err) {
+                sb->s_readonly_remount = 1;
+                smp_wmb();
+        }
+        list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
+                if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
+                        mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
+        }
        br_write_unlock(vfsmount_lock);
+        return err;
 }
-static void free_vfsmnt(struct vfsmount *mnt)
+static void free_vfsmnt(struct mount *mnt)
 {
        kfree(mnt->mnt_devname);
        mnt_free_id(mnt);
@@ -481,20 +493,20 @@ static void free_vfsmnt(struct vfsmount *mnt)
 * @dir. If @dir is set return the first mount else return the last mount.
 * vfsmount_lock must be held for read or write.
 */
-struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
+struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
                              int dir)
 {
        struct list_head *head = mount_hashtable + hash(mnt, dentry);
        struct list_head *tmp = head;
-        struct vfsmount *p, *found = NULL;
+        struct mount *p, *found = NULL;
        for (;;) {
                tmp = dir ? tmp->next : tmp->prev;
                p = NULL;
                if (tmp == head)
                        break;
-                p = list_entry(tmp, struct vfsmount, mnt_hash);
+                p = list_entry(tmp, struct mount, mnt_hash);
-                if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry) {
+                if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) {
                        found = p;
                        break;
                }
@@ -508,16 +520,21 @@ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
 */
 struct vfsmount *lookup_mnt(struct path *path)
 {
-        struct vfsmount *child_mnt;
+        struct mount *child_mnt;
        br_read_lock(vfsmount_lock);
-        if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1)))
+        child_mnt = __lookup_mnt(path->mnt, path->dentry, 1);
-                mntget(child_mnt);
+        if (child_mnt) {
-        br_read_unlock(vfsmount_lock);
+                mnt_add_count(child_mnt, 1);
-        return child_mnt;
+                br_read_unlock(vfsmount_lock);
+                return &child_mnt->mnt;
+        } else {
+                br_read_unlock(vfsmount_lock);
+                return NULL;
+        }
 }
-static inline int check_mnt(struct vfsmount *mnt)
+static inline int check_mnt(struct mount *mnt)
 {
        return mnt->mnt_ns == current->nsproxy->mnt_ns;
 }
@@ -548,12 +565,12 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
 * Clear dentry's mounted state if it has no remaining mounts.
 * vfsmount_lock must be held for write.
 */
-static void dentry_reset_mounted(struct vfsmount *mnt, struct dentry *dentry)
+static void dentry_reset_mounted(struct dentry *dentry)
 {
        unsigned u;
        for (u = 0; u < HASH_SIZE; u++) {
-                struct vfsmount *p;
+                struct mount *p;
                list_for_each_entry(p, &mount_hashtable[u], mnt_hash) {
                        if (p->mnt_mountpoint == dentry)
@@ -568,25 +585,26 @@ static void dentry_reset_mounted(struct vfsmount *mnt, struct dentry *dentry)
 /*
 * vfsmount lock must be held for write
 */
-static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
+static void detach_mnt(struct mount *mnt, struct path *old_path)
 {
        old_path->dentry = mnt->mnt_mountpoint;
-        old_path->mnt = mnt->mnt_parent;
+        old_path->mnt = &mnt->mnt_parent->mnt;
        mnt->mnt_parent = mnt;
-        mnt->mnt_mountpoint = mnt->mnt_root;
+        mnt->mnt_mountpoint = mnt->mnt.mnt_root;
        list_del_init(&mnt->mnt_child);
        list_del_init(&mnt->mnt_hash);
-        dentry_reset_mounted(old_path->mnt, old_path->dentry);
+        dentry_reset_mounted(old_path->dentry);
 }
 /*
 * vfsmount lock must be held for write
 */
-void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
+void mnt_set_mountpoint(struct mount *mnt, struct dentry *dentry,
-                        struct vfsmount *child_mnt)
+                        struct mount *child_mnt)
 {
-        child_mnt->mnt_parent = mntget(mnt);
+        mnt_add_count(mnt, 1);  /* essentially, that's mntget */
        child_mnt->mnt_mountpoint = dget(dentry);
+        child_mnt->mnt_parent = mnt;
        spin_lock(&dentry->d_lock);
        dentry->d_flags |= DCACHE_MOUNTED;
        spin_unlock(&dentry->d_lock);
@@ -595,15 +613,15 @@ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
 /*
 * vfsmount lock must be held for write
 */
-static void attach_mnt(struct vfsmount *mnt, struct path *path)
+static void attach_mnt(struct mount *mnt, struct path *path)
 {
-        mnt_set_mountpoint(path->mnt, path->dentry, mnt);
+        mnt_set_mountpoint(real_mount(path->mnt), path->dentry, mnt);
        list_add_tail(&mnt->mnt_hash, mount_hashtable +
                        hash(path->mnt, path->dentry));
-        list_add_tail(&mnt->mnt_child, &path->mnt->mnt_mounts);
+        list_add_tail(&mnt->mnt_child, &real_mount(path->mnt)->mnt_mounts);
 }
-static inline void __mnt_make_longterm(struct vfsmount *mnt)
+static inline void __mnt_make_longterm(struct mount *mnt)
 {
 #ifdef CONFIG_SMP
        atomic_inc(&mnt->mnt_longterm);
@@ -611,7 +629,7 @@ static inline void __mnt_make_longterm(struct vfsmount *mnt)
 }
 /* needs vfsmount lock for write */
-static inline void __mnt_make_shortterm(struct vfsmount *mnt)
+static inline void __mnt_make_shortterm(struct mount *mnt)
 {
 #ifdef CONFIG_SMP
        atomic_dec(&mnt->mnt_longterm);
@@ -621,10 +639,10 @@ static inline void __mnt_make_shortterm(struct vfsmount *mnt)
 /*
 * vfsmount lock must be held for write
 */
-static void commit_tree(struct vfsmount *mnt)
+static void commit_tree(struct mount *mnt)
 {
-        struct vfsmount *parent = mnt->mnt_parent;
+        struct mount *parent = mnt->mnt_parent;
-        struct vfsmount *m;
+        struct mount *m;
        LIST_HEAD(head);
        struct mnt_namespace *n = parent->mnt_ns;
@@ -639,12 +657,12 @@ static void commit_tree(struct vfsmount *mnt)
        list_splice(&head, n->list.prev);
        list_add_tail(&mnt->mnt_hash, mount_hashtable +
-                                hash(parent, mnt->mnt_mountpoint));
+                                hash(&parent->mnt, mnt->mnt_mountpoint));
        list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
        touch_mnt_namespace(n);
 }
-static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
+static struct mount *next_mnt(struct mount *p, struct mount *root)
 {
        struct list_head *next = p->mnt_mounts.next;
        if (next == &p->mnt_mounts) {
@@ -657,14 +675,14 @@ static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
                        p = p->mnt_parent;
                }
        }
-        return list_entry(next, struct vfsmount, mnt_child);
+        return list_entry(next, struct mount, mnt_child);
 }
-static struct vfsmount *skip_mnt_tree(struct vfsmount *p)
+static struct mount *skip_mnt_tree(struct mount *p)
 {
        struct list_head *prev = p->mnt_mounts.prev;
        while (prev != &p->mnt_mounts) {
-                p = list_entry(prev, struct vfsmount, mnt_child);
+                p = list_entry(prev, struct mount, mnt_child);
                prev = p->mnt_mounts.prev;
        }
        return p;
@@ -673,7 +691,7 @@ static struct vfsmount *skip_mnt_tree(struct vfsmount *p)
 struct vfsmount *
 vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
 {
-        struct vfsmount *mnt;
+        struct mount *mnt;
        struct dentry *root;
        if (!type)
@@ -684,7 +702,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
                return ERR_PTR(-ENOMEM);
        if (flags & MS_KERNMOUNT)
-                mnt->mnt_flags = MNT_INTERNAL;
+                mnt->mnt.mnt_flags = MNT_INTERNAL;
        root = mount_fs(type, flags, name, data);
        if (IS_ERR(root)) {
@@ -692,19 +710,22 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
                return ERR_CAST(root);
        }
-        mnt->mnt_root = root;
+        mnt->mnt.mnt_root = root;
-        mnt->mnt_sb = root->d_sb;
+        mnt->mnt.mnt_sb = root->d_sb;
-        mnt->mnt_mountpoint = mnt->mnt_root;
+        mnt->mnt_mountpoint = mnt->mnt.mnt_root;
        mnt->mnt_parent = mnt;
-        return mnt;
+        br_write_lock(vfsmount_lock);
+        list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts);
+        br_write_unlock(vfsmount_lock);
+        return &mnt->mnt;
 }
 EXPORT_SYMBOL_GPL(vfs_kern_mount);
-static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
+static struct mount *clone_mnt(struct mount *old, struct dentry *root,
                                        int flag)
 {
-        struct super_block *sb = old->mnt_sb;
+        struct super_block *sb = old->mnt.mnt_sb;
-        struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname);
+        struct mount *mnt = alloc_vfsmnt(old->mnt_devname);
        if (mnt) {
                if (flag & (CL_SLAVE | CL_PRIVATE))
@@ -718,12 +739,15 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
                                goto out_free;
                }
-                mnt->mnt_flags = old->mnt_flags & ~MNT_WRITE_HOLD;
+                mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD;
                atomic_inc(&sb->s_active);
-                mnt->mnt_sb = sb;
+                mnt->mnt.mnt_sb = sb;
-                mnt->mnt_root = dget(root);
+                mnt->mnt.mnt_root = dget(root);
-                mnt->mnt_mountpoint = mnt->mnt_root;
+                mnt->mnt_mountpoint = mnt->mnt.mnt_root;
                mnt->mnt_parent = mnt;
+                br_write_lock(vfsmount_lock);
+                list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
+                br_write_unlock(vfsmount_lock);
                if (flag & CL_SLAVE) {
                        list_add(&mnt->mnt_slave, &old->mnt_slave_list);
@@ -753,9 +777,10 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
        return NULL;
 }
-static inline void mntfree(struct vfsmount *mnt)
+static inline void mntfree(struct mount *mnt)
 {
-        struct super_block *sb = mnt->mnt_sb;
+        struct vfsmount *m = &mnt->mnt;
+        struct super_block *sb = m->mnt_sb;
        /*
         * This probably indicates that somebody messed
@@ -768,32 +793,32 @@ static inline void mntfree(struct vfsmount *mnt)
         * so mnt_get_writers() below is safe.
         */
        WARN_ON(mnt_get_writers(mnt));
-        fsnotify_vfsmount_delete(mnt);
+        fsnotify_vfsmount_delete(m);
-        dput(mnt->mnt_root);
+        dput(m->mnt_root);
        free_vfsmnt(mnt);
        deactivate_super(sb);
 }
-static void mntput_no_expire(struct vfsmount *mnt)
+static void mntput_no_expire(struct mount *mnt)
 {
 put_again:
 #ifdef CONFIG_SMP
        br_read_lock(vfsmount_lock);
        if (likely(atomic_read(&mnt->mnt_longterm))) {
-                mnt_dec_count(mnt);
+                mnt_add_count(mnt, -1);
                br_read_unlock(vfsmount_lock);
                return;
        }
        br_read_unlock(vfsmount_lock);
        br_write_lock(vfsmount_lock);
-        mnt_dec_count(mnt);
+        mnt_add_count(mnt, -1);
        if (mnt_get_count(mnt)) {
                br_write_unlock(vfsmount_lock);
                return;
        }
 #else
-        mnt_dec_count(mnt);
+        mnt_add_count(mnt, -1);
        if (likely(mnt_get_count(mnt)))
                return;
        br_write_lock(vfsmount_lock);
@@ -802,9 +827,10 @@ put_again:
                mnt_add_count(mnt, mnt->mnt_pinned + 1);
                mnt->mnt_pinned = 0;
                br_write_unlock(vfsmount_lock);
-                acct_auto_close_mnt(mnt);
+                acct_auto_close_mnt(&mnt->mnt);
                goto put_again;
        }
+        list_del(&mnt->mnt_instance);
        br_write_unlock(vfsmount_lock);
        mntfree(mnt);
 }
@@ -812,10 +838,11 @@ put_again:
 void mntput(struct vfsmount *mnt)
 {
        if (mnt) {
+                struct mount *m = real_mount(mnt);
                /* avoid cacheline pingpong, hope gcc doesn't get "smart" */
-                if (unlikely(mnt->mnt_expiry_mark))
+                if (unlikely(m->mnt_expiry_mark))
-                        mnt->mnt_expiry_mark = 0;
+                        m->mnt_expiry_mark = 0;
-                mntput_no_expire(mnt);
+                mntput_no_expire(m);
        }
 }
 EXPORT_SYMBOL(mntput);
@@ -823,7 +850,7 @@ EXPORT_SYMBOL(mntput);
 struct vfsmount *mntget(struct vfsmount *mnt)
 {
        if (mnt)
-                mnt_inc_count(mnt);
+                mnt_add_count(real_mount(mnt), 1);
        return mnt;
 }
 EXPORT_SYMBOL(mntget);
@@ -831,16 +858,17 @@ EXPORT_SYMBOL(mntget);
 void mnt_pin(struct vfsmount *mnt)
 {
        br_write_lock(vfsmount_lock);
-        mnt->mnt_pinned++;
+        real_mount(mnt)->mnt_pinned++;
        br_write_unlock(vfsmount_lock);
 }
 EXPORT_SYMBOL(mnt_pin);
-void mnt_unpin(struct vfsmount *mnt)
+void mnt_unpin(struct vfsmount *m)
 {
+        struct mount *mnt = real_mount(m);
        br_write_lock(vfsmount_lock);
        if (mnt->mnt_pinned) {
-                mnt_inc_count(mnt);
+                mnt_add_count(mnt, 1);
                mnt->mnt_pinned--;
        }
        br_write_unlock(vfsmount_lock);
@@ -858,12 +886,12 @@ static inline void mangle(struct seq_file *m, const char *s)
 *
 * See also save_mount_options().
 */
-int generic_show_options(struct seq_file *m, struct vfsmount *mnt)
+int generic_show_options(struct seq_file *m, struct dentry *root)
 {
        const char *options;
        rcu_read_lock();
-        options = rcu_dereference(mnt->mnt_sb->s_options);
+        options = rcu_dereference(root->d_sb->s_options);
        if (options != NULL && options[0]) {
                seq_putc(m, ',');
@@ -907,10 +935,10 @@ void replace_mount_options(struct super_block *sb, char *options)
 EXPORT_SYMBOL(replace_mount_options);
 #ifdef CONFIG_PROC_FS
-/* iterator */
+/* iterator; we want it to have access to namespace_sem, thus here... */
 static void *m_start(struct seq_file *m, loff_t *pos)
 {
-        struct proc_mounts *p = m->private;
+        struct proc_mounts *p = container_of(m, struct proc_mounts, m);
        down_read(&namespace_sem);
        return seq_list_start(&p->ns->list, *pos);
@@ -918,7 +946,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
 static void *m_next(struct seq_file *m, void *v, loff_t *pos)
 {
-        struct proc_mounts *p = m->private;
+        struct proc_mounts *p = container_of(m, struct proc_mounts, m);
        return seq_list_next(v, &p->ns->list, pos);
 }
@@ -928,219 +956,18 @@ static void m_stop(struct seq_file *m, void *v)
        up_read(&namespace_sem);
 }
-int mnt_had_events(struct proc_mounts *p)
+static int m_show(struct seq_file *m, void *v)
 {
-        struct mnt_namespace *ns = p->ns;
+        struct proc_mounts *p = container_of(m, struct proc_mounts, m);
-        int res = 0;
+        struct mount *r = list_entry(v, struct mount, mnt_list);
+        return p->show(m, &r->mnt);
-        br_read_lock(vfsmount_lock);
-        if (p->m.poll_event != ns->event) {
-                p->m.poll_event = ns->event;
-                res = 1;
-        }
-        br_read_unlock(vfsmount_lock);
-        return res;
-}
-struct proc_fs_info {
-        int flag;
-        const char *str;
-};
-static int show_sb_opts(struct seq_file *m, struct super_block *sb)
-{
-        static const struct proc_fs_info fs_info[] = {
-                { MS_SYNCHRONOUS, ",sync" },
-                { MS_DIRSYNC, ",dirsync" },
-                { MS_MANDLOCK, ",mand" },
-                { 0, NULL }
-        };
-        const struct proc_fs_info *fs_infop;
-        for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
-                if (sb->s_flags & fs_infop->flag)
-                        seq_puts(m, fs_infop->str);
-        }
-        return security_sb_show_options(m, sb);
-}
-static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
-{
-        static const struct proc_fs_info mnt_info[] = {
-                { MNT_NOSUID, ",nosuid" },
-                { MNT_NODEV, ",nodev" },
-                { MNT_NOEXEC, ",noexec" },
-                { MNT_NOATIME, ",noatime" },
-                { MNT_NODIRATIME, ",nodiratime" },
-                { MNT_RELATIME, ",relatime" },
-                { 0, NULL }
-        };
-        const struct proc_fs_info *fs_infop;
-        for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) {
-                if (mnt->mnt_flags & fs_infop->flag)
-                        seq_puts(m, fs_infop->str);
-        }
-}
-static void show_type(struct seq_file *m, struct super_block *sb)
-{
-        mangle(m, sb->s_type->name);
-        if (sb->s_subtype && sb->s_subtype[0]) {
-                seq_putc(m, '.');
-                mangle(m, sb->s_subtype);
-        }
-}
-static int show_vfsmnt(struct seq_file *m, void *v)
-{
-        struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
-        int err = 0;
-        struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
-        if (mnt->mnt_sb->s_op->show_devname) {
-                err = mnt->mnt_sb->s_op->show_devname(m, mnt);
-                if (err)
-                        goto out;
-        } else {
-                mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
-        }
-        seq_putc(m, ' ');
-        seq_path(m, &mnt_path, " \t\n\\");
-        seq_putc(m, ' ');
-        show_type(m, mnt->mnt_sb);
-        seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
-        err = show_sb_opts(m, mnt->mnt_sb);
-        if (err)
-                goto out;
-        show_mnt_opts(m, mnt);
-        if (mnt->mnt_sb->s_op->show_options)
-                err = mnt->mnt_sb->s_op->show_options(m, mnt);
-        seq_puts(m, " 0 0\n");
-out:
-        return err;
 }
 const struct seq_operations mounts_op = {
        .start  = m_start,
        .next   = m_next,
        .stop   = m_stop,
-        .show   = show_vfsmnt
+        .show   = m_show,
-};
-static int show_mountinfo(struct seq_file *m, void *v)
-{
-        struct proc_mounts *p = m->private;
-        struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
-        struct super_block *sb = mnt->mnt_sb;
-        struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
-        struct path root = p->root;
-        int err = 0;
-        seq_printf(m, "%i %i %u:%u ", mnt->mnt_id, mnt->mnt_parent->mnt_id,
-                   MAJOR(sb->s_dev), MINOR(sb->s_dev));
-        if (sb->s_op->show_path)
-                err = sb->s_op->show_path(m, mnt);
-        else
-                seq_dentry(m, mnt->mnt_root, " \t\n\\");
-        if (err)
-                goto out;
-        seq_putc(m, ' ');
-        /* mountpoints outside of chroot jail will give SEQ_SKIP on this */
-        err = seq_path_root(m, &mnt_path, &root, " \t\n\\");
-        if (err)
-                goto out;
-        seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw");
-        show_mnt_opts(m, mnt);
-        /* Tagged fields ("foo:X" or "bar") */
-        if (IS_MNT_SHARED(mnt))
-                seq_printf(m, " shared:%i", mnt->mnt_group_id);
-        if (IS_MNT_SLAVE(mnt)) {
-                int master = mnt->mnt_master->mnt_group_id;
-                int dom = get_dominating_id(mnt, &p->root);
-                seq_printf(m, " master:%i", master);
-                if (dom && dom != master)
-                        seq_printf(m, " propagate_from:%i", dom);
-        }
-        if (IS_MNT_UNBINDABLE(mnt))
-                seq_puts(m, " unbindable");
-        /* Filesystem specific data */
-        seq_puts(m, " - ");
-        show_type(m, sb);
-        seq_putc(m, ' ');
-        if (sb->s_op->show_devname)
-                err = sb->s_op->show_devname(m, mnt);
-        else
-                mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
-        if (err)
-                goto out;
-        seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw");
-        err = show_sb_opts(m, sb);
-        if (err)
-                goto out;
-        if (sb->s_op->show_options)
-                err = sb->s_op->show_options(m, mnt);
-        seq_putc(m, '\n');
-out:
-        return err;
-}
-const struct seq_operations mountinfo_op = {
-        .start  = m_start,
-        .next   = m_next,
-        .stop   = m_stop,
-        .show   = show_mountinfo,
-};
-static int show_vfsstat(struct seq_file *m, void *v)
-{
-        struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
-        struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
-        int err = 0;
-        /* device */
-        if (mnt->mnt_sb->s_op->show_devname) {
-                seq_puts(m, "device ");
-                err = mnt->mnt_sb->s_op->show_devname(m, mnt);
-        } else {
-                if (mnt->mnt_devname) {
-                        seq_puts(m, "device ");
-                        mangle(m, mnt->mnt_devname);
-                } else
-                        seq_puts(m, "no device");
-        }
-        /* mount point */
-        seq_puts(m, " mounted on ");
-        seq_path(m, &mnt_path, " \t\n\\");
-        seq_putc(m, ' ');
-        /* file system type */
-        seq_puts(m, "with fstype ");
-        show_type(m, mnt->mnt_sb);
-        /* optional statistics */
-        if (mnt->mnt_sb->s_op->show_stats) {
-                seq_putc(m, ' ');
-                if (!err)
-                        err = mnt->mnt_sb->s_op->show_stats(m, mnt);
-        }
-        seq_putc(m, '\n');
-        return err;
-}
-const struct seq_operations mountstats_op = {
-        .start  = m_start,
-        .next   = m_next,
-        .stop   = m_stop,
-        .show   = show_vfsstat,
 };
 #endif  /* CONFIG_PROC_FS */
@@ -1152,11 +979,13 @@ const struct seq_operations mountstats_op = {
 * open files, pwds, chroots or sub mounts that are
 * busy.
 */
-int may_umount_tree(struct vfsmount *mnt)
+int may_umount_tree(struct vfsmount *m)
 {
+        struct mount *mnt = real_mount(m);
        int actual_refs = 0;
        int minimum_refs = 0;
-        struct vfsmount *p;
+        struct mount *p;
+        BUG_ON(!m);
        /* write lock needed for mnt_get_count */
        br_write_lock(vfsmount_lock);
@@ -1192,7 +1021,7 @@ int may_umount(struct vfsmount *mnt)
        int ret = 1;
        down_read(&namespace_sem);
        br_write_lock(vfsmount_lock);
-        if (propagate_mount_busy(mnt, 2))
+        if (propagate_mount_busy(real_mount(mnt), 2))
                ret = 0;
        br_write_unlock(vfsmount_lock);
        up_read(&namespace_sem);
@@ -1203,25 +1032,25 @@ EXPORT_SYMBOL(may_umount);
 void release_mounts(struct list_head *head)
 {
-        struct vfsmount *mnt;
+        struct mount *mnt;
        while (!list_empty(head)) {
-                mnt = list_first_entry(head, struct vfsmount, mnt_hash);
+                mnt = list_first_entry(head, struct mount, mnt_hash);
                list_del_init(&mnt->mnt_hash);
-                if (mnt->mnt_parent != mnt) {
+                if (mnt_has_parent(mnt)) {
                        struct dentry *dentry;
-                        struct vfsmount *m;
+                        struct mount *m;
                        br_write_lock(vfsmount_lock);
                        dentry = mnt->mnt_mountpoint;
                        m = mnt->mnt_parent;
-                        mnt->mnt_mountpoint = mnt->mnt_root;
+                        mnt->mnt_mountpoint = mnt->mnt.mnt_root;
                        mnt->mnt_parent = mnt;
                        m->mnt_ghosts--;
                        br_write_unlock(vfsmount_lock);
                        dput(dentry);
-                        mntput(m);
+                        mntput(&m->mnt);
                }
-                mntput(mnt);
+                mntput(&mnt->mnt);
        }
 }
@@ -1229,10 +1058,10 @@ void release_mounts(struct list_head *head)
 * vfsmount lock must be held for write
 * namespace_sem must be held for write
 */
-void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
+void umount_tree(struct mount *mnt, int propagate, struct list_head *kill)
 {
        LIST_HEAD(tmp_list);
-        struct vfsmount *p;
+        struct mount *p;
        for (p = mnt; p; p = next_mnt(p, mnt))
                list_move(&p->mnt_hash, &tmp_list);
@@ -1247,24 +1076,24 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
                p->mnt_ns = NULL;
                __mnt_make_shortterm(p);
                list_del_init(&p->mnt_child);
-                if (p->mnt_parent != p) {
+                if (mnt_has_parent(p)) {
                        p->mnt_parent->mnt_ghosts++;
-                        dentry_reset_mounted(p->mnt_parent, p->mnt_mountpoint);
+                        dentry_reset_mounted(p->mnt_mountpoint);
                }
                change_mnt_propagation(p, MS_PRIVATE);
        }
        list_splice(&tmp_list, kill);
 }
-static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts);
+static void shrink_submounts(struct mount *mnt, struct list_head *umounts);
-static int do_umount(struct vfsmount *mnt, int flags)
+static int do_umount(struct mount *mnt, int flags)
 {
-        struct super_block *sb = mnt->mnt_sb;
+        struct super_block *sb = mnt->mnt.mnt_sb;
        int retval;
        LIST_HEAD(umount_list);
-        retval = security_sb_umount(mnt, flags);
+        retval = security_sb_umount(&mnt->mnt, flags);
        if (retval)
                return retval;
@@ -1275,7 +1104,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
         *  (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount]
         */
        if (flags & MNT_EXPIRE) {
-                if (mnt == current->fs->root.mnt ||
+                if (&mnt->mnt == current->fs->root.mnt ||
                    flags & (MNT_FORCE | MNT_DETACH))
                        return -EINVAL;
@@ -1317,7 +1146,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
         * /reboot - static binary that would close all descriptors and
         * call reboot(9). Then init(8) could umount root and exec /reboot.
         */
-        if (mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) {
+        if (&mnt->mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) {
                /*
                 * Special case for "unmounting" root ...
                 * we just try to remount it readonly.
@@ -1359,6 +1188,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
 SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
 {
        struct path path;
+        struct mount *mnt;
        int retval;
        int lookup_flags = 0;
@@ -1371,21 +1201,22 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
        retval = user_path_at(AT_FDCWD, name, lookup_flags, &path);
        if (retval)
                goto out;
+        mnt = real_mount(path.mnt);
        retval = -EINVAL;
        if (path.dentry != path.mnt->mnt_root)
                goto dput_and_out;
-        if (!check_mnt(path.mnt))
+        if (!check_mnt(mnt))
                goto dput_and_out;
        retval = -EPERM;
        if (!capable(CAP_SYS_ADMIN))
                goto dput_and_out;
-        retval = do_umount(path.mnt, flags);
+        retval = do_umount(mnt, flags);
 dput_and_out:
        /* we mustn't call path_put() as that would clear mnt_expiry_mark */
        dput(path.dentry);
-        mntput_no_expire(path.mnt);
+        mntput_no_expire(mnt);
 out:
        return retval;
 }
@@ -1420,10 +1251,10 @@ static int mount_is_safe(struct path *path)
 #endif
 }
-struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
+struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
                                        int flag)
 {
-        struct vfsmount *res, *p, *q, *r, *s;
+        struct mount *res, *p, *q, *r;
        struct path path;
        if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt))
@@ -1436,6 +1267,7 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
        p = mnt;
        list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
+                struct mount *s;
                if (!is_subdir(r->mnt_mountpoint, dentry))
                        continue;
@@ -1449,9 +1281,9 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
                                q = q->mnt_parent;
                        }
                        p = s;
-                        path.mnt = q;
+                        path.mnt = &q->mnt;
                        path.dentry = p->mnt_mountpoint;
-                        q = clone_mnt(p, p->mnt_root, flag);
+                        q = clone_mnt(p, p->mnt.mnt_root, flag);
                        if (!q)
                                goto Enomem;
                        br_write_lock(vfsmount_lock);
@@ -1474,11 +1306,12 @@ Enomem:
 struct vfsmount *collect_mounts(struct path *path)
 {
-        struct vfsmount *tree;
+        struct mount *tree;
        down_write(&namespace_sem);
-        tree = copy_tree(path->mnt, path->dentry, CL_COPY_ALL | CL_PRIVATE);
+        tree = copy_tree(real_mount(path->mnt), path->dentry,
+                         CL_COPY_ALL | CL_PRIVATE);
        up_write(&namespace_sem);
-        return tree;
+        return tree ? &tree->mnt : NULL;
 }
 void drop_collected_mounts(struct vfsmount *mnt)
@@ -1486,7 +1319,7 @@ void drop_collected_mounts(struct vfsmount *mnt)
        LIST_HEAD(umount_list);
        down_write(&namespace_sem);
        br_write_lock(vfsmount_lock);
-        umount_tree(mnt, 0, &umount_list);
+        umount_tree(real_mount(mnt), 0, &umount_list);
        br_write_unlock(vfsmount_lock);
        up_write(&namespace_sem);
        release_mounts(&umount_list);
@@ -1495,21 +1328,21 @@ void drop_collected_mounts(struct vfsmount *mnt)
 int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
                   struct vfsmount *root)
 {
-        struct vfsmount *mnt;
+        struct mount *mnt;
        int res = f(root, arg);
        if (res)
                return res;
-        list_for_each_entry(mnt, &root->mnt_list, mnt_list) {
+        list_for_each_entry(mnt, &real_mount(root)->mnt_list, mnt_list) {
-                res = f(mnt, arg);
+                res = f(&mnt->mnt, arg);
                if (res)
                        return res;
        }
        return 0;
 }
-static void cleanup_group_ids(struct vfsmount *mnt, struct vfsmount *end)
+static void cleanup_group_ids(struct mount *mnt, struct mount *end)
 {
-        struct vfsmount *p;
+        struct mount *p;
        for (p = mnt; p != end; p = next_mnt(p, mnt)) {
                if (p->mnt_group_id && !IS_MNT_SHARED(p))
@@ -1517,9 +1350,9 @@ static void cleanup_group_ids(struct vfsmount *mnt, struct vfsmount *end)
        }
 }
-static int invent_group_ids(struct vfsmount *mnt, bool recurse)
+static int invent_group_ids(struct mount *mnt, bool recurse)
 {
-        struct vfsmount *p;
+        struct mount *p;
        for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) {
                if (!p->mnt_group_id && !IS_MNT_SHARED(p)) {
@@ -1597,13 +1430,13 @@ static int invent_group_ids(struct vfsmount *mnt, bool recurse)
 * Must be called without spinlocks held, since this function can sleep
 * in allocations.
 */
-static int attach_recursive_mnt(struct vfsmount *source_mnt,
+static int attach_recursive_mnt(struct mount *source_mnt,
                        struct path *path, struct path *parent_path)
 {
        LIST_HEAD(tree_list);
-        struct vfsmount *dest_mnt = path->mnt;
+        struct mount *dest_mnt = real_mount(path->mnt);
        struct dentry *dest_dentry = path->dentry;
-        struct vfsmount *child, *p;
+        struct mount *child, *p;
        int err;
        if (IS_MNT_SHARED(dest_mnt)) {
@@ -1624,7 +1457,7 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
        if (parent_path) {
                detach_mnt(source_mnt, parent_path);
                attach_mnt(source_mnt, path);
-                touch_mnt_namespace(parent_path->mnt->mnt_ns);
+                touch_mnt_namespace(source_mnt->mnt_ns);
        } else {
                mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
                commit_tree(source_mnt);
@@ -1672,13 +1505,13 @@ static void unlock_mount(struct path *path)
        mutex_unlock(&path->dentry->d_inode->i_mutex);
 }
-static int graft_tree(struct vfsmount *mnt, struct path *path)
+static int graft_tree(struct mount *mnt, struct path *path)
 {
-        if (mnt->mnt_sb->s_flags & MS_NOUSER)
+        if (mnt->mnt.mnt_sb->s_flags & MS_NOUSER)
                return -EINVAL;
        if (S_ISDIR(path->dentry->d_inode->i_mode) !=
-              S_ISDIR(mnt->mnt_root->d_inode->i_mode))
+              S_ISDIR(mnt->mnt.mnt_root->d_inode->i_mode))
                return -ENOTDIR;
        if (d_unlinked(path->dentry))
@@ -1709,7 +1542,8 @@ static int flags_to_propagation_type(int flags)
 */
 static int do_change_type(struct path *path, int flag)
 {
-        struct vfsmount *m, *mnt = path->mnt;
+        struct mount *m;
+        struct mount *mnt = real_mount(path->mnt);
        int recurse = flag & MS_REC;
        int type;
        int err = 0;
@@ -1749,7 +1583,7 @@ static int do_loopback(struct path *path, char *old_name,
 {
        LIST_HEAD(umount_list);
        struct path old_path;
-        struct vfsmount *mnt = NULL;
+        struct mount *mnt = NULL, *old;
        int err = mount_is_safe(path);
        if (err)
                return err;
@@ -1763,18 +1597,20 @@ static int do_loopback(struct path *path, char *old_name,
        if (err)
                goto out;
+        old = real_mount(old_path.mnt);
        err = -EINVAL;
-        if (IS_MNT_UNBINDABLE(old_path.mnt))
+        if (IS_MNT_UNBINDABLE(old))
                goto out2;
-        if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
+        if (!check_mnt(real_mount(path->mnt)) || !check_mnt(old))
                goto out2;
        err = -ENOMEM;
        if (recurse)
-                mnt = copy_tree(old_path.mnt, old_path.dentry, 0);
+                mnt = copy_tree(old, old_path.dentry, 0);
        else
-                mnt = clone_mnt(old_path.mnt, old_path.dentry, 0);
+                mnt = clone_mnt(old, old_path.dentry, 0);
        if (!mnt)
                goto out2;
@@ -1804,9 +1640,9 @@ static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
                return 0;
        if (readonly_request)
-                error = mnt_make_readonly(mnt);
+                error = mnt_make_readonly(real_mount(mnt));
        else
-                __mnt_unmake_readonly(mnt);
+                __mnt_unmake_readonly(real_mount(mnt));
        return error;
 }
@@ -1820,11 +1656,12 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
 {
        int err;
        struct super_block *sb = path->mnt->mnt_sb;
+        struct mount *mnt = real_mount(path->mnt);
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
-        if (!check_mnt(path->mnt))
+        if (!check_mnt(mnt))
                return -EINVAL;
        if (path->dentry != path->mnt->mnt_root)
@@ -1841,22 +1678,22 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
                err = do_remount_sb(sb, flags, data, 0);
        if (!err) {
                br_write_lock(vfsmount_lock);
-                mnt_flags |= path->mnt->mnt_flags & MNT_PROPAGATION_MASK;
+                mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK;
-                path->mnt->mnt_flags = mnt_flags;
+                mnt->mnt.mnt_flags = mnt_flags;
                br_write_unlock(vfsmount_lock);
        }
        up_write(&sb->s_umount);
        if (!err) {
                br_write_lock(vfsmount_lock);
-                touch_mnt_namespace(path->mnt->mnt_ns);
+                touch_mnt_namespace(mnt->mnt_ns);
                br_write_unlock(vfsmount_lock);
        }
        return err;
 }
-static inline int tree_contains_unbindable(struct vfsmount *mnt)
+static inline int tree_contains_unbindable(struct mount *mnt)
 {
-        struct vfsmount *p;
+        struct mount *p;
        for (p = mnt; p; p = next_mnt(p, mnt)) {
                if (IS_MNT_UNBINDABLE(p))
                        return 1;
@@ -1867,7 +1704,8 @@ static inline int tree_contains_unbindable(struct vfsmount *mnt)
 static int do_move_mount(struct path *path, char *old_name)
 {
        struct path old_path, parent_path;
-        struct vfsmount *p;
+        struct mount *p;
+        struct mount *old;
        int err = 0;
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
@@ -1881,8 +1719,11 @@ static int do_move_mount(struct path *path, char *old_name)
        if (err < 0)
                goto out;
+        old = real_mount(old_path.mnt);
+        p = real_mount(path->mnt);
        err = -EINVAL;
-        if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
+        if (!check_mnt(p) || !check_mnt(old))
                goto out1;
        if (d_unlinked(path->dentry))
@@ -1892,7 +1733,7 @@ static int do_move_mount(struct path *path, char *old_name)
        if (old_path.dentry != old_path.mnt->mnt_root)
                goto out1;
-        if (old_path.mnt == old_path.mnt->mnt_parent)
+        if (!mnt_has_parent(old))
                goto out1;
        if (S_ISDIR(path->dentry->d_inode->i_mode) !=
@@ -1901,28 +1742,26 @@ static int do_move_mount(struct path *path, char *old_name)
        /*
         * Don't move a mount residing in a shared parent.
         */
-        if (old_path.mnt->mnt_parent &&
+        if (IS_MNT_SHARED(old->mnt_parent))
-            IS_MNT_SHARED(old_path.mnt->mnt_parent))
                goto out1;
        /*
         * Don't move a mount tree containing unbindable mounts to a destination
         * mount which is shared.
         */
-        if (IS_MNT_SHARED(path->mnt) &&
+        if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
-            tree_contains_unbindable(old_path.mnt))
                goto out1;
        err = -ELOOP;
-        for (p = path->mnt; p->mnt_parent != p; p = p->mnt_parent)
+        for (; mnt_has_parent(p); p = p->mnt_parent)
-                if (p == old_path.mnt)
+                if (p == old)
                        goto out1;
-        err = attach_recursive_mnt(old_path.mnt, path, &parent_path);
+        err = attach_recursive_mnt(old, path, &parent_path);
        if (err)
                goto out1;
        /* if the mount is moved, it should no longer be expire
         * automatically */
-        list_del_init(&old_path.mnt->mnt_expire);
+        list_del_init(&old->mnt_expire);
 out1:
        unlock_mount(path);
 out:
@@ -1955,7 +1794,7 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
        return ERR_PTR(err);
 }
-struct vfsmount *
+static struct vfsmount *
 do_kern_mount(const char *fstype, int flags, const char *name, void *data)
 {
        struct file_system_type *type = get_fs_type(fstype);
@@ -1969,12 +1808,11 @@ do_kern_mount(const char *fstype, int flags, const char *name, void *data)
        put_filesystem(type);
        return mnt;
 }
-EXPORT_SYMBOL_GPL(do_kern_mount);
 /*
 * add a mount into a namespace's mount tree
 */
-static int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags)
+static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
 {
        int err;
@@ -1985,20 +1823,20 @@ static int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flag
                return err;
        err = -EINVAL;
-        if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt))
+        if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(real_mount(path->mnt)))
                goto unlock;
        /* Refuse the same filesystem on the same mount point */
        err = -EBUSY;
-        if (path->mnt->mnt_sb == newmnt->mnt_sb &&
+        if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb &&
            path->mnt->mnt_root == path->dentry)
                goto unlock;
        err = -EINVAL;
-        if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode))
+        if (S_ISLNK(newmnt->mnt.mnt_root->d_inode->i_mode))
                goto unlock;
-        newmnt->mnt_flags = mnt_flags;
+        newmnt->mnt.mnt_flags = mnt_flags;
        err = graft_tree(newmnt, path);
 unlock:
@@ -2027,7 +1865,7 @@ static int do_new_mount(struct path *path, char *type, int flags,
        if (IS_ERR(mnt))
                return PTR_ERR(mnt);
-        err = do_add_mount(mnt, path, mnt_flags);
+        err = do_add_mount(real_mount(mnt), path, mnt_flags);
        if (err)
                mntput(mnt);
        return err;
@@ -2035,11 +1873,12 @@ static int do_new_mount(struct path *path, char *type, int flags,
 int finish_automount(struct vfsmount *m, struct path *path)
 {
+        struct mount *mnt = real_mount(m);
        int err;
        /* The new mount record should have at least 2 refs to prevent it being
         * expired before we get a chance to add it
         */
-        BUG_ON(mnt_get_count(m) < 2);
+        BUG_ON(mnt_get_count(mnt) < 2);
        if (m->mnt_sb == path->mnt->mnt_sb &&
            m->mnt_root == path->dentry) {
@@ -2047,15 +1886,15 @@ int finish_automount(struct vfsmount *m, struct path *path)
                goto fail;
        }
-        err = do_add_mount(m, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
+        err = do_add_mount(mnt, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
        if (!err)
                return 0;
 fail:
        /* remove m from any expiration list it may be on */
-        if (!list_empty(&m->mnt_expire)) {
+        if (!list_empty(&mnt->mnt_expire)) {
                down_write(&namespace_sem);
                br_write_lock(vfsmount_lock);
-                list_del_init(&m->mnt_expire);
+                list_del_init(&mnt->mnt_expire);
                br_write_unlock(vfsmount_lock);
                up_write(&namespace_sem);
        }
@@ -2074,7 +1913,7 @@ void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
        down_write(&namespace_sem);
        br_write_lock(vfsmount_lock);
-        list_add_tail(&mnt->mnt_expire, expiry_list);
+        list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);
        br_write_unlock(vfsmount_lock);
        up_write(&namespace_sem);
@@ -2088,7 +1927,7 @@ EXPORT_SYMBOL(mnt_set_expiry);
 */
 void mark_mounts_for_expiry(struct list_head *mounts)
 {
-        struct vfsmount *mnt, *next;
+        struct mount *mnt, *next;
        LIST_HEAD(graveyard);
        LIST_HEAD(umounts);
@@ -2111,7 +1950,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
                list_move(&mnt->mnt_expire, &graveyard);
        }
        while (!list_empty(&graveyard)) {
-                mnt = list_first_entry(&graveyard, struct vfsmount, mnt_expire);
+                mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
                touch_mnt_namespace(mnt->mnt_ns);
                umount_tree(mnt, 1, &umounts);
        }
@@ -2129,9 +1968,9 @@ EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
 * search the list of submounts for a given mountpoint, and move any
 * shrinkable submounts to the 'graveyard' list.
 */
-static int select_submounts(struct vfsmount *parent, struct list_head *graveyard)
+static int select_submounts(struct mount *parent, struct list_head *graveyard)
 {
-        struct vfsmount *this_parent = parent;
+        struct mount *this_parent = parent;
        struct list_head *next;
        int found = 0;
@@ -2140,10 +1979,10 @@ repeat:
 resume:
        while (next != &this_parent->mnt_mounts) {
                struct list_head *tmp = next;
-                struct vfsmount *mnt = list_entry(tmp, struct vfsmount, mnt_child);
+                struct mount *mnt = list_entry(tmp, struct mount, mnt_child);
                next = tmp->next;
-                if (!(mnt->mnt_flags & MNT_SHRINKABLE))
+                if (!(mnt->mnt.mnt_flags & MNT_SHRINKABLE))
                        continue;
                /*
                 * Descend a level if the d_mounts list is non-empty.
@@ -2175,15 +2014,15 @@ resume:
 *
 * vfsmount_lock must be held for write
 */
-static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts)
+static void shrink_submounts(struct mount *mnt, struct list_head *umounts)
 {
        LIST_HEAD(graveyard);
-        struct vfsmount *m;
+        struct mount *m;
        /* extract submounts of 'mountpoint' from the expiration list */
        while (select_submounts(mnt, &graveyard)) {
                while (!list_empty(&graveyard)) {
-                        m = list_first_entry(&graveyard, struct vfsmount,
+                        m = list_first_entry(&graveyard, struct mount,
                                                mnt_expire);
                        touch_mnt_namespace(m->mnt_ns);
                        umount_tree(m, 1, umounts);
@@ -2370,12 +2209,13 @@ static struct mnt_namespace *alloc_mnt_ns(void)
 void mnt_make_longterm(struct vfsmount *mnt)
 {
-        __mnt_make_longterm(mnt);
+        __mnt_make_longterm(real_mount(mnt));
 }
-void mnt_make_shortterm(struct vfsmount *mnt)
+void mnt_make_shortterm(struct vfsmount *m)
 {
 #ifdef CONFIG_SMP
+        struct mount *mnt = real_mount(m);
        if (atomic_add_unless(&mnt->mnt_longterm, -1, 1))
                return;
        br_write_lock(vfsmount_lock);
@@ -2393,7 +2233,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
 {
        struct mnt_namespace *new_ns;
        struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
-        struct vfsmount *p, *q;
+        struct mount *p, *q;
+        struct mount *old = mnt_ns->root;
+        struct mount *new;
        new_ns = alloc_mnt_ns();
        if (IS_ERR(new_ns))
@@ -2401,15 +2243,15 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
        down_write(&namespace_sem);
        /* First pass: copy the tree topology */
-        new_ns->root = copy_tree(mnt_ns->root, mnt_ns->root->mnt_root,
+        new = copy_tree(old, old->mnt.mnt_root, CL_COPY_ALL | CL_EXPIRE);
-                                        CL_COPY_ALL | CL_EXPIRE);
+        if (!new) {
-        if (!new_ns->root) {
                up_write(&namespace_sem);
                kfree(new_ns);
                return ERR_PTR(-ENOMEM);
        }
+        new_ns->root = new;
        br_write_lock(vfsmount_lock);
-        list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
+        list_add_tail(&new_ns->list, &new->mnt_list);
        br_write_unlock(vfsmount_lock);
        /*
@@ -2417,27 +2259,27 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
         * as belonging to new namespace.  We have already acquired a private
         * fs_struct, so tsk->fs->lock is not needed.
         */
-        p = mnt_ns->root;
+        p = old;
-        q = new_ns->root;
+        q = new;
        while (p) {
                q->mnt_ns = new_ns;
                __mnt_make_longterm(q);
                if (fs) {
-                        if (p == fs->root.mnt) {
+                        if (&p->mnt == fs->root.mnt) {
-                                fs->root.mnt = mntget(q);
+                                fs->root.mnt = mntget(&q->mnt);
                                __mnt_make_longterm(q);
-                                mnt_make_shortterm(p);
+                                mnt_make_shortterm(&p->mnt);
-                                rootmnt = p;
+                                rootmnt = &p->mnt;
                        }
-                        if (p == fs->pwd.mnt) {
+                        if (&p->mnt == fs->pwd.mnt) {
-                                fs->pwd.mnt = mntget(q);
+                                fs->pwd.mnt = mntget(&q->mnt);
                                __mnt_make_longterm(q);
-                                mnt_make_shortterm(p);
+                                mnt_make_shortterm(&p->mnt);
-                                pwdmnt = p;
+                                pwdmnt = &p->mnt;
                        }
                }
-                p = next_mnt(p, mnt_ns->root);
+                p = next_mnt(p, old);
-                q = next_mnt(q, new_ns->root);
+                q = next_mnt(q, new);
        }
        up_write(&namespace_sem);
@@ -2470,22 +2312,20 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
 * create_mnt_ns - creates a private namespace and adds a root filesystem
 * @mnt: pointer to the new root filesystem mountpoint
 */
-struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
+static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
 {
-        struct mnt_namespace *new_ns;
+        struct mnt_namespace *new_ns = alloc_mnt_ns();
-        new_ns = alloc_mnt_ns();
        if (!IS_ERR(new_ns)) {
+                struct mount *mnt = real_mount(m);
                mnt->mnt_ns = new_ns;
                __mnt_make_longterm(mnt);
                new_ns->root = mnt;
-                list_add(&new_ns->list, &new_ns->root->mnt_list);
+                list_add(&new_ns->list, &mnt->mnt_list);
        } else {
-                mntput(mnt);
+                mntput(m);
        }
        return new_ns;
 }
-EXPORT_SYMBOL(create_mnt_ns);
 struct dentry *mount_subtree(struct vfsmount *mnt, const char *name)
 {
@@ -2559,6 +2399,31 @@ out_type:
 }
 /*
+ * Return true if path is reachable from root
+ *
+ * The caller must hold namespace_sem or vfsmount_lock.
+ */
+bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
+                         const struct path *root)
+{
+        while (&mnt->mnt != root->mnt && mnt_has_parent(mnt)) {
+                dentry = mnt->mnt_mountpoint;
+                mnt = mnt->mnt_parent;
+        }
+        return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry);
+}
+int path_is_under(struct path *path1, struct path *path2)
+{
+        int res;
+        br_read_lock(vfsmount_lock);
+        res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2);
+        br_read_unlock(vfsmount_lock);
+        return res;
+}
+EXPORT_SYMBOL(path_is_under);
+/*
 * pivot_root Semantics:
 * Moves the root file system of the current process to the directory put_old,
 * makes new_root as the new root file system of the current process, and sets
@@ -2586,8 +2451,8 @@ out_type:
 SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
                const char __user *, put_old)
 {
-        struct vfsmount *tmp;
        struct path new, old, parent_path, root_parent, root;
+        struct mount *new_mnt, *root_mnt;
        int error;
        if (!capable(CAP_SYS_ADMIN))
@@ -2611,11 +2476,13 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
                goto out3;
        error = -EINVAL;
-        if (IS_MNT_SHARED(old.mnt) ||
+        new_mnt = real_mount(new.mnt);
-                IS_MNT_SHARED(new.mnt->mnt_parent) ||
+        root_mnt = real_mount(root.mnt);
-                IS_MNT_SHARED(root.mnt->mnt_parent))
+        if (IS_MNT_SHARED(real_mount(old.mnt)) ||
+                IS_MNT_SHARED(new_mnt->mnt_parent) ||
+                IS_MNT_SHARED(root_mnt->mnt_parent))
                goto out4;
-        if (!check_mnt(root.mnt) || !check_mnt(new.mnt))
+        if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
                goto out4;
        error = -ENOENT;
        if (d_unlinked(new.dentry))
@@ -2629,33 +2496,22 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
        error = -EINVAL;
        if (root.mnt->mnt_root != root.dentry)
                goto out4; /* not a mountpoint */
-        if (root.mnt->mnt_parent == root.mnt)
+        if (!mnt_has_parent(root_mnt))
                goto out4; /* not attached */
        if (new.mnt->mnt_root != new.dentry)
                goto out4; /* not a mountpoint */
-        if (new.mnt->mnt_parent == new.mnt)
+        if (!mnt_has_parent(new_mnt))
                goto out4; /* not attached */
        /* make sure we can reach put_old from new_root */
-        tmp = old.mnt;
+        if (!is_path_reachable(real_mount(old.mnt), old.dentry, &new))
-        if (tmp != new.mnt) {
-                for (;;) {
-                        if (tmp->mnt_parent == tmp)
-                                goto out4; /* already mounted on put_old */
-                        if (tmp->mnt_parent == new.mnt)
-                                break;
-                        tmp = tmp->mnt_parent;
-                }
-                if (!is_subdir(tmp->mnt_mountpoint, new.dentry))
-                        goto out4;
-        } else if (!is_subdir(old.dentry, new.dentry))
                goto out4;
        br_write_lock(vfsmount_lock);
-        detach_mnt(new.mnt, &parent_path);
+        detach_mnt(new_mnt, &parent_path);
-        detach_mnt(root.mnt, &root_parent);
+        detach_mnt(root_mnt, &root_parent);
        /* mount old root on put_old */
-        attach_mnt(root.mnt, &old);
+        attach_mnt(root_mnt, &old);
        /* mount new_root on / */
-        attach_mnt(new.mnt, &root_parent);
+        attach_mnt(new_mnt, &root_parent);
        touch_mnt_namespace(current->nsproxy->mnt_ns);
        br_write_unlock(vfsmount_lock);
        chroot_fs_refs(&root, &new);
@@ -2693,8 +2549,8 @@ static void __init init_mount_tree(void)
        init_task.nsproxy->mnt_ns = ns;
        get_mnt_ns(ns);
-        root.mnt = ns->root;
+        root.mnt = mnt;
-        root.dentry = ns->root->mnt_root;
+        root.dentry = mnt->mnt_root;
        set_fs_pwd(current->fs, &root);
        set_fs_root(current->fs, &root);
@@ -2707,7 +2563,7 @@ void __init mnt_init(void)
        init_rwsem(&namespace_sem);
-        mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
+        mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
                        0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
        mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
@@ -2747,7 +2603,6 @@ void put_mnt_ns(struct mnt_namespace *ns)
        release_mounts(&umount_list);
        kfree(ns);
 }
-EXPORT_SYMBOL(put_mnt_ns);
 struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
 {
@@ -2776,5 +2631,5 @@ EXPORT_SYMBOL(kern_unmount);
 bool our_mnt(struct vfsmount *mnt)
 {
-        return check_mnt(mnt);
+        return check_mnt(real_mount(mnt));
 }
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 9c51f621e901..aeed93a6bde0 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -30,15 +30,15 @@ static void ncp_do_readdir(struct file *, void *, filldir_t,
 static int ncp_readdir(struct file *, void *, filldir_t);
-static int ncp_create(struct inode *, struct dentry *, int, struct nameidata *);
+static int ncp_create(struct inode *, struct dentry *, umode_t, struct nameidata *);
 static struct dentry *ncp_lookup(struct inode *, struct dentry *, struct nameidata *);
 static int ncp_unlink(struct inode *, struct dentry *);
-static int ncp_mkdir(struct inode *, struct dentry *, int);
+static int ncp_mkdir(struct inode *, struct dentry *, umode_t);
 static int ncp_rmdir(struct inode *, struct dentry *);
 static int ncp_rename(struct inode *, struct dentry *,
                      struct inode *, struct dentry *);
 static int ncp_mknod(struct inode * dir, struct dentry *dentry,
-                     int mode, dev_t rdev);
+                     umode_t mode, dev_t rdev);
 #if defined(CONFIG_NCPFS_EXTRAS) || defined(CONFIG_NCPFS_NFS_NS)
 extern int ncp_symlink(struct inode *, struct dentry *, const char *);
 #else
@@ -919,7 +919,7 @@ out_close:
        goto out;
 }
-int ncp_create_new(struct inode *dir, struct dentry *dentry, int mode,
+int ncp_create_new(struct inode *dir, struct dentry *dentry, umode_t mode,
                   dev_t rdev, __le32 attributes)
 {
        struct ncp_server *server = NCP_SERVER(dir);
@@ -928,7 +928,7 @@ int ncp_create_new(struct inode *dir, struct dentry *dentry, int mode,
        int opmode;
        __u8 __name[NCP_MAXPATHLEN + 1];
        
-        PPRINTK("ncp_create_new: creating %s/%s, mode=%x\n",
+        PPRINTK("ncp_create_new: creating %s/%s, mode=%hx\n",
                dentry->d_parent->d_name.name, dentry->d_name.name, mode);
        ncp_age_dentry(server, dentry);
@@ -979,13 +979,13 @@ out:
        return error;
 }
-static int ncp_create(struct inode *dir, struct dentry *dentry, int mode,
+static int ncp_create(struct inode *dir, struct dentry *dentry, umode_t mode,
                struct nameidata *nd)
 {
        return ncp_create_new(dir, dentry, mode, 0, 0);
 }
-static int ncp_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int ncp_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
        struct ncp_entry_info finfo;
        struct ncp_server *server = NCP_SERVER(dir);
@@ -1201,12 +1201,12 @@ out:
 }
 static int ncp_mknod(struct inode * dir, struct dentry *dentry,
-                     int mode, dev_t rdev)
+                     umode_t mode, dev_t rdev)
 {
        if (!new_valid_dev(rdev))
                return -EINVAL;
        if (ncp_is_nfs_extras(NCP_SERVER(dir), NCP_FINFO(dir)->volNumber)) {
-                DPRINTK(KERN_DEBUG "ncp_mknod: mode = 0%o\n", mode);
+                DPRINTK(KERN_DEBUG "ncp_mknod: mode = 0%ho\n", mode);
                return ncp_create_new(dir, dentry, mode, rdev, 0);
        }
        return -EPERM; /* Strange, but true */
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 5b5fa33b6b9d..3d1e34f8a68e 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -44,7 +44,7 @@
 static void ncp_evict_inode(struct inode *);
 static void ncp_put_super(struct super_block *);
 static int  ncp_statfs(struct dentry *, struct kstatfs *);
-static int  ncp_show_options(struct seq_file *, struct vfsmount *);
+static int  ncp_show_options(struct seq_file *, struct dentry *);
 static struct kmem_cache * ncp_inode_cachep;
@@ -60,7 +60,6 @@ static struct inode *ncp_alloc_inode(struct super_block *sb)
 static void ncp_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode));
 }
@@ -323,9 +322,9 @@ static void ncp_stop_tasks(struct ncp_server *server) {
                flush_work_sync(&server->timeout_tq);
 }
-static int  ncp_show_options(struct seq_file *seq, struct vfsmount *mnt)
+static int  ncp_show_options(struct seq_file *seq, struct dentry *root)
 {
-        struct ncp_server *server = NCP_SBP(mnt->mnt_sb);
+        struct ncp_server *server = NCP_SBP(root->d_sb);
        unsigned int tmp;
        if (server->m.uid != 0)
@@ -548,7 +547,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
        error = bdi_setup_and_register(&server->bdi, "ncpfs", BDI_CAP_MAP_COPY);
        if (error)
-                goto out_bdi;
+                goto out_fput;
        server->ncp_filp = ncp_filp;
        server->ncp_sock = sock;
@@ -559,7 +558,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
                error = -EBADF;
                server->info_filp = fget(data.info_fd);
                if (!server->info_filp)
-                        goto out_fput;
+                        goto out_bdi;
                error = -ENOTSOCK;
                sock_inode = server->info_filp->f_path.dentry->d_inode;
                if (!S_ISSOCK(sock_inode->i_mode))
@@ -746,9 +745,9 @@ out_nls:
 out_fput2:
        if (server->info_filp)
                fput(server->info_filp);
-out_fput:
-        bdi_destroy(&server->bdi);
 out_bdi:
+        bdi_destroy(&server->bdi);
+out_fput:
        /* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>:
         * 
         * The previously used put_filp(ncp_filp); was bogus, since
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 790e92a9ec63..6958adfaff08 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -901,7 +901,7 @@ long ncp_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
        ret = __ncp_ioctl(inode, cmd, arg);
 outDropWrite:
        if (need_drop_write)
-                mnt_drop_write(filp->f_path.mnt);
+                mnt_drop_write_file(filp);
 out:
        return ret;
 }
diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h
index 09881e6aa5ad..32c06587351a 100644
--- a/fs/ncpfs/ncplib_kernel.h
+++ b/fs/ncpfs/ncplib_kernel.h
@@ -114,7 +114,7 @@ int ncp_dirhandle_alloc(struct ncp_server *, __u8 vol, __le32 dirent, __u8 *dirh
 int ncp_dirhandle_free(struct ncp_server *, __u8 dirhandle);
 int ncp_create_new(struct inode *dir, struct dentry *dentry,
-                          int mode, dev_t rdev, __le32 attributes);
+                          umode_t mode, dev_t rdev, __le32 attributes);
 static inline int ncp_is_nfs_extras(struct ncp_server* server, unsigned int volnum) {
 #ifdef CONFIG_NCPFS_NFS_NS
diff --git a/fs/ncpfs/symlink.c b/fs/ncpfs/symlink.c
index 661f861d80c6..52439ddc8de0 100644
--- a/fs/ncpfs/symlink.c
+++ b/fs/ncpfs/symlink.c
@@ -108,7 +108,7 @@ int ncp_symlink(struct inode *dir, struct dentry *dentry, const char *symname) {
        char *rawlink;
        int length, err, i, outlen;
        int kludge;
-        int mode;
+        umode_t mode;
        __le32 attr;
        unsigned int hdr;
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index ac2899098147..fd9a872fada0 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -47,13 +47,13 @@ static int nfs_opendir(struct inode *, struct file *);
 static int nfs_closedir(struct inode *, struct file *);
 static int nfs_readdir(struct file *, void *, filldir_t);
 static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *);
-static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *);
+static int nfs_create(struct inode *, struct dentry *, umode_t, struct nameidata *);
-static int nfs_mkdir(struct inode *, struct dentry *, int);
+static int nfs_mkdir(struct inode *, struct dentry *, umode_t);
 static int nfs_rmdir(struct inode *, struct dentry *);
 static int nfs_unlink(struct inode *, struct dentry *);
 static int nfs_symlink(struct inode *, struct dentry *, const char *);
 static int nfs_link(struct dentry *, struct inode *, struct dentry *);
-static int nfs_mknod(struct inode *, struct dentry *, int, dev_t);
+static int nfs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
 static int nfs_rename(struct inode *, struct dentry *,
                      struct inode *, struct dentry *);
 static int nfs_fsync_dir(struct file *, loff_t, loff_t, int);
@@ -112,7 +112,7 @@ const struct inode_operations nfs3_dir_inode_operations = {
 #ifdef CONFIG_NFS_V4
 static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *);
-static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd);
+static int nfs_open_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd);
 const struct inode_operations nfs4_dir_inode_operations = {
        .create         = nfs_open_create,
        .lookup         = nfs_atomic_lookup,
@@ -1368,18 +1368,7 @@ static fmode_t flags_to_mode(int flags)
 static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags)
 {
-        struct nfs_open_context *ctx;
+        return alloc_nfs_open_context(dentry, flags_to_mode(open_flags));
-        struct rpc_cred *cred;
-        fmode_t fmode = flags_to_mode(open_flags);
-        cred = rpc_lookup_cred();
-        if (IS_ERR(cred))
-                return ERR_CAST(cred);
-        ctx = alloc_nfs_open_context(dentry, cred, fmode);
-        put_rpccred(cred);
-        if (ctx == NULL)
-                return ERR_PTR(-ENOMEM);
-        return ctx;
 }
 static int do_open(struct inode *inode, struct file *filp)
@@ -1584,8 +1573,8 @@ no_open:
        return nfs_lookup_revalidate(dentry, nd);
 }
-static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode,
+static int nfs_open_create(struct inode *dir, struct dentry *dentry,
-                struct nameidata *nd)
+                umode_t mode, struct nameidata *nd)
 {
        struct nfs_open_context *ctx = NULL;
        struct iattr attr;
@@ -1675,8 +1664,8 @@ out_error:
 * that the operation succeeded on the server, but an error in the
 * reply path made it appear to have failed.
 */
-static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
+static int nfs_create(struct inode *dir, struct dentry *dentry,
-                struct nameidata *nd)
+                umode_t mode, struct nameidata *nd)
 {
        struct iattr attr;
        int error;
@@ -1704,7 +1693,7 @@ out_err:
 * See comments for nfs_proc_create regarding failed operations.
 */
 static int
-nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
+nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
 {
        struct iattr attr;
        int status;
@@ -1730,7 +1719,7 @@ out_err:
 /*
 * See comments for nfs_proc_create regarding failed operations.
 */
-static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int nfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
        struct iattr attr;
        int error;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index eca56d4b39c0..606ef0f20aed 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -147,7 +147,7 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
         * origin == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
         * the cached file length
         */
-        if (origin != SEEK_SET || origin != SEEK_CUR) {
+        if (origin != SEEK_SET && origin != SEEK_CUR) {
                struct inode *inode = filp->f_mapping->host;
                int retval = nfs_revalidate_file_size(inode, filp);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 50a15fa8cf98..81db25e92e10 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -38,6 +38,7 @@
 #include <linux/nfs_xdr.h>
 #include <linux/slab.h>
 #include <linux/compat.h>
+#include <linux/freezer.h>
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -77,7 +78,7 @@ int nfs_wait_bit_killable(void *word)
 {
        if (fatal_signal_pending(current))
                return -ERESTARTSYS;
-        schedule();
+        freezable_schedule();
        return 0;
 }
@@ -629,23 +630,28 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
        nfs_revalidate_inode(server, inode);
 }
-struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred, fmode_t f_mode)
+struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f_mode)
 {
        struct nfs_open_context *ctx;
+        struct rpc_cred *cred = rpc_lookup_cred();
+        if (IS_ERR(cred))
+                return ERR_CAST(cred);
        ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
-        if (ctx != NULL) {
+        if (!ctx) {
-                nfs_sb_active(dentry->d_sb);
+                put_rpccred(cred);
-                ctx->dentry = dget(dentry);
+                return ERR_PTR(-ENOMEM);
-                ctx->cred = get_rpccred(cred);
-                ctx->state = NULL;
-                ctx->mode = f_mode;
-                ctx->flags = 0;
-                ctx->error = 0;
-                nfs_init_lock_context(&ctx->lock_context);
-                ctx->lock_context.open_context = ctx;
-                INIT_LIST_HEAD(&ctx->list);
        }
+        nfs_sb_active(dentry->d_sb);
+        ctx->dentry = dget(dentry);
+        ctx->cred = cred;
+        ctx->state = NULL;
+        ctx->mode = f_mode;
+        ctx->flags = 0;
+        ctx->error = 0;
+        nfs_init_lock_context(&ctx->lock_context);
+        ctx->lock_context.open_context = ctx;
+        INIT_LIST_HEAD(&ctx->list);
        return ctx;
 }
@@ -738,15 +744,10 @@ static void nfs_file_clear_open_context(struct file *filp)
 int nfs_open(struct inode *inode, struct file *filp)
 {
        struct nfs_open_context *ctx;
-        struct rpc_cred *cred;
-        cred = rpc_lookup_cred();
+        ctx = alloc_nfs_open_context(filp->f_path.dentry, filp->f_mode);
-        if (IS_ERR(cred))
+        if (IS_ERR(ctx))
-                return PTR_ERR(cred);
+                return PTR_ERR(ctx);
-        ctx = alloc_nfs_open_context(filp->f_path.dentry, cred, filp->f_mode);
-        put_rpccred(cred);
-        if (ctx == NULL)
-                return -ENOMEM;
        nfs_file_set_open_context(filp, ctx);
        put_nfs_open_context(ctx);
        nfs_fscache_set_inode_cookie(inode, filp);
@@ -1464,7 +1465,6 @@ struct inode *nfs_alloc_inode(struct super_block *sb)
 static void nfs_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(nfs_inode_cachep, NFS_I(inode));
 }
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index d4bc9ed91748..91943953a370 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -17,6 +17,7 @@
 #include <linux/nfs_page.h>
 #include <linux/lockd/bind.h>
 #include <linux/nfs_mount.h>
+#include <linux/freezer.h>
 #include "iostat.h"
 #include "internal.h"
@@ -32,7 +33,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
                res = rpc_call_sync(clnt, msg, flags);
                if (res != -EJUKEBOX && res != -EKEYEXPIRED)
                        break;
-                schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME);
+                freezable_schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME);
                res = -ERESTARTSYS;
        } while (!fatal_signal_pending(current));
        return res;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index be2bbac13817..dcda0ba7af60 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -39,6 +39,8 @@
 #include <linux/delay.h>
 #include <linux/errno.h>
 #include <linux/string.h>
+#include <linux/ratelimit.h>
+#include <linux/printk.h>
 #include <linux/slab.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/gss_api.h>
@@ -53,6 +55,7 @@
 #include <linux/sunrpc/bc_xprt.h>
 #include <linux/xattr.h>
 #include <linux/utsname.h>
+#include <linux/freezer.h>
 #include "nfs4_fs.h"
 #include "delegation.h"
@@ -241,7 +244,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
                *timeout = NFS4_POLL_RETRY_MIN;
        if (*timeout > NFS4_POLL_RETRY_MAX)
                *timeout = NFS4_POLL_RETRY_MAX;
-        schedule_timeout_killable(*timeout);
+        freezable_schedule_timeout_killable(*timeout);
        if (fatal_signal_pending(current))
                res = -ERESTARTSYS;
        *timeout <<= 1;
@@ -894,6 +897,8 @@ out:
 static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode)
 {
+        if (delegation == NULL)
+                return 0;
        if ((delegation->type & fmode) != fmode)
                return 0;
        if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags))
@@ -1036,8 +1041,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
                }
                rcu_read_lock();
                delegation = rcu_dereference(nfsi->delegation);
-                if (delegation == NULL ||
+                if (!can_open_delegated(delegation, fmode)) {
-                    !can_open_delegated(delegation, fmode)) {
                        rcu_read_unlock();
                        break;
                }
@@ -1091,7 +1095,12 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data
                if (delegation)
                        delegation_flags = delegation->flags;
                rcu_read_unlock();
-                if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0)
+                if (data->o_arg.claim == NFS4_OPEN_CLAIM_DELEGATE_CUR) {
+                        pr_err_ratelimited("NFS: Broken NFSv4 server %s is "
+                                        "returning a delegation for "
+                                        "OPEN(CLAIM_DELEGATE_CUR)\n",
+                                        NFS_CLIENT(inode)->cl_server);
+                } else if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0)
                        nfs_inode_set_delegation(state->inode,
                                        data->owner->so_cred,
                                        &data->o_res);
@@ -1423,11 +1432,9 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
                        goto out_no_action;
                rcu_read_lock();
                delegation = rcu_dereference(NFS_I(data->state->inode)->delegation);
-                if (delegation != NULL &&
+                if (data->o_arg.claim != NFS4_OPEN_CLAIM_DELEGATE_CUR &&
-                    test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) == 0) {
+                    can_open_delegated(delegation, data->o_arg.fmode))
-                        rcu_read_unlock();
+                        goto unlock_no_action;
-                        goto out_no_action;
-                }
                rcu_read_unlock();
        }
        /* Update sequence id. */
@@ -1444,6 +1451,8 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
                return;
        rpc_call_start(task);
        return;
+unlock_no_action:
+        rcu_read_unlock();
 out_no_action:
        task->tk_action = NULL;
@@ -3950,7 +3959,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4
 static unsigned long
 nfs4_set_lock_task_retry(unsigned long timeout)
 {
-        schedule_timeout_killable(timeout);
+        freezable_schedule_timeout_killable(timeout);
        timeout <<= 1;
        if (timeout > NFS4_LOCK_MAXTIMEOUT)
                return NFS4_LOCK_MAXTIMEOUT;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 39914be40b03..6a7107ae6b72 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1156,11 +1156,13 @@ restart:
                if (status >= 0) {
                        status = nfs4_reclaim_locks(state, ops);
                        if (status >= 0) {
+                                spin_lock(&state->state_lock);
                                list_for_each_entry(lock, &state->lock_states, ls_locks) {
                                        if (!(lock->ls_flags & NFS_LOCK_INITIALIZED))
                                                printk("%s: Lock reclaim failed!\n",
                                                        __func__);
                                }
+                                spin_unlock(&state->state_lock);
                                nfs4_put_open_state(state);
                                goto restart;
                        }
@@ -1224,10 +1226,12 @@ static void nfs4_clear_open_state(struct nfs4_state *state)
        clear_bit(NFS_O_RDONLY_STATE, &state->flags);
        clear_bit(NFS_O_WRONLY_STATE, &state->flags);
        clear_bit(NFS_O_RDWR_STATE, &state->flags);
+        spin_lock(&state->state_lock);
        list_for_each_entry(lock, &state->lock_states, ls_locks) {
                lock->ls_seqid.flags = 0;
                lock->ls_flags &= ~NFS_LOCK_INITIALIZED;
        }
+        spin_unlock(&state->state_lock);
 }
 static void nfs4_reset_seqids(struct nfs_server *server,
@@ -1350,12 +1354,14 @@ static void nfs4_warn_keyexpired(const char *s)
 static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
 {
        switch (error) {
+                case 0:
+                        break;
                case -NFS4ERR_CB_PATH_DOWN:
                        nfs_handle_cb_pathdown(clp);
-                        return 0;
+                        break;
                case -NFS4ERR_NO_GRACE:
                        nfs4_state_end_reclaim_reboot(clp);
-                        return 0;
+                        break;
                case -NFS4ERR_STALE_CLIENTID:
                case -NFS4ERR_LEASE_MOVED:
                        set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
@@ -1375,13 +1381,15 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
                case -NFS4ERR_SEQ_MISORDERED:
                        set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
                        /* Zero session reset errors */
-                        return 0;
+                        break;
                case -EKEYEXPIRED:
                        /* Nothing we can do */
                        nfs4_warn_keyexpired(clp->cl_hostname);
-                        return 0;
+                        break;
+                default:
+                        return error;
        }
-        return error;
+        return 0;
 }
 static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops)
@@ -1428,7 +1436,7 @@ static int nfs4_check_lease(struct nfs_client *clp)
        struct rpc_cred *cred;
        const struct nfs4_state_maintenance_ops *ops =
                clp->cl_mvops->state_renewal_ops;
-        int status = -NFS4ERR_EXPIRED;
+        int status;
        /* Is the client already known to have an expired lease? */
        if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
@@ -1438,6 +1446,7 @@ static int nfs4_check_lease(struct nfs_client *clp)
        spin_unlock(&clp->cl_lock);
        if (cred == NULL) {
                cred = nfs4_get_setclientid_cred(clp);
+                status = -ENOKEY;
                if (cred == NULL)
                        goto out;
        }
@@ -1525,16 +1534,16 @@ void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
 {
        if (!flags)
                return;
-        else if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED)
+        if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED)
                nfs41_handle_server_reboot(clp);
-        else if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED |
+        if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED |
                            SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED |
                            SEQ4_STATUS_ADMIN_STATE_REVOKED |
                            SEQ4_STATUS_LEASE_MOVED))
                nfs41_handle_state_revoked(clp);
-        else if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED)
+        if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED)
                nfs41_handle_recallable_state_revoked(clp);
-        else if (flags & (SEQ4_STATUS_CB_PATH_DOWN |
+        if (flags & (SEQ4_STATUS_CB_PATH_DOWN |
                            SEQ4_STATUS_BACKCHANNEL_FAULT |
                            SEQ4_STATUS_CB_PATH_DOWN_SESSION))
                nfs41_handle_cb_path_down(clp);
@@ -1662,10 +1671,10 @@ static void nfs4_state_manager(struct nfs_client *clp)
                if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) {
                        status = nfs4_check_lease(clp);
+                        if (status < 0)
+                                goto out_error;
                        if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
                                continue;
-                        if (status < 0 && status != -NFS4ERR_CB_PATH_DOWN)
-                                goto out_error;
                }
                /* Initialize or reset the session */
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index f48125da198a..0c672588fe5a 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -41,6 +41,7 @@
 #include <linux/nfs_fs.h>
 #include <linux/nfs_page.h>
 #include <linux/lockd/bind.h>
+#include <linux/freezer.h>
 #include "internal.h"
 #define NFSDBG_FACILITY         NFSDBG_PROC
@@ -59,7 +60,7 @@ nfs_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
                res = rpc_call_sync(clnt, msg, flags);
                if (res != -EKEYEXPIRED)
                        break;
-                schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME);
+                freezable_schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME);
                res = -ERESTARTSYS;
        } while (!fatal_signal_pending(current));
        return res;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 134777406ee3..e463967aafb8 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -41,7 +41,6 @@
 #include <linux/lockd/bind.h>
 #include <linux/seq_file.h>
 #include <linux/mount.h>
-#include <linux/mnt_namespace.h>
 #include <linux/namei.h>
 #include <linux/nfs_idmap.h>
 #include <linux/vfs.h>
@@ -263,10 +262,10 @@ static match_table_t nfs_local_lock_tokens = {
 static void nfs_umount_begin(struct super_block *);
 static int  nfs_statfs(struct dentry *, struct kstatfs *);
-static int  nfs_show_options(struct seq_file *, struct vfsmount *);
+static int  nfs_show_options(struct seq_file *, struct dentry *);
-static int  nfs_show_devname(struct seq_file *, struct vfsmount *);
+static int  nfs_show_devname(struct seq_file *, struct dentry *);
-static int  nfs_show_path(struct seq_file *, struct vfsmount *);
+static int  nfs_show_path(struct seq_file *, struct dentry *);
-static int  nfs_show_stats(struct seq_file *, struct vfsmount *);
+static int  nfs_show_stats(struct seq_file *, struct dentry *);
 static struct dentry *nfs_fs_mount(struct file_system_type *,
                int, const char *, void *);
 static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type,
@@ -721,9 +720,9 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
 /*
 * Describe the mount options on this VFS mountpoint
 */
-static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
+static int nfs_show_options(struct seq_file *m, struct dentry *root)
 {
-        struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
+        struct nfs_server *nfss = NFS_SB(root->d_sb);
        nfs_show_mount_options(m, nfss, 0);
@@ -761,14 +760,14 @@ static void show_pnfs(struct seq_file *m, struct nfs_server *server) {}
 #endif
 #endif
-static int nfs_show_devname(struct seq_file *m, struct vfsmount *mnt)
+static int nfs_show_devname(struct seq_file *m, struct dentry *root)
 {
        char *page = (char *) __get_free_page(GFP_KERNEL);
        char *devname, *dummy;
        int err = 0;
        if (!page)
                return -ENOMEM;
-        devname = nfs_path(&dummy, mnt->mnt_root, page, PAGE_SIZE);
+        devname = nfs_path(&dummy, root, page, PAGE_SIZE);
        if (IS_ERR(devname))
                err = PTR_ERR(devname);
        else
@@ -777,7 +776,7 @@ static int nfs_show_devname(struct seq_file *m, struct vfsmount *mnt)
        return err;
 }
-static int nfs_show_path(struct seq_file *m, struct vfsmount *mnt)
+static int nfs_show_path(struct seq_file *m, struct dentry *dentry)
 {
        seq_puts(m, "/");
        return 0;
@@ -786,10 +785,10 @@ static int nfs_show_path(struct seq_file *m, struct vfsmount *mnt)
 /*
 * Present statistical information for this VFS mountpoint
 */
-static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
+static int nfs_show_stats(struct seq_file *m, struct dentry *root)
 {
        int i, cpu;
-        struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
+        struct nfs_server *nfss = NFS_SB(root->d_sb);
        struct rpc_auth *auth = nfss->client->cl_auth;
        struct nfs_iostats totals = { };
@@ -799,10 +798,10 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
         * Display all mount option settings
         */
        seq_printf(m, "\n\topts:\t");
-        seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? "ro" : "rw");
+        seq_puts(m, root->d_sb->s_flags & MS_RDONLY ? "ro" : "rw");
-        seq_puts(m, mnt->mnt_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : "");
+        seq_puts(m, root->d_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : "");
-        seq_puts(m, mnt->mnt_sb->s_flags & MS_NOATIME ? ",noatime" : "");
+        seq_puts(m, root->d_sb->s_flags & MS_NOATIME ? ",noatime" : "");
-        seq_puts(m, mnt->mnt_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : "");
+        seq_puts(m, root->d_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : "");
        nfs_show_mount_options(m, nfss, 1);
        seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ);
@@ -2788,11 +2787,15 @@ static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,
                const char *export_path)
 {
        struct dentry *dentry;
-        int ret = nfs_referral_loop_protect();
+        int err;
-        if (ret) {
+        if (IS_ERR(root_mnt))
+                return ERR_CAST(root_mnt);
+        err = nfs_referral_loop_protect();
+        if (err) {
                mntput(root_mnt);
-                return ERR_PTR(ret);
+                return ERR_PTR(err);
        }
        dentry = mount_subtree(root_mnt, export_path);
@@ -2816,9 +2819,7 @@ static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
                        data->nfs_server.hostname);
        data->nfs_server.export_path = export_path;
-        res = ERR_CAST(root_mnt);
+        res = nfs_follow_remote_path(root_mnt, export_path);
-        if (!IS_ERR(root_mnt))
-                res = nfs_follow_remote_path(root_mnt, export_path);
        dfprintk(MOUNT, "<-- nfs4_try_mount() = %ld%s\n",
                        IS_ERR(res) ? PTR_ERR(res) : 0,
@@ -3079,9 +3080,7 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
                        flags, data, data->hostname);
        data->mnt_path = export_path;
-        res = ERR_CAST(root_mnt);
+        res = nfs_follow_remote_path(root_mnt, export_path);
-        if (!IS_ERR(root_mnt))
-                res = nfs_follow_remote_path(root_mnt, export_path);
        dprintk("<-- nfs4_referral_mount() = %ld%s\n",
                        IS_ERR(res) ? PTR_ERR(res) : 0,
                        IS_ERR(res) ? " [error]" : "");
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index fa383361bc61..c5e28ed8bca0 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -838,7 +838,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                        return status;
                }
        }
-        status = mnt_want_write(cstate->current_fh.fh_export->ex_path.mnt);
+        status = fh_want_write(&cstate->current_fh);
        if (status)
                return status;
        status = nfs_ok;
@@ -856,7 +856,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr,
                                0, (time_t)0);
 out:
-        mnt_drop_write(cstate->current_fh.fh_export->ex_path.mnt);
+        fh_drop_write(&cstate->current_fh);
        return status;
 }
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index ed083b9a731b..80a0be9ed008 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -147,11 +147,11 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
        status = -EEXIST;
        if (dentry->d_inode)
                goto out_put;
-        status = mnt_want_write(rec_file->f_path.mnt);
+        status = mnt_want_write_file(rec_file);
        if (status)
                goto out_put;
        status = vfs_mkdir(dir->d_inode, dentry, S_IRWXU);
-        mnt_drop_write(rec_file->f_path.mnt);
+        mnt_drop_write_file(rec_file);
 out_put:
        dput(dentry);
 out_unlock:
@@ -268,7 +268,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
        if (!rec_file || !clp->cl_firststate)
                return;
-        status = mnt_want_write(rec_file->f_path.mnt);
+        status = mnt_want_write_file(rec_file);
        if (status)
                goto out;
        clp->cl_firststate = 0;
@@ -281,7 +281,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
        nfs4_reset_creds(original_cred);
        if (status == 0)
                vfs_fsync(rec_file, 0);
-        mnt_drop_write(rec_file->f_path.mnt);
+        mnt_drop_write_file(rec_file);
 out:
        if (status)
                printk("NFSD: Failed to remove expired client state directory"
@@ -311,13 +311,13 @@ nfsd4_recdir_purge_old(void) {
        if (!rec_file)
                return;
-        status = mnt_want_write(rec_file->f_path.mnt);
+        status = mnt_want_write_file(rec_file);
        if (status)
                goto out;
        status = nfsd4_list_rec_dir(purge_old);
        if (status == 0)
                vfs_fsync(rec_file, 0);
-        mnt_drop_write(rec_file->f_path.mnt);
+        mnt_drop_write_file(rec_file);
 out:
        if (status)
                printk("nfsd4: failed to purge old clients from recovery"
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 47e94e33a975..9ca16dc09e04 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -658,7 +658,7 @@ static int nfsd4_sanitize_slot_size(u32 size)
 /*
 * XXX: If we run out of reserved DRC memory we could (up to a point)
 * re-negotiate active sessions and reduce their slot usage to make
- * rooom for new connections. For now we just fail the create session.
+ * room for new connections. For now we just fail the create session.
 */
 static int nfsd4_get_drc_mem(int slotsize, u32 num)
 {
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index c45a2ea4a090..bb4a11d58a5a 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -272,7 +272,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
         * 2.  Is that directory a mount point, or
         * 3.  Is that directory the root of an exported file system?
         */
-        error = nlmsvc_unlock_all_by_sb(path.mnt->mnt_sb);
+        error = nlmsvc_unlock_all_by_sb(path.dentry->d_sb);
        path_put(&path);
        return error;
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index c763de5c1157..68454e75fce9 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -59,7 +59,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry)
 * the write call).
 */
 static inline __be32
-nfsd_mode_check(struct svc_rqst *rqstp, umode_t mode, int requested)
+nfsd_mode_check(struct svc_rqst *rqstp, umode_t mode, umode_t requested)
 {
        mode &= S_IFMT;
@@ -293,7 +293,7 @@ out:
 * include/linux/nfsd/nfsd.h.
 */
 __be32
-fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
+fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
 {
        struct svc_export *exp;
        struct dentry   *dentry;
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index c16f8d8331b5..e5e6707ba687 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -102,7 +102,7 @@ extern char * SVCFH_fmt(struct svc_fh *fhp);
 /*
 * Function prototypes
 */
-__be32  fh_verify(struct svc_rqst *, struct svc_fh *, int, int);
+__be32  fh_verify(struct svc_rqst *, struct svc_fh *, umode_t, int);
 __be32  fh_compose(struct svc_fh *, struct svc_export *, struct dentry *, struct svc_fh *);
 __be32  fh_update(struct svc_fh *);
 void    fh_put(struct svc_fh *);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 7a2e442623c8..d25a723b68ad 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -307,7 +307,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
        struct dentry   *dentry;
        struct inode    *inode;
        int             accmode = NFSD_MAY_SATTR;
-        int             ftype = 0;
+        umode_t         ftype = 0;
        __be32          err;
        int             host_err;
        int             size_change = 0;
@@ -730,7 +730,7 @@ static int nfsd_open_break_lease(struct inode *inode, int access)
 * N.B. After this call fhp needs an fh_put
 */
 __be32
-nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
+nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
                        int access, struct file **filp)
 {
        struct dentry   *dentry;
@@ -1300,7 +1300,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
                goto out;
        }
-        host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
+        host_err = fh_want_write(fhp);
        if (host_err)
                goto out_nfserr;
@@ -1325,7 +1325,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
                break;
        }
        if (host_err < 0) {
-                mnt_drop_write(fhp->fh_export->ex_path.mnt);
+                fh_drop_write(fhp);
                goto out_nfserr;
        }
@@ -1339,7 +1339,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
        err2 = nfserrno(commit_metadata(fhp));
        if (err2)
                err = err2;
-        mnt_drop_write(fhp->fh_export->ex_path.mnt);
+        fh_drop_write(fhp);
        /*
         * Update the file handle to get the new inode info.
         */
@@ -1430,7 +1430,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
                v_atime = verifier[1]&0x7fffffff;
        }
        
-        host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
+        host_err = fh_want_write(fhp);
        if (host_err)
                goto out_nfserr;
        if (dchild->d_inode) {
@@ -1469,13 +1469,13 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
                case NFS3_CREATE_GUARDED:
                        err = nfserr_exist;
                }
-                mnt_drop_write(fhp->fh_export->ex_path.mnt);
+                fh_drop_write(fhp);
                goto out;
        }
        host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
        if (host_err < 0) {
-                mnt_drop_write(fhp->fh_export->ex_path.mnt);
+                fh_drop_write(fhp);
                goto out_nfserr;
        }
        if (created)
@@ -1503,7 +1503,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
        if (!err)
                err = nfserrno(commit_metadata(fhp));
-        mnt_drop_write(fhp->fh_export->ex_path.mnt);
+        fh_drop_write(fhp);
        /*
         * Update the filehandle to get the new inode info.
         */
@@ -1600,7 +1600,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
        if (IS_ERR(dnew))
                goto out_nfserr;
-        host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
+        host_err = fh_want_write(fhp);
        if (host_err)
                goto out_nfserr;
@@ -1621,7 +1621,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
                err = nfserrno(commit_metadata(fhp));
        fh_unlock(fhp);
-        mnt_drop_write(fhp->fh_export->ex_path.mnt);
+        fh_drop_write(fhp);
        cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
        dput(dnew);
@@ -1674,7 +1674,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
        dold = tfhp->fh_dentry;
-        host_err = mnt_want_write(tfhp->fh_export->ex_path.mnt);
+        host_err = fh_want_write(tfhp);
        if (host_err) {
                err = nfserrno(host_err);
                goto out_dput;
@@ -1699,7 +1699,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
                        err = nfserrno(host_err);
        }
 out_drop_write:
-        mnt_drop_write(tfhp->fh_export->ex_path.mnt);
+        fh_drop_write(tfhp);
 out_dput:
        dput(dnew);
 out_unlock:
@@ -1776,7 +1776,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
        host_err = -EXDEV;
        if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt)
                goto out_dput_new;
-        host_err = mnt_want_write(ffhp->fh_export->ex_path.mnt);
+        host_err = fh_want_write(ffhp);
        if (host_err)
                goto out_dput_new;
@@ -1795,7 +1795,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
                        host_err = commit_metadata(ffhp);
        }
 out_drop_write:
-        mnt_drop_write(ffhp->fh_export->ex_path.mnt);
+        fh_drop_write(ffhp);
 out_dput_new:
        dput(ndentry);
 out_dput_old:
@@ -1854,7 +1854,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
        if (!type)
                type = rdentry->d_inode->i_mode & S_IFMT;
-        host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
+        host_err = fh_want_write(fhp);
        if (host_err)
                goto out_put;
@@ -1868,7 +1868,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
        if (!host_err)
                host_err = commit_metadata(fhp);
 out_drop_write:
-        mnt_drop_write(fhp->fh_export->ex_path.mnt);
+        fh_drop_write(fhp);
 out_put:
        dput(rdentry);
@@ -2270,7 +2270,7 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
        } else
                size = 0;
-        error = mnt_want_write(fhp->fh_export->ex_path.mnt);
+        error = fh_want_write(fhp);
        if (error)
                goto getout;
        if (size)
@@ -2284,7 +2284,7 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
                                error = 0;
                }
        }
-        mnt_drop_write(fhp->fh_export->ex_path.mnt);
+        fh_drop_write(fhp);
 getout:
        kfree(value);
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 3f54ad03bb2b..1dcd238e11a0 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -66,7 +66,7 @@ __be32		do_nfsd_create(struct svc_rqst *, struct svc_fh *,
 __be32          nfsd_commit(struct svc_rqst *, struct svc_fh *,
                                loff_t, unsigned long);
 #endif /* CONFIG_NFSD_V3 */
-__be32          nfsd_open(struct svc_rqst *, struct svc_fh *, int,
+__be32          nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
                                int, struct file **);
 void            nfsd_close(struct file *);
 __be32          nfsd_read(struct svc_rqst *, struct svc_fh *,
@@ -106,4 +106,14 @@ struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int);
 int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *);
 #endif
+static inline int fh_want_write(struct svc_fh *fh)
+{
+        return mnt_want_write(fh->fh_export->ex_path.mnt);
+}
+static inline void fh_drop_write(struct svc_fh *fh)
+{
+        mnt_drop_write(fh->fh_export->ex_path.mnt);
+}
 #endif /* LINUX_NFSD_VFS_H */
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index 3a1923943b14..ca35b3a46d17 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -251,7 +251,7 @@ nilfs_type_by_mode[S_IFMT >> S_SHIFT] = {
 static void nilfs_set_de_type(struct nilfs_dir_entry *de, struct inode *inode)
 {
-        mode_t mode = inode->i_mode;
+        umode_t mode = inode->i_mode;
        de->file_type = nilfs_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
 }
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index b50ffb72e5b3..8f7b95ac1f7e 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -291,7 +291,7 @@ const struct address_space_operations nilfs_aops = {
        .is_partially_uptodate  = block_is_partially_uptodate,
 };
-struct inode *nilfs_new_inode(struct inode *dir, int mode)
+struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
 {
        struct super_block *sb = dir->i_sb;
        struct the_nilfs *nilfs = sb->s_fs_info;
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 41d6743d303c..886649627c3d 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -27,7 +27,7 @@
 #include <linux/uaccess.h>      /* copy_from_user(), copy_to_user() */
 #include <linux/vmalloc.h>
 #include <linux/compat.h>       /* compat_ptr() */
-#include <linux/mount.h>        /* mnt_want_write(), mnt_drop_write() */
+#include <linux/mount.h>        /* mnt_want_write_file(), mnt_drop_write_file() */
 #include <linux/buffer_head.h>
 #include <linux/nilfs2_fs.h>
 #include "nilfs.h"
@@ -119,7 +119,7 @@ static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp,
        if (get_user(flags, (int __user *)argp))
                return -EFAULT;
-        ret = mnt_want_write(filp->f_path.mnt);
+        ret = mnt_want_write_file(filp);
        if (ret)
                return ret;
@@ -154,7 +154,7 @@ static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp,
        ret = nilfs_transaction_commit(inode->i_sb);
 out:
        mutex_unlock(&inode->i_mutex);
-        mnt_drop_write(filp->f_path.mnt);
+        mnt_drop_write_file(filp);
        return ret;
 }
@@ -174,7 +174,7 @@ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp,
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
-        ret = mnt_want_write(filp->f_path.mnt);
+        ret = mnt_want_write_file(filp);
        if (ret)
                return ret;
@@ -194,7 +194,7 @@ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp,
        up_read(&inode->i_sb->s_umount);
 out:
-        mnt_drop_write(filp->f_path.mnt);
+        mnt_drop_write_file(filp);
        return ret;
 }
@@ -210,7 +210,7 @@ nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp,
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
-        ret = mnt_want_write(filp->f_path.mnt);
+        ret = mnt_want_write_file(filp);
        if (ret)
                return ret;
@@ -225,7 +225,7 @@ nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp,
        else
                nilfs_transaction_commit(inode->i_sb); /* never fails */
 out:
-        mnt_drop_write(filp->f_path.mnt);
+        mnt_drop_write_file(filp);
        return ret;
 }
@@ -591,7 +591,7 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
-        ret = mnt_want_write(filp->f_path.mnt);
+        ret = mnt_want_write_file(filp);
        if (ret)
                return ret;
@@ -625,6 +625,9 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
                if (argv[n].v_nmembs > nsegs * nilfs->ns_blocks_per_segment)
                        goto out_free;
+                if (argv[n].v_nmembs >= UINT_MAX / argv[n].v_size)
+                        goto out_free;
                len = argv[n].v_size * argv[n].v_nmembs;
                base = (void __user *)(unsigned long)argv[n].v_base;
                if (len == 0) {
@@ -672,7 +675,7 @@ out_free:
                vfree(kbufs[n]);
        kfree(kbufs[4]);
 out:
-        mnt_drop_write(filp->f_path.mnt);
+        mnt_drop_write_file(filp);
        return ret;
 }
@@ -707,7 +710,7 @@ static int nilfs_ioctl_resize(struct inode *inode, struct file *filp,
        if (!capable(CAP_SYS_ADMIN))
                goto out;
-        ret = mnt_want_write(filp->f_path.mnt);
+        ret = mnt_want_write_file(filp);
        if (ret)
                goto out;
@@ -718,7 +721,7 @@ static int nilfs_ioctl_resize(struct inode *inode, struct file *filp,
        ret = nilfs_resize_fs(inode->i_sb, newsize);
 out_drop_write:
-        mnt_drop_write(filp->f_path.mnt);
+        mnt_drop_write_file(filp);
 out:
        return ret;
 }
@@ -842,6 +845,19 @@ long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
        case FS_IOC32_GETVERSION:
                cmd = FS_IOC_GETVERSION;
                break;
+        case NILFS_IOCTL_CHANGE_CPMODE:
+        case NILFS_IOCTL_DELETE_CHECKPOINT:
+        case NILFS_IOCTL_GET_CPINFO:
+        case NILFS_IOCTL_GET_CPSTAT:
+        case NILFS_IOCTL_GET_SUINFO:
+        case NILFS_IOCTL_GET_SUSTAT:
+        case NILFS_IOCTL_GET_VINFO:
+        case NILFS_IOCTL_GET_BDESCS:
+        case NILFS_IOCTL_CLEAN_SEGMENTS:
+        case NILFS_IOCTL_SYNC:
+        case NILFS_IOCTL_RESIZE:
+        case NILFS_IOCTL_SET_ALLOC_RANGE:
+                break;
        default:
                return -ENOIOCTLCMD;
        }
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 768982de10e4..1cd3f624dffc 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -84,7 +84,7 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 * If the create succeeds, we fill in the inode information
 * with d_instantiate().
 */
-static int nilfs_create(struct inode *dir, struct dentry *dentry, int mode,
+static int nilfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
                        struct nameidata *nd)
 {
        struct inode *inode;
@@ -112,7 +112,7 @@ static int nilfs_create(struct inode *dir, struct dentry *dentry, int mode,
 }
 static int
-nilfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
+nilfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
 {
        struct inode *inode;
        struct nilfs_transaction_info ti;
@@ -213,7 +213,7 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir,
        return err;
 }
-static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
        struct inode *inode;
        struct nilfs_transaction_info ti;
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 3777d138f895..250add84da76 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -246,7 +246,7 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, struct nilfs_argv *,
 /* inode.c */
 void nilfs_inode_add_blocks(struct inode *inode, int n);
 void nilfs_inode_sub_blocks(struct inode *inode, int n);
-extern struct inode *nilfs_new_inode(struct inode *, int);
+extern struct inode *nilfs_new_inode(struct inode *, umode_t);
 extern void nilfs_free_inode(struct inode *);
 extern int nilfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
 extern void nilfs_set_inode_flags(struct inode *);
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index bb24ab6c282f..0e72ad6f22aa 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2470,7 +2470,7 @@ static int nilfs_segctor_thread(void *arg)
        if (freezing(current)) {
                spin_unlock(&sci->sc_state_lock);
-                refrigerator();
+                try_to_freeze();
                spin_lock(&sci->sc_state_lock);
        } else {
                DEFINE_WAIT(wait);
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 8351c44a7320..08e3d4f9df18 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -175,8 +175,6 @@ static void nilfs_i_callback(struct rcu_head *head)
        struct inode *inode = container_of(head, struct inode, i_rcu);
        struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
-        INIT_LIST_HEAD(&inode->i_dentry);
        if (mdi) {
                kfree(mdi->mi_bgl); /* kfree(NULL) is safe */
                kfree(mdi);
@@ -650,11 +648,11 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf)
        return 0;
 }
-static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
+static int nilfs_show_options(struct seq_file *seq, struct dentry *dentry)
 {
-        struct super_block *sb = vfs->mnt_sb;
+        struct super_block *sb = dentry->d_sb;
        struct the_nilfs *nilfs = sb->s_fs_info;
-        struct nilfs_root *root = NILFS_I(vfs->mnt_root->d_inode)->i_root;
+        struct nilfs_root *root = NILFS_I(dentry->d_inode)->i_root;
        if (!nilfs_test_opt(nilfs, BARRIER))
                seq_puts(seq, ",nobarrier");
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 9fde1c00a296..3568c8a8b138 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -16,6 +16,8 @@
 #include <asm/ioctls.h>
+#include "../../mount.h"
 #define FANOTIFY_DEFAULT_MAX_EVENTS     16384
 #define FANOTIFY_DEFAULT_MAX_MARKS      8192
 #define FANOTIFY_DEFAULT_MAX_LISTENERS  128
@@ -546,7 +548,7 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group,
        removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags);
        fsnotify_put_mark(fsn_mark);
-        if (removed & mnt->mnt_fsnotify_mask)
+        if (removed & real_mount(mnt)->mnt_fsnotify_mask)
                fsnotify_recalc_vfsmount_mask(mnt);
        return 0;
@@ -623,7 +625,7 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
        }
        added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
-        if (added & ~mnt->mnt_fsnotify_mask)
+        if (added & ~real_mount(mnt)->mnt_fsnotify_mask)
                fsnotify_recalc_vfsmount_mask(mnt);
 err:
        fsnotify_put_mark(fsn_mark);
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 79b47cbb5cd8..ccb14d3fc0de 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -26,6 +26,7 @@
 #include <linux/fsnotify_backend.h>
 #include "fsnotify.h"
+#include "../mount.h"
 /*
 * Clear all of the marks on an inode when it is being evicted from core
@@ -205,13 +206,13 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
        struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL;
        struct fsnotify_group *inode_group, *vfsmount_group;
        struct fsnotify_event *event = NULL;
-        struct vfsmount *mnt;
+        struct mount *mnt;
        int idx, ret = 0;
        /* global tests shouldn't care about events on child only the specific event */
        __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
        if (data_is == FSNOTIFY_EVENT_PATH)
-                mnt = ((struct path *)data)->mnt;
+                mnt = real_mount(((struct path *)data)->mnt);
        else
                mnt = NULL;
@@ -262,11 +263,11 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
                        /* we didn't use the vfsmount_mark */
                        vfsmount_group = NULL;
                } else if (vfsmount_group > inode_group) {
-                        ret = send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data,
+                        ret = send_to_group(to_tell, &mnt->mnt, NULL, vfsmount_mark, mask, data,
                                            data_is, cookie, file_name, &event);
                        inode_group = NULL;
                } else {
-                        ret = send_to_group(to_tell, mnt, inode_mark, vfsmount_mark,
+                        ret = send_to_group(to_tell, &mnt->mnt, inode_mark, vfsmount_mark,
                                            mask, data, data_is, cookie, file_name,
                                            &event);
                }
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index 778fe6cae3b0..b7b4b0e8554f 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -28,15 +28,17 @@
 #include <linux/fsnotify_backend.h>
 #include "fsnotify.h"
+#include "../mount.h"
 void fsnotify_clear_marks_by_mount(struct vfsmount *mnt)
 {
        struct fsnotify_mark *mark, *lmark;
        struct hlist_node *pos, *n;
+        struct mount *m = real_mount(mnt);
        LIST_HEAD(free_list);
        spin_lock(&mnt->mnt_root->d_lock);
-        hlist_for_each_entry_safe(mark, pos, n, &mnt->mnt_fsnotify_marks, m.m_list) {
+        hlist_for_each_entry_safe(mark, pos, n, &m->mnt_fsnotify_marks, m.m_list) {
                list_add(&mark->m.free_m_list, &free_list);
                hlist_del_init_rcu(&mark->m.m_list);
                fsnotify_get_mark(mark);
@@ -59,15 +61,16 @@ void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group)
 */
 static void fsnotify_recalc_vfsmount_mask_locked(struct vfsmount *mnt)
 {
+        struct mount *m = real_mount(mnt);
        struct fsnotify_mark *mark;
        struct hlist_node *pos;
        __u32 new_mask = 0;
        assert_spin_locked(&mnt->mnt_root->d_lock);
-        hlist_for_each_entry(mark, pos, &mnt->mnt_fsnotify_marks, m.m_list)
+        hlist_for_each_entry(mark, pos, &m->mnt_fsnotify_marks, m.m_list)
                new_mask |= mark->mask;
-        mnt->mnt_fsnotify_mask = new_mask;
+        m->mnt_fsnotify_mask = new_mask;
 }
 /*
@@ -101,12 +104,13 @@ void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark)
 static struct fsnotify_mark *fsnotify_find_vfsmount_mark_locked(struct fsnotify_group *group,
                                                                struct vfsmount *mnt)
 {
+        struct mount *m = real_mount(mnt);
        struct fsnotify_mark *mark;
        struct hlist_node *pos;
        assert_spin_locked(&mnt->mnt_root->d_lock);
-        hlist_for_each_entry(mark, pos, &mnt->mnt_fsnotify_marks, m.m_list) {
+        hlist_for_each_entry(mark, pos, &m->mnt_fsnotify_marks, m.m_list) {
                if (mark->group == group) {
                        fsnotify_get_mark(mark);
                        return mark;
@@ -140,6 +144,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
                               struct fsnotify_group *group, struct vfsmount *mnt,
                               int allow_dups)
 {
+        struct mount *m = real_mount(mnt);
        struct fsnotify_mark *lmark;
        struct hlist_node *node, *last = NULL;
        int ret = 0;
@@ -154,13 +159,13 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
        mark->m.mnt = mnt;
        /* is mark the first mark? */
-        if (hlist_empty(&mnt->mnt_fsnotify_marks)) {
+        if (hlist_empty(&m->mnt_fsnotify_marks)) {
-                hlist_add_head_rcu(&mark->m.m_list, &mnt->mnt_fsnotify_marks);
+                hlist_add_head_rcu(&mark->m.m_list, &m->mnt_fsnotify_marks);
                goto out;
        }
        /* should mark be in the middle of the current list? */
-        hlist_for_each_entry(lmark, node, &mnt->mnt_fsnotify_marks, m.m_list) {
+        hlist_for_each_entry(lmark, node, &m->mnt_fsnotify_marks, m.m_list) {
                last = node;
                if ((lmark->group == group) && !allow_dups) {
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 97e2dacbc867..2eaa66652944 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -335,7 +335,6 @@ struct inode *ntfs_alloc_big_inode(struct super_block *sb)
 static void ntfs_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode));
 }
@@ -2301,16 +2300,16 @@ void ntfs_evict_big_inode(struct inode *vi)
 /**
 * ntfs_show_options - show mount options in /proc/mounts
 * @sf:         seq_file in which to write our mount options
- * @mnt:        vfs mount whose mount options to display
+ * @root:       root of the mounted tree whose mount options to display
 *
 * Called by the VFS once for each mounted ntfs volume when someone reads
 * /proc/mounts in order to display the NTFS specific mount options of each
- * mount. The mount options of the vfs mount @mnt are written to the seq file
+ * mount. The mount options of the fs given by @root are written to the seq file
 * @sf and success is returned.
 */
-int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt)
+int ntfs_show_options(struct seq_file *sf, struct dentry *root)
 {
-        ntfs_volume *vol = NTFS_SB(mnt->mnt_sb);
+        ntfs_volume *vol = NTFS_SB(root->d_sb);
        int i;
        seq_printf(sf, ",uid=%i", vol->uid);
diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h
index fe8e7e928889..db29695f845c 100644
--- a/fs/ntfs/inode.h
+++ b/fs/ntfs/inode.h
@@ -298,7 +298,7 @@ extern void ntfs_clear_extent_inode(ntfs_inode *ni);
 extern int ntfs_read_inode_mount(struct inode *vi);
-extern int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt);
+extern int ntfs_show_options(struct seq_file *sf, struct dentry *root);
 #ifdef NTFS_RW
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index b52706da4645..608be4516091 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -104,7 +104,7 @@ static bool parse_options(ntfs_volume *vol, char *opt)
        int errors = 0, sloppy = 0;
        uid_t uid = (uid_t)-1;
        gid_t gid = (gid_t)-1;
-        mode_t fmask = (mode_t)-1, dmask = (mode_t)-1;
+        umode_t fmask = (umode_t)-1, dmask = (umode_t)-1;
        int mft_zone_multiplier = -1, on_errors = -1;
        int show_sys_files = -1, case_sensitive = -1, disable_sparse = -1;
        struct nls_table *nls_map = NULL, *old_nls;
@@ -287,9 +287,9 @@ no_mount_options:
                vol->uid = uid;
        if (gid != (gid_t)-1)
                vol->gid = gid;
-        if (fmask != (mode_t)-1)
+        if (fmask != (umode_t)-1)
                vol->fmask = fmask;
-        if (dmask != (mode_t)-1)
+        if (dmask != (umode_t)-1)
                vol->dmask = dmask;
        if (show_sys_files != -1) {
                if (show_sys_files)
diff --git a/fs/ntfs/volume.h b/fs/ntfs/volume.h
index 406ab55dfb32..15e3ba8d521a 100644
--- a/fs/ntfs/volume.h
+++ b/fs/ntfs/volume.h
@@ -48,8 +48,8 @@ typedef struct {
        unsigned long flags;            /* Miscellaneous flags, see below. */
        uid_t uid;                      /* uid that files will be mounted as. */
        gid_t gid;                      /* gid that files will be mounted as. */
-        mode_t fmask;                   /* The mask for file permissions. */
+        umode_t fmask;                  /* The mask for file permissions. */
-        mode_t dmask;                   /* The mask for directory
+        umode_t dmask;                  /* The mask for directory
                                           permissions. */
        u8 mft_zone_multiplier;         /* Initial mft zone multiplier. */
        u8 on_errors;                   /* What to do on filesystem errors. */
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index dc45deb19e68..73ba81928bce 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -553,7 +553,7 @@ void o2net_debugfs_exit(void)
 int o2net_debugfs_init(void)
 {
-        mode_t mode = S_IFREG|S_IRUSR;
+        umode_t mode = S_IFREG|S_IRUSR;
        o2net_dentry = debugfs_create_dir(O2NET_DEBUG_DIR, NULL);
        if (o2net_dentry)
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index b42076797049..abfac0d7ae9c 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -354,7 +354,6 @@ static struct inode *dlmfs_alloc_inode(struct super_block *sb)
 static void dlmfs_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode));
 }
@@ -401,16 +400,14 @@ static struct backing_dev_info dlmfs_backing_dev_info = {
 static struct inode *dlmfs_get_root_inode(struct super_block *sb)
 {
        struct inode *inode = new_inode(sb);
-        int mode = S_IFDIR | 0755;
+        umode_t mode = S_IFDIR | 0755;
        struct dlmfs_inode_private *ip;
        if (inode) {
                ip = DLMFS_I(inode);
                inode->i_ino = get_next_ino();
-                inode->i_mode = mode;
+                inode_init_owner(inode, NULL, mode);
-                inode->i_uid = current_fsuid();
-                inode->i_gid = current_fsgid();
                inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
                inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
                inc_nlink(inode);
@@ -424,7 +421,7 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb)
 static struct inode *dlmfs_get_inode(struct inode *parent,
                                     struct dentry *dentry,
-                                     int mode)
+                                     umode_t mode)
 {
        struct super_block *sb = parent->i_sb;
        struct inode * inode = new_inode(sb);
@@ -434,9 +431,7 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
                return NULL;
        inode->i_ino = get_next_ino();
-        inode->i_mode = mode;
+        inode_init_owner(inode, parent, mode);
-        inode->i_uid = current_fsuid();
-        inode->i_gid = current_fsgid();
        inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
        inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
@@ -473,13 +468,6 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
                inc_nlink(inode);
                break;
        }
-        if (parent->i_mode & S_ISGID) {
-                inode->i_gid = parent->i_gid;
-                if (S_ISDIR(mode))
-                        inode->i_mode |= S_ISGID;
-        }
        return inode;
 }
@@ -489,7 +477,7 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
 /* SMP-safe */
 static int dlmfs_mkdir(struct inode * dir,
                       struct dentry * dentry,
-                       int mode)
+                       umode_t mode)
 {
        int status;
        struct inode *inode = NULL;
@@ -537,7 +525,7 @@ bail:
 static int dlmfs_create(struct inode *dir,
                        struct dentry *dentry,
-                        int mode,
+                        umode_t mode,
                        struct nameidata *nd)
 {
        int status = 0;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 6e396683c3d4..061591a3ab08 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2128,7 +2128,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
                 * remove_suid() calls ->setattr without any hint that
                 * we may have already done our cluster locking. Since
                 * ocfs2_setattr() *must* take cluster locks to
-                 * proceeed, this will lead us to recursively lock the
+                 * proceed, this will lead us to recursively lock the
                 * inode. There's also the dinode i_size state which
                 * can be lost via setattr during extending writes (we
                 * set inode->i_size at the end of a write. */
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 726ff265b296..a6fda3c188aa 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -906,12 +906,12 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                if (get_user(flags, (int __user *) arg))
                        return -EFAULT;
-                status = mnt_want_write(filp->f_path.mnt);
+                status = mnt_want_write_file(filp);
                if (status)
                        return status;
                status = ocfs2_set_inode_attr(inode, flags,
                        OCFS2_FL_MODIFIABLE);
-                mnt_drop_write(filp->f_path.mnt);
+                mnt_drop_write_file(filp);
                return status;
        case OCFS2_IOC_RESVSP:
        case OCFS2_IOC_RESVSP64:
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 184c76b8c293..b1e3fce72ea4 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -1059,7 +1059,7 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp)
        struct ocfs2_move_extents range;
        struct ocfs2_move_extents_context *context = NULL;
-        status = mnt_want_write(filp->f_path.mnt);
+        status = mnt_want_write_file(filp);
        if (status)
                return status;
@@ -1145,7 +1145,7 @@ out:
        kfree(context);
-        mnt_drop_write(filp->f_path.mnt);
+        mnt_drop_write_file(filp);
        return status;
 }
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index a8b2bfea574e..be244692550d 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -185,7 +185,7 @@ bail:
        return ret;
 }
-static struct inode *ocfs2_get_init_inode(struct inode *dir, int mode)
+static struct inode *ocfs2_get_init_inode(struct inode *dir, umode_t mode)
 {
        struct inode *inode;
@@ -207,7 +207,7 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, int mode)
 static int ocfs2_mknod(struct inode *dir,
                       struct dentry *dentry,
-                       int mode,
+                       umode_t mode,
                       dev_t dev)
 {
        int status = 0;
@@ -602,7 +602,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 static int ocfs2_mkdir(struct inode *dir,
                       struct dentry *dentry,
-                       int mode)
+                       umode_t mode)
 {
        int ret;
@@ -617,7 +617,7 @@ static int ocfs2_mkdir(struct inode *dir,
 static int ocfs2_create(struct inode *dir,
                        struct dentry *dentry,
-                        int mode,
+                        umode_t mode,
                        struct nameidata *nd)
 {
        int ret;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 4994f8b0e604..604e12c4e979 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -108,7 +108,7 @@ static int ocfs2_parse_options(struct super_block *sb, char *options,
                               int is_remount);
 static int ocfs2_check_set_options(struct super_block *sb,
                                   struct mount_options *options);
-static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt);
+static int ocfs2_show_options(struct seq_file *s, struct dentry *root);
 static void ocfs2_put_super(struct super_block *sb);
 static int ocfs2_mount_volume(struct super_block *sb);
 static int ocfs2_remount(struct super_block *sb, int *flags, char *data);
@@ -569,7 +569,6 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb)
 static void ocfs2_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(ocfs2_inode_cachep, OCFS2_I(inode));
 }
@@ -1534,9 +1533,9 @@ bail:
        return status;
 }
-static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
+static int ocfs2_show_options(struct seq_file *s, struct dentry *root)
 {
-        struct ocfs2_super *osb = OCFS2_SB(mnt->mnt_sb);
+        struct ocfs2_super *osb = OCFS2_SB(root->d_sb);
        unsigned long opts = osb->s_mount_opt;
        unsigned int local_alloc_megs;
@@ -1568,8 +1567,7 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
        if (osb->preferred_slot != OCFS2_INVALID_SLOT)
                seq_printf(s, ",preferred_slot=%d", osb->preferred_slot);
-        if (!(mnt->mnt_flags & MNT_NOATIME) && !(mnt->mnt_flags & MNT_RELATIME))
+        seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum);
-                seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum);
        if (osb->osb_commit_interval)
                seq_printf(s, ",commit=%u",
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index aa9e8777b09a..0ba9ea1e7961 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -623,7 +623,7 @@ int ocfs2_calc_security_init(struct inode *dir,
 int ocfs2_calc_xattr_init(struct inode *dir,
                          struct buffer_head *dir_bh,
-                          int mode,
+                          umode_t mode,
                          struct ocfs2_security_xattr_info *si,
                          int *want_clusters,
                          int *xattr_credits,
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index d63cfb72316b..e5c7f15465b4 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -68,7 +68,7 @@ int ocfs2_calc_security_init(struct inode *,
                             struct ocfs2_security_xattr_info *,
                             int *, int *, struct ocfs2_alloc_context **);
 int ocfs2_calc_xattr_init(struct inode *, struct buffer_head *,
-                          int, struct ocfs2_security_xattr_info *,
+                          umode_t, struct ocfs2_security_xattr_info *,
                          int *, int *, int *);
 /*
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index 98e544274390..f00576ec320f 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -255,7 +255,7 @@ static int omfs_remove(struct inode *dir, struct dentry *dentry)
        return 0;
 }
-static int omfs_add_node(struct inode *dir, struct dentry *dentry, int mode)
+static int omfs_add_node(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
        int err;
        struct inode *inode = omfs_new_inode(dir, mode);
@@ -279,12 +279,12 @@ out_free_inode:
        return err;
 }
-static int omfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int omfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
        return omfs_add_node(dir, dentry, mode | S_IFDIR);
 }
-static int omfs_create(struct inode *dir, struct dentry *dentry, int mode,
+static int omfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
                struct nameidata *nd)
 {
        return omfs_add_node(dir, dentry, mode | S_IFREG);
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index e043c4cb9a97..6065bb0ba207 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -28,7 +28,7 @@ struct buffer_head *omfs_bread(struct super_block *sb, sector_t block)
        return sb_bread(sb, clus_to_blk(sbi, block));
 }
-struct inode *omfs_new_inode(struct inode *dir, int mode)
+struct inode *omfs_new_inode(struct inode *dir, umode_t mode)
 {
        struct inode *inode;
        u64 new_block;
diff --git a/fs/omfs/omfs.h b/fs/omfs/omfs.h
index 7d414fef501a..8941f12c6b01 100644
--- a/fs/omfs/omfs.h
+++ b/fs/omfs/omfs.h
@@ -60,7 +60,7 @@ extern int omfs_shrink_inode(struct inode *inode);
 /* inode.c */
 extern struct buffer_head *omfs_bread(struct super_block *sb, sector_t block);
 extern struct inode *omfs_iget(struct super_block *sb, ino_t inode);
-extern struct inode *omfs_new_inode(struct inode *dir, int mode);
+extern struct inode *omfs_new_inode(struct inode *dir, umode_t mode);
 extern int omfs_reserve_block(struct super_block *sb, sector_t block);
 extern int omfs_find_empty_block(struct super_block *sb, int mode, ino_t *ino);
 extern int omfs_sync_inode(struct inode *inode);
diff --git a/fs/open.c b/fs/open.c
index 22c41b543f2d..77becc041149 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -456,7 +456,7 @@ static int chmod_common(struct path *path, umode_t mode)
        if (error)
                return error;
        mutex_lock(&inode->i_mutex);
-        error = security_path_chmod(path->dentry, path->mnt, mode);
+        error = security_path_chmod(path, mode);
        if (error)
                goto out_unlock;
        newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
@@ -468,7 +468,7 @@ out_unlock:
        return error;
 }
-SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode)
+SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
 {
        struct file * file;
        int err = -EBADF;
@@ -482,7 +482,7 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode)
        return err;
 }
-SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode)
+SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, umode_t, mode)
 {
        struct path path;
        int error;
@@ -495,7 +495,7 @@ SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode)
        return error;
 }
-SYSCALL_DEFINE2(chmod, const char __user *, filename, mode_t, mode)
+SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
 {
        return sys_fchmodat(AT_FDCWD, filename, mode);
 }
@@ -608,7 +608,7 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
        dentry = file->f_path.dentry;
        audit_inode(NULL, dentry);
        error = chown_common(&file->f_path, user, group);
-        mnt_drop_write(file->f_path.mnt);
+        mnt_drop_write_file(file);
 out_fput:
        fput(file);
 out:
@@ -877,7 +877,7 @@ void fd_install(unsigned int fd, struct file *file)
 EXPORT_SYMBOL(fd_install);
-static inline int build_open_flags(int flags, int mode, struct open_flags *op)
+static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op)
 {
        int lookup_flags = 0;
        int acc_mode;
@@ -948,7 +948,7 @@ static inline int build_open_flags(int flags, int mode, struct open_flags *op)
 * have to.  But in generally you should not do this, so please move
 * along, nothing to see here..
 */
-struct file *filp_open(const char *filename, int flags, int mode)
+struct file *filp_open(const char *filename, int flags, umode_t mode)
 {
        struct open_flags op;
        int lookup = build_open_flags(flags, mode, &op);
@@ -970,7 +970,7 @@ struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
 }
 EXPORT_SYMBOL(file_open_root);
-long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
+long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
 {
        struct open_flags op;
        int lookup = build_open_flags(flags, mode, &op);
@@ -994,7 +994,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
        return fd;
 }
-SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode)
+SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
 {
        long ret;
@@ -1008,7 +1008,7 @@ SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode)
 }
 SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags,
-                int, mode)
+                umode_t, mode)
 {
        long ret;
@@ -1027,7 +1027,7 @@ SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags,
 * For backward compatibility?  Maybe this should be moved
 * into arch/i386 instead?
 */
-SYSCALL_DEFINE2(creat, const char __user *, pathname, int, mode)
+SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode)
 {
        return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode);
 }
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index e4e0ff7962e2..a88c03bc749d 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -346,7 +346,6 @@ static struct inode *openprom_alloc_inode(struct super_block *sb)
 static void openprom_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(op_inode_cachep, OP_I(inode));
 }
diff --git a/fs/partitions/Kconfig b/fs/partitions/Kconfig
deleted file mode 100644
index cb5f0a3f1b03..000000000000
--- a/fs/partitions/Kconfig
+++ /dev/null
@@ -1,251 +0,0 @@
-#
-# Partition configuration
-#
-config PARTITION_ADVANCED
-        bool "Advanced partition selection"
-        help
-          Say Y here if you would like to use hard disks under Linux which
-          were partitioned under an operating system running on a different
-          architecture than your Linux system.
-          Note that the answer to this question won't directly affect the
-          kernel: saying N will just cause the configurator to skip all
-          the questions about foreign partitioning schemes.
-          If unsure, say N.
-config ACORN_PARTITION
-        bool "Acorn partition support" if PARTITION_ADVANCED
-        default y if ARCH_ACORN
-        help
-          Support hard disks partitioned under Acorn operating systems.
-config ACORN_PARTITION_CUMANA
-        bool "Cumana partition support" if PARTITION_ADVANCED
-        default y if ARCH_ACORN
-        depends on ACORN_PARTITION
-        help
-          Say Y here if you would like to use hard disks under Linux which
-          were partitioned using the Cumana interface on Acorn machines.
-config ACORN_PARTITION_EESOX
-        bool "EESOX partition support" if PARTITION_ADVANCED
-        default y if ARCH_ACORN
-        depends on ACORN_PARTITION
-config ACORN_PARTITION_ICS
-        bool "ICS partition support" if PARTITION_ADVANCED
-        default y if ARCH_ACORN
-        depends on ACORN_PARTITION
-        help
-          Say Y here if you would like to use hard disks under Linux which
-          were partitioned using the ICS interface on Acorn machines.
-config ACORN_PARTITION_ADFS
-        bool "Native filecore partition support" if PARTITION_ADVANCED
-        default y if ARCH_ACORN
-        depends on ACORN_PARTITION
-        help
-          The Acorn Disc Filing System is the standard file system of the
-          RiscOS operating system which runs on Acorn's ARM-based Risc PC
-          systems and the Acorn Archimedes range of machines.  If you say
-          `Y' here, Linux will support disk partitions created under ADFS.
-config ACORN_PARTITION_POWERTEC
-        bool "PowerTec partition support" if PARTITION_ADVANCED
-        default y if ARCH_ACORN
-        depends on ACORN_PARTITION
-        help
-          Support reading partition tables created on Acorn machines using
-          the PowerTec SCSI drive.
-config ACORN_PARTITION_RISCIX
-        bool "RISCiX partition support" if PARTITION_ADVANCED
-        default y if ARCH_ACORN
-        depends on ACORN_PARTITION
-        help
-          Once upon a time, there was a native Unix port for the Acorn series
-          of machines called RISCiX.  If you say 'Y' here, Linux will be able
-          to read disks partitioned under RISCiX.
-config OSF_PARTITION
-        bool "Alpha OSF partition support" if PARTITION_ADVANCED
-        default y if ALPHA
-        help
-          Say Y here if you would like to use hard disks under Linux which
-          were partitioned on an Alpha machine.
-config AMIGA_PARTITION
-        bool "Amiga partition table support" if PARTITION_ADVANCED
-        default y if (AMIGA || AFFS_FS=y)
-        help
-          Say Y here if you would like to use hard disks under Linux which
-          were partitioned under AmigaOS.
-config ATARI_PARTITION
-        bool "Atari partition table support" if PARTITION_ADVANCED
-        default y if ATARI
-        help
-          Say Y here if you would like to use hard disks under Linux which
-          were partitioned under the Atari OS.
-config IBM_PARTITION
-        bool "IBM disk label and partition support"
-        depends on PARTITION_ADVANCED && S390
-        help
-          Say Y here if you would like to be able to read the hard disk
-          partition table format used by IBM DASD disks operating under CMS.
-          Otherwise, say N.
-config MAC_PARTITION
-        bool "Macintosh partition map support" if PARTITION_ADVANCED
-        default y if (MAC || PPC_PMAC)
-        help
-          Say Y here if you would like to use hard disks under Linux which
-          were partitioned on a Macintosh.
-config MSDOS_PARTITION
-        bool "PC BIOS (MSDOS partition tables) support" if PARTITION_ADVANCED
-        default y
-        help
-          Say Y here.
-config BSD_DISKLABEL
-        bool "BSD disklabel (FreeBSD partition tables) support"
-        depends on PARTITION_ADVANCED && MSDOS_PARTITION
-        help
-          FreeBSD uses its own hard disk partition scheme on your PC. It
-          requires only one entry in the primary partition table of your disk
-          and manages it similarly to DOS extended partitions, putting in its
-          first sector a new partition table in BSD disklabel format. Saying Y
-          here allows you to read these disklabels and further mount FreeBSD
-          partitions from within Linux if you have also said Y to "UFS
-          file system support", above. If you don't know what all this is
-          about, say N.
-config MINIX_SUBPARTITION
-        bool "Minix subpartition support"
-        depends on PARTITION_ADVANCED && MSDOS_PARTITION
-        help
-          Minix 2.0.0/2.0.2 subpartition table support for Linux.
-          Say Y here if you want to mount and use Minix 2.0.0/2.0.2
-          subpartitions.
-config SOLARIS_X86_PARTITION
-        bool "Solaris (x86) partition table support"
-        depends on PARTITION_ADVANCED && MSDOS_PARTITION
-        help
-          Like most systems, Solaris x86 uses its own hard disk partition
-          table format, incompatible with all others. Saying Y here allows you
-          to read these partition tables and further mount Solaris x86
-          partitions from within Linux if you have also said Y to "UFS
-          file system support", above.
-config UNIXWARE_DISKLABEL
-        bool "Unixware slices support"
-        depends on PARTITION_ADVANCED && MSDOS_PARTITION
-        ---help---
-          Like some systems, UnixWare uses its own slice table inside a
-          partition (VTOC - Virtual Table of Contents). Its format is
-          incompatible with all other OSes. Saying Y here allows you to read
-          VTOC and further mount UnixWare partitions read-only from within
-          Linux if you have also said Y to "UFS file system support" or
-          "System V and Coherent file system support", above.
-          This is mainly used to carry data from a UnixWare box to your
-          Linux box via a removable medium like magneto-optical, ZIP or
-          removable IDE drives. Note, however, that a good portable way to
-          transport files and directories between unixes (and even other
-          operating systems) is given by the tar program ("man tar" or
-          preferably "info tar").
-          If you don't know what all this is about, say N.
-config LDM_PARTITION
-        bool "Windows Logical Disk Manager (Dynamic Disk) support"
-        depends on PARTITION_ADVANCED
-        ---help---
-          Say Y here if you would like to use hard disks under Linux which
-          were partitioned using Windows 2000's/XP's or Vista's Logical Disk
-          Manager.  They are also known as "Dynamic Disks".
-          Note this driver only supports Dynamic Disks with a protective MBR
-          label, i.e. DOS partition table.  It does not support GPT labelled
-          Dynamic Disks yet as can be created with Vista.
-          Windows 2000 introduced the concept of Dynamic Disks to get around
-          the limitations of the PC's partitioning scheme.  The Logical Disk
-          Manager allows the user to repartition a disk and create spanned,
-          mirrored, striped or RAID volumes, all without the need for
-          rebooting.
-          Normal partitions are now called Basic Disks under Windows 2000, XP,
-          and Vista.
-          For a fuller description read <file:Documentation/ldm.txt>.
-          If unsure, say N.
-config LDM_DEBUG
-        bool "Windows LDM extra logging"
-        depends on LDM_PARTITION
-        help
-          Say Y here if you would like LDM to log verbosely.  This could be
-          helpful if the driver doesn't work as expected and you'd like to
-          report a bug.
-          If unsure, say N.
-config SGI_PARTITION
-        bool "SGI partition support" if PARTITION_ADVANCED
-        default y if DEFAULT_SGI_PARTITION
-        help
-          Say Y here if you would like to be able to read the hard disk
-          partition table format used by SGI machines.
-config ULTRIX_PARTITION
-        bool "Ultrix partition table support" if PARTITION_ADVANCED
-        default y if MACH_DECSTATION
-        help
-          Say Y here if you would like to be able to read the hard disk
-          partition table format used by DEC (now Compaq) Ultrix machines.
-          Otherwise, say N.
-config SUN_PARTITION
-        bool "Sun partition tables support" if PARTITION_ADVANCED
-        default y if (SPARC || SUN3 || SUN3X)
-        ---help---
-          Like most systems, SunOS uses its own hard disk partition table
-          format, incompatible with all others. Saying Y here allows you to
-          read these partition tables and further mount SunOS partitions from
-          within Linux if you have also said Y to "UFS file system support",
-          above. This is mainly used to carry data from a SPARC under SunOS to
-          your Linux box via a removable medium like magneto-optical or ZIP
-          drives; note however that a good portable way to transport files and
-          directories between unixes (and even other operating systems) is
-          given by the tar program ("man tar" or preferably "info tar"). If
-          you don't know what all this is about, say N.
-config KARMA_PARTITION
-        bool "Karma Partition support"
-        depends on PARTITION_ADVANCED
-        help
-          Say Y here if you would like to mount the Rio Karma MP3 player, as it
-          uses a proprietary partition table.
-config EFI_PARTITION
-        bool "EFI GUID Partition support"
-        depends on PARTITION_ADVANCED
-        select CRC32
-        help
-          Say Y here if you would like to use hard disks under Linux which
-          were partitioned using EFI GPT.
-config SYSV68_PARTITION
-        bool "SYSV68 partition table support" if PARTITION_ADVANCED
-        default y if VME
-        help
-          Say Y here if you would like to be able to read the hard disk
-          partition table format used by Motorola Delta machines (using
-          sysv68).
-          Otherwise, say N.
diff --git a/fs/partitions/Makefile b/fs/partitions/Makefile
deleted file mode 100644
index 03af8eac51da..000000000000
--- a/fs/partitions/Makefile
+++ /dev/null
@@ -1,20 +0,0 @@
-#
-# Makefile for the linux kernel.
-#
-obj-$(CONFIG_BLOCK) := check.o
-obj-$(CONFIG_ACORN_PARTITION) += acorn.o
-obj-$(CONFIG_AMIGA_PARTITION) += amiga.o
-obj-$(CONFIG_ATARI_PARTITION) += atari.o
-obj-$(CONFIG_MAC_PARTITION) += mac.o
-obj-$(CONFIG_LDM_PARTITION) += ldm.o
-obj-$(CONFIG_MSDOS_PARTITION) += msdos.o
-obj-$(CONFIG_OSF_PARTITION) += osf.o
-obj-$(CONFIG_SGI_PARTITION) += sgi.o
-obj-$(CONFIG_SUN_PARTITION) += sun.o
-obj-$(CONFIG_ULTRIX_PARTITION) += ultrix.o
-obj-$(CONFIG_IBM_PARTITION) += ibm.o
-obj-$(CONFIG_EFI_PARTITION) += efi.o
-obj-$(CONFIG_KARMA_PARTITION) += karma.o
-obj-$(CONFIG_SYSV68_PARTITION) += sysv68.o
diff --git a/fs/partitions/acorn.c b/fs/partitions/acorn.c
deleted file mode 100644
index fbeb697374d5..000000000000
--- a/fs/partitions/acorn.c
+++ /dev/null
@@ -1,556 +0,0 @@
-/*
- *  linux/fs/partitions/acorn.c
- *
- *  Copyright (c) 1996-2000 Russell King.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- *  Scan ADFS partitions on hard disk drives.  Unfortunately, there
- *  isn't a standard for partitioning drives on Acorn machines, so
- *  every single manufacturer of SCSI and IDE cards created their own
- *  method.
- */
-#include <linux/buffer_head.h>
-#include <linux/adfs_fs.h>
-#include "check.h"
-#include "acorn.h"
-/*
- * Partition types. (Oh for reusability)
- */
-#define PARTITION_RISCIX_MFM    1
-#define PARTITION_RISCIX_SCSI   2
-#define PARTITION_LINUX         9
-#if defined(CONFIG_ACORN_PARTITION_CUMANA) || \
-        defined(CONFIG_ACORN_PARTITION_ADFS)
-static struct adfs_discrecord *
-adfs_partition(struct parsed_partitions *state, char *name, char *data,
-               unsigned long first_sector, int slot)
-{
-        struct adfs_discrecord *dr;
-        unsigned int nr_sects;
-        if (adfs_checkbblk(data))
-                return NULL;
-        dr = (struct adfs_discrecord *)(data + 0x1c0);
-        if (dr->disc_size == 0 && dr->disc_size_high == 0)
-                return NULL;
-        nr_sects = (le32_to_cpu(dr->disc_size_high) << 23) |
-                   (le32_to_cpu(dr->disc_size) >> 9);
-        if (name) {
-                strlcat(state->pp_buf, " [", PAGE_SIZE);
-                strlcat(state->pp_buf, name, PAGE_SIZE);
-                strlcat(state->pp_buf, "]", PAGE_SIZE);
-        }
-        put_partition(state, slot, first_sector, nr_sects);
-        return dr;
-}
-#endif
-#ifdef CONFIG_ACORN_PARTITION_RISCIX
-struct riscix_part {
-        __le32  start;
-        __le32  length;
-        __le32  one;
-        char    name[16];
-};
-struct riscix_record {
-        __le32  magic;
-#define RISCIX_MAGIC    cpu_to_le32(0x4a657320)
-        __le32  date;
-        struct riscix_part part[8];
-};
-#if defined(CONFIG_ACORN_PARTITION_CUMANA) || \
-        defined(CONFIG_ACORN_PARTITION_ADFS)
-static int riscix_partition(struct parsed_partitions *state,
-                            unsigned long first_sect, int slot,
-                            unsigned long nr_sects)
-{
-        Sector sect;
-        struct riscix_record *rr;
-        
-        rr = read_part_sector(state, first_sect, &sect);
-        if (!rr)
-                return -1;
-        strlcat(state->pp_buf, " [RISCiX]", PAGE_SIZE);
-        if (rr->magic == RISCIX_MAGIC) {
-                unsigned long size = nr_sects > 2 ? 2 : nr_sects;
-                int part;
-                strlcat(state->pp_buf, " <", PAGE_SIZE);
-                put_partition(state, slot++, first_sect, size);
-                for (part = 0; part < 8; part++) {
-                        if (rr->part[part].one &&
-                            memcmp(rr->part[part].name, "All\0", 4)) {
-                                put_partition(state, slot++,
-                                        le32_to_cpu(rr->part[part].start),
-                                        le32_to_cpu(rr->part[part].length));
-                                strlcat(state->pp_buf, "(", PAGE_SIZE);
-                                strlcat(state->pp_buf, rr->part[part].name, PAGE_SIZE);
-                                strlcat(state->pp_buf, ")", PAGE_SIZE);
-                        }
-                }
-                strlcat(state->pp_buf, " >\n", PAGE_SIZE);
-        } else {
-                put_partition(state, slot++, first_sect, nr_sects);
-        }
-        put_dev_sector(sect);
-        return slot;
-}
-#endif
-#endif
-#define LINUX_NATIVE_MAGIC 0xdeafa1de
-#define LINUX_SWAP_MAGIC   0xdeafab1e
-struct linux_part {
-        __le32 magic;
-        __le32 start_sect;
-        __le32 nr_sects;
-};
-#if defined(CONFIG_ACORN_PARTITION_CUMANA) || \
-        defined(CONFIG_ACORN_PARTITION_ADFS)
-static int linux_partition(struct parsed_partitions *state,
-                           unsigned long first_sect, int slot,
-                           unsigned long nr_sects)
-{
-        Sector sect;
-        struct linux_part *linuxp;
-        unsigned long size = nr_sects > 2 ? 2 : nr_sects;
-        strlcat(state->pp_buf, " [Linux]", PAGE_SIZE);
-        put_partition(state, slot++, first_sect, size);
-        linuxp = read_part_sector(state, first_sect, &sect);
-        if (!linuxp)
-                return -1;
-        strlcat(state->pp_buf, " <", PAGE_SIZE);
-        while (linuxp->magic == cpu_to_le32(LINUX_NATIVE_MAGIC) ||
-               linuxp->magic == cpu_to_le32(LINUX_SWAP_MAGIC)) {
-                if (slot == state->limit)
-                        break;
-                put_partition(state, slot++, first_sect +
-                                 le32_to_cpu(linuxp->start_sect),
-                                 le32_to_cpu(linuxp->nr_sects));
-                linuxp ++;
-        }
-        strlcat(state->pp_buf, " >", PAGE_SIZE);
-        put_dev_sector(sect);
-        return slot;
-}
-#endif
-#ifdef CONFIG_ACORN_PARTITION_CUMANA
-int adfspart_check_CUMANA(struct parsed_partitions *state)
-{
-        unsigned long first_sector = 0;
-        unsigned int start_blk = 0;
-        Sector sect;
-        unsigned char *data;
-        char *name = "CUMANA/ADFS";
-        int first = 1;
-        int slot = 1;
-        /*
-         * Try Cumana style partitions - sector 6 contains ADFS boot block
-         * with pointer to next 'drive'.
-         *
-         * There are unknowns in this code - is the 'cylinder number' of the
-         * next partition relative to the start of this one - I'm assuming
-         * it is.
-         *
-         * Also, which ID did Cumana use?
-         *
-         * This is totally unfinished, and will require more work to get it
-         * going. Hence it is totally untested.
-         */
-        do {
-                struct adfs_discrecord *dr;
-                unsigned int nr_sects;
-                data = read_part_sector(state, start_blk * 2 + 6, &sect);
-                if (!data)
-                        return -1;
-                if (slot == state->limit)
-                        break;
-                dr = adfs_partition(state, name, data, first_sector, slot++);
-                if (!dr)
-                        break;
-                name = NULL;
-                nr_sects = (data[0x1fd] + (data[0x1fe] << 8)) *
-                           (dr->heads + (dr->lowsector & 0x40 ? 1 : 0)) *
-                           dr->secspertrack;
-                if (!nr_sects)
-                        break;
-                first = 0;
-                first_sector += nr_sects;
-                start_blk += nr_sects >> (BLOCK_SIZE_BITS - 9);
-                nr_sects = 0; /* hmm - should be partition size */
-                switch (data[0x1fc] & 15) {
-                case 0: /* No partition / ADFS? */
-                        break;
-#ifdef CONFIG_ACORN_PARTITION_RISCIX
-                case PARTITION_RISCIX_SCSI:
-                        /* RISCiX - we don't know how to find the next one. */
-                        slot = riscix_partition(state, first_sector, slot,
-                                                nr_sects);
-                        break;
-#endif
-                case PARTITION_LINUX:
-                        slot = linux_partition(state, first_sector, slot,
-                                               nr_sects);
-                        break;
-                }
-                put_dev_sector(sect);
-                if (slot == -1)
-                        return -1;
-        } while (1);
-        put_dev_sector(sect);
-        return first ? 0 : 1;
-}
-#endif
-#ifdef CONFIG_ACORN_PARTITION_ADFS
-/*
- * Purpose: allocate ADFS partitions.
- *
- * Params : hd          - pointer to gendisk structure to store partition info.
- *          dev         - device number to access.
- *
- * Returns: -1 on error, 0 for no ADFS boot sector, 1 for ok.
- *
- * Alloc  : hda  = whole drive
- *          hda1 = ADFS partition on first drive.
- *          hda2 = non-ADFS partition.
- */
-int adfspart_check_ADFS(struct parsed_partitions *state)
-{
-        unsigned long start_sect, nr_sects, sectscyl, heads;
-        Sector sect;
-        unsigned char *data;
-        struct adfs_discrecord *dr;
-        unsigned char id;
-        int slot = 1;
-        data = read_part_sector(state, 6, &sect);
-        if (!data)
-                return -1;
-        dr = adfs_partition(state, "ADFS", data, 0, slot++);
-        if (!dr) {
-                put_dev_sector(sect);
-                return 0;
-        }
-        heads = dr->heads + ((dr->lowsector >> 6) & 1);
-        sectscyl = dr->secspertrack * heads;
-        start_sect = ((data[0x1fe] << 8) + data[0x1fd]) * sectscyl;
-        id = data[0x1fc] & 15;
-        put_dev_sector(sect);
-        /*
-         * Work out start of non-adfs partition.
-         */
-        nr_sects = (state->bdev->bd_inode->i_size >> 9) - start_sect;
-        if (start_sect) {
-                switch (id) {
-#ifdef CONFIG_ACORN_PARTITION_RISCIX
-                case PARTITION_RISCIX_SCSI:
-                case PARTITION_RISCIX_MFM:
-                        slot = riscix_partition(state, start_sect, slot,
-                                                nr_sects);
-                        break;
-#endif
-                case PARTITION_LINUX:
-                        slot = linux_partition(state, start_sect, slot,
-                                               nr_sects);
-                        break;
-                }
-        }
-        strlcat(state->pp_buf, "\n", PAGE_SIZE);
-        return 1;
-}
-#endif
-#ifdef CONFIG_ACORN_PARTITION_ICS
-struct ics_part {
-        __le32 start;
-        __le32 size;
-};
-static int adfspart_check_ICSLinux(struct parsed_partitions *state,
-                                   unsigned long block)
-{
-        Sector sect;
-        unsigned char *data = read_part_sector(state, block, &sect);
-        int result = 0;
-        if (data) {
-                if (memcmp(data, "LinuxPart", 9) == 0)
-                        result = 1;
-                put_dev_sector(sect);
-        }
-        return result;
-}
-/*
- * Check for a valid ICS partition using the checksum.
- */
-static inline int valid_ics_sector(const unsigned char *data)
-{
-        unsigned long sum;
-        int i;
-        for (i = 0, sum = 0x50617274; i < 508; i++)
-                sum += data[i];
-        sum -= le32_to_cpu(*(__le32 *)(&data[508]));
-        return sum == 0;
-}
-/*
- * Purpose: allocate ICS partitions.
- * Params : hd          - pointer to gendisk structure to store partition info.
- *          dev         - device number to access.
- * Returns: -1 on error, 0 for no ICS table, 1 for partitions ok.
- * Alloc  : hda  = whole drive
- *          hda1 = ADFS partition 0 on first drive.
- *          hda2 = ADFS partition 1 on first drive.
- *              ..etc..
- */
-int adfspart_check_ICS(struct parsed_partitions *state)
-{
-        const unsigned char *data;
-        const struct ics_part *p;
-        int slot;
-        Sector sect;
-        /*
-         * Try ICS style partitions - sector 0 contains partition info.
-         */
-        data = read_part_sector(state, 0, &sect);
-        if (!data)
-                return -1;
-        if (!valid_ics_sector(data)) {
-                put_dev_sector(sect);
-                return 0;
-        }
-        strlcat(state->pp_buf, " [ICS]", PAGE_SIZE);
-        for (slot = 1, p = (const struct ics_part *)data; p->size; p++) {
-                u32 start = le32_to_cpu(p->start);
-                s32 size = le32_to_cpu(p->size); /* yes, it's signed. */
-                if (slot == state->limit)
-                        break;
-                /*
-                 * Negative sizes tell the RISC OS ICS driver to ignore
-                 * this partition - in effect it says that this does not
-                 * contain an ADFS filesystem.
-                 */
-                if (size < 0) {
-                        size = -size;
-                        /*
-                         * Our own extension - We use the first sector
-                         * of the partition to identify what type this
-                         * partition is.  We must not make this visible
-                         * to the filesystem.
-                         */
-                        if (size > 1 && adfspart_check_ICSLinux(state, start)) {
-                                start += 1;
-                                size -= 1;
-                        }
-                }
-                if (size)
-                        put_partition(state, slot++, start, size);
-        }
-        put_dev_sector(sect);
-        strlcat(state->pp_buf, "\n", PAGE_SIZE);
-        return 1;
-}
-#endif
-#ifdef CONFIG_ACORN_PARTITION_POWERTEC
-struct ptec_part {
-        __le32 unused1;
-        __le32 unused2;
-        __le32 start;
-        __le32 size;
-        __le32 unused5;
-        char type[8];
-};
-static inline int valid_ptec_sector(const unsigned char *data)
-{
-        unsigned char checksum = 0x2a;
-        int i;
-        /*
-         * If it looks like a PC/BIOS partition, then it
-         * probably isn't PowerTec.
-         */
-        if (data[510] == 0x55 && data[511] == 0xaa)
-                return 0;
-        for (i = 0; i < 511; i++)
-                checksum += data[i];
-        return checksum == data[511];
-}
-/*
- * Purpose: allocate ICS partitions.
- * Params : hd          - pointer to gendisk structure to store partition info.
- *          dev         - device number to access.
- * Returns: -1 on error, 0 for no ICS table, 1 for partitions ok.
- * Alloc  : hda  = whole drive
- *          hda1 = ADFS partition 0 on first drive.
- *          hda2 = ADFS partition 1 on first drive.
- *              ..etc..
- */
-int adfspart_check_POWERTEC(struct parsed_partitions *state)
-{
-        Sector sect;
-        const unsigned char *data;
-        const struct ptec_part *p;
-        int slot = 1;
-        int i;
-        data = read_part_sector(state, 0, &sect);
-        if (!data)
-                return -1;
-        if (!valid_ptec_sector(data)) {
-                put_dev_sector(sect);
-                return 0;
-        }
-        strlcat(state->pp_buf, " [POWERTEC]", PAGE_SIZE);
-        for (i = 0, p = (const struct ptec_part *)data; i < 12; i++, p++) {
-                u32 start = le32_to_cpu(p->start);
-                u32 size = le32_to_cpu(p->size);
-                if (size)
-                        put_partition(state, slot++, start, size);
-        }
-        put_dev_sector(sect);
-        strlcat(state->pp_buf, "\n", PAGE_SIZE);
-        return 1;
-}
-#endif
-#ifdef CONFIG_ACORN_PARTITION_EESOX
-struct eesox_part {
-        char    magic[6];
-        char    name[10];
-        __le32  start;
-        __le32  unused6;
-        __le32  unused7;
-        __le32  unused8;
-};
-/*
- * Guess who created this format?
- */
-static const char eesox_name[] = {
-        'N', 'e', 'i', 'l', ' ',
-        'C', 'r', 'i', 't', 'c', 'h', 'e', 'l', 'l', ' ', ' '
-};
-/*
- * EESOX SCSI partition format.
- *
- * This is a goddamned awful partition format.  We don't seem to store
- * the size of the partition in this table, only the start addresses.
- *
- * There are two possibilities where the size comes from:
- *  1. The individual ADFS boot block entries that are placed on the disk.
- *  2. The start address of the next entry.
- */
-int adfspart_check_EESOX(struct parsed_partitions *state)
-{
-        Sector sect;
-        const unsigned char *data;
-        unsigned char buffer[256];
-        struct eesox_part *p;
-        sector_t start = 0;
-        int i, slot = 1;
-        data = read_part_sector(state, 7, &sect);
-        if (!data)
-                return -1;
-        /*
-         * "Decrypt" the partition table.  God knows why...
-         */
-        for (i = 0; i < 256; i++)
-                buffer[i] = data[i] ^ eesox_name[i & 15];
-        put_dev_sector(sect);
-        for (i = 0, p = (struct eesox_part *)buffer; i < 8; i++, p++) {
-                sector_t next;
-                if (memcmp(p->magic, "Eesox", 6))
-                        break;
-                next = le32_to_cpu(p->start);
-                if (i)
-                        put_partition(state, slot++, start, next - start);
-                start = next;
-        }
-        if (i != 0) {
-                sector_t size;
-                size = get_capacity(state->bdev->bd_disk);
-                put_partition(state, slot++, start, size - start);
-                strlcat(state->pp_buf, "\n", PAGE_SIZE);
-        }
-        return i ? 1 : 0;
-}
-#endif
diff --git a/fs/partitions/acorn.h b/fs/partitions/acorn.h
deleted file mode 100644
index ede828529692..000000000000
--- a/fs/partitions/acorn.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * linux/fs/partitions/acorn.h
- *
- * Copyright (C) 1996-2001 Russell King.
- *
- *  I _hate_ this partitioning mess - why can't we have one defined
- *  format, and everyone stick to it?
- */
-int adfspart_check_CUMANA(struct parsed_partitions *state);
-int adfspart_check_ADFS(struct parsed_partitions *state);
-int adfspart_check_ICS(struct parsed_partitions *state);
-int adfspart_check_POWERTEC(struct parsed_partitions *state);
-int adfspart_check_EESOX(struct parsed_partitions *state);
diff --git a/fs/partitions/amiga.c b/fs/partitions/amiga.c
deleted file mode 100644
index 70cbf44a1560..000000000000
--- a/fs/partitions/amiga.c
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- *  fs/partitions/amiga.c
- *
- *  Code extracted from drivers/block/genhd.c
- *
- *  Copyright (C) 1991-1998  Linus Torvalds
- *  Re-organised Feb 1998 Russell King
- */
-#include <linux/types.h>
-#include <linux/affs_hardblocks.h>
-#include "check.h"
-#include "amiga.h"
-static __inline__ u32
-checksum_block(__be32 *m, int size)
-{
-        u32 sum = 0;
-        while (size--)
-                sum += be32_to_cpu(*m++);
-        return sum;
-}
-int amiga_partition(struct parsed_partitions *state)
-{
-        Sector sect;
-        unsigned char *data;
-        struct RigidDiskBlock *rdb;
-        struct PartitionBlock *pb;
-        int start_sect, nr_sects, blk, part, res = 0;
-        int blksize = 1;        /* Multiplier for disk block size */
-        int slot = 1;
-        char b[BDEVNAME_SIZE];
-        for (blk = 0; ; blk++, put_dev_sector(sect)) {
-                if (blk == RDB_ALLOCATION_LIMIT)
-                        goto rdb_done;
-                data = read_part_sector(state, blk, &sect);
-                if (!data) {
-                        if (warn_no_part)
-                                printk("Dev %s: unable to read RDB block %d\n",
-                                       bdevname(state->bdev, b), blk);
-                        res = -1;
-                        goto rdb_done;
-                }
-                if (*(__be32 *)data != cpu_to_be32(IDNAME_RIGIDDISK))
-                        continue;
-                rdb = (struct RigidDiskBlock *)data;
-                if (checksum_block((__be32 *)data, be32_to_cpu(rdb->rdb_SummedLongs) & 0x7F) == 0)
-                        break;
-                /* Try again with 0xdc..0xdf zeroed, Windows might have
-                 * trashed it.
-                 */
-                *(__be32 *)(data+0xdc) = 0;
-                if (checksum_block((__be32 *)data,
-                                be32_to_cpu(rdb->rdb_SummedLongs) & 0x7F)==0) {
-                        printk("Warning: Trashed word at 0xd0 in block %d "
-                                "ignored in checksum calculation\n",blk);
-                        break;
-                }
-                printk("Dev %s: RDB in block %d has bad checksum\n",
-                       bdevname(state->bdev, b), blk);
-        }
-        /* blksize is blocks per 512 byte standard block */
-        blksize = be32_to_cpu( rdb->rdb_BlockBytes ) / 512;
-        {
-                char tmp[7 + 10 + 1 + 1];
-                /* Be more informative */
-                snprintf(tmp, sizeof(tmp), " RDSK (%d)", blksize * 512);
-                strlcat(state->pp_buf, tmp, PAGE_SIZE);
-        }
-        blk = be32_to_cpu(rdb->rdb_PartitionList);
-        put_dev_sector(sect);
-        for (part = 1; blk>0 && part<=16; part++, put_dev_sector(sect)) {
-                blk *= blksize; /* Read in terms partition table understands */
-                data = read_part_sector(state, blk, &sect);
-                if (!data) {
-                        if (warn_no_part)
-                                printk("Dev %s: unable to read partition block %d\n",
-                                       bdevname(state->bdev, b), blk);
-                        res = -1;
-                        goto rdb_done;
-                }
-                pb  = (struct PartitionBlock *)data;
-                blk = be32_to_cpu(pb->pb_Next);
-                if (pb->pb_ID != cpu_to_be32(IDNAME_PARTITION))
-                        continue;
-                if (checksum_block((__be32 *)pb, be32_to_cpu(pb->pb_SummedLongs) & 0x7F) != 0 )
-                        continue;
-                /* Tell Kernel about it */
-                nr_sects = (be32_to_cpu(pb->pb_Environment[10]) + 1 -
-                            be32_to_cpu(pb->pb_Environment[9])) *
-                           be32_to_cpu(pb->pb_Environment[3]) *
-                           be32_to_cpu(pb->pb_Environment[5]) *
-                           blksize;
-                if (!nr_sects)
-                        continue;
-                start_sect = be32_to_cpu(pb->pb_Environment[9]) *
-                             be32_to_cpu(pb->pb_Environment[3]) *
-                             be32_to_cpu(pb->pb_Environment[5]) *
-                             blksize;
-                put_partition(state,slot++,start_sect,nr_sects);
-                {
-                        /* Be even more informative to aid mounting */
-                        char dostype[4];
-                        char tmp[42];
-                        __be32 *dt = (__be32 *)dostype;
-                        *dt = pb->pb_Environment[16];
-                        if (dostype[3] < ' ')
-                                snprintf(tmp, sizeof(tmp), " (%c%c%c^%c)",
-                                        dostype[0], dostype[1],
-                                        dostype[2], dostype[3] + '@' );
-                        else
-                                snprintf(tmp, sizeof(tmp), " (%c%c%c%c)",
-                                        dostype[0], dostype[1],
-                                        dostype[2], dostype[3]);
-                        strlcat(state->pp_buf, tmp, PAGE_SIZE);
-                        snprintf(tmp, sizeof(tmp), "(res %d spb %d)",
-                                be32_to_cpu(pb->pb_Environment[6]),
-                                be32_to_cpu(pb->pb_Environment[4]));
-                        strlcat(state->pp_buf, tmp, PAGE_SIZE);
-                }
-                res = 1;
-        }
-        strlcat(state->pp_buf, "\n", PAGE_SIZE);
-rdb_done:
-        return res;
-}
diff --git a/fs/partitions/amiga.h b/fs/partitions/amiga.h
deleted file mode 100644
index d094585cadaa..000000000000
--- a/fs/partitions/amiga.h
+++ /dev/null
@@ -1,6 +0,0 @@
-/*
- *  fs/partitions/amiga.h
- */
-int amiga_partition(struct parsed_partitions *state);
diff --git a/fs/partitions/atari.c b/fs/partitions/atari.c
deleted file mode 100644
index 9875b05e80a2..000000000000
--- a/fs/partitions/atari.c
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- *  fs/partitions/atari.c
- *
- *  Code extracted from drivers/block/genhd.c
- *
- *  Copyright (C) 1991-1998  Linus Torvalds
- *  Re-organised Feb 1998 Russell King
- */
-#include <linux/ctype.h>
-#include "check.h"
-#include "atari.h"
-/* ++guenther: this should be settable by the user ("make config")?.
- */
-#define ICD_PARTS
-/* check if a partition entry looks valid -- Atari format is assumed if at
-   least one of the primary entries is ok this way */
-#define VALID_PARTITION(pi,hdsiz)                                            \
-    (((pi)->flg & 1) &&                                                      \
-     isalnum((pi)->id[0]) && isalnum((pi)->id[1]) && isalnum((pi)->id[2]) && \
-     be32_to_cpu((pi)->st) <= (hdsiz) &&                                     \
-     be32_to_cpu((pi)->st) + be32_to_cpu((pi)->siz) <= (hdsiz))
-static inline int OK_id(char *s)
-{
-        return  memcmp (s, "GEM", 3) == 0 || memcmp (s, "BGM", 3) == 0 ||
-                memcmp (s, "LNX", 3) == 0 || memcmp (s, "SWP", 3) == 0 ||
-                memcmp (s, "RAW", 3) == 0 ;
-}
-int atari_partition(struct parsed_partitions *state)
-{
-        Sector sect;
-        struct rootsector *rs;
-        struct partition_info *pi;
-        u32 extensect;
-        u32 hd_size;
-        int slot;
-#ifdef ICD_PARTS
-        int part_fmt = 0; /* 0:unknown, 1:AHDI, 2:ICD/Supra */
-#endif
-        rs = read_part_sector(state, 0, &sect);
-        if (!rs)
-                return -1;
-        /* Verify this is an Atari rootsector: */
-        hd_size = state->bdev->bd_inode->i_size >> 9;
-        if (!VALID_PARTITION(&rs->part[0], hd_size) &&
-            !VALID_PARTITION(&rs->part[1], hd_size) &&
-            !VALID_PARTITION(&rs->part[2], hd_size) &&
-            !VALID_PARTITION(&rs->part[3], hd_size)) {
-                /*
-                 * if there's no valid primary partition, assume that no Atari
-                 * format partition table (there's no reliable magic or the like
-                 * :-()
-                 */
-                put_dev_sector(sect);
-                return 0;
-        }
-        pi = &rs->part[0];
-        strlcat(state->pp_buf, " AHDI", PAGE_SIZE);
-        for (slot = 1; pi < &rs->part[4] && slot < state->limit; slot++, pi++) {
-                struct rootsector *xrs;
-                Sector sect2;
-                ulong partsect;
-                if ( !(pi->flg & 1) )
-                        continue;
-                /* active partition */
-                if (memcmp (pi->id, "XGM", 3) != 0) {
-                        /* we don't care about other id's */
-                        put_partition (state, slot, be32_to_cpu(pi->st),
-                                        be32_to_cpu(pi->siz));
-                        continue;
-                }
-                /* extension partition */
-#ifdef ICD_PARTS
-                part_fmt = 1;
-#endif
-                strlcat(state->pp_buf, " XGM<", PAGE_SIZE);
-                partsect = extensect = be32_to_cpu(pi->st);
-                while (1) {
-                        xrs = read_part_sector(state, partsect, &sect2);
-                        if (!xrs) {
-                                printk (" block %ld read failed\n", partsect);
-                                put_dev_sector(sect);
-                                return -1;
-                        }
-                        /* ++roman: sanity check: bit 0 of flg field must be set */
-                        if (!(xrs->part[0].flg & 1)) {
-                                printk( "\nFirst sub-partition in extended partition is not valid!\n" );
-                                put_dev_sector(sect2);
-                                break;
-                        }
-                        put_partition(state, slot,
-                                   partsect + be32_to_cpu(xrs->part[0].st),
-                                   be32_to_cpu(xrs->part[0].siz));
-                        if (!(xrs->part[1].flg & 1)) {
-                                /* end of linked partition list */
-                                put_dev_sector(sect2);
-                                break;
-                        }
-                        if (memcmp( xrs->part[1].id, "XGM", 3 ) != 0) {
-                                printk("\nID of extended partition is not XGM!\n");
-                                put_dev_sector(sect2);
-                                break;
-                        }
-                        partsect = be32_to_cpu(xrs->part[1].st) + extensect;
-                        put_dev_sector(sect2);
-                        if (++slot == state->limit) {
-                                printk( "\nMaximum number of partitions reached!\n" );
-                                break;
-                        }
-                }
-                strlcat(state->pp_buf, " >", PAGE_SIZE);
-        }
-#ifdef ICD_PARTS
-        if ( part_fmt!=1 ) { /* no extended partitions -> test ICD-format */
-                pi = &rs->icdpart[0];
-                /* sanity check: no ICD format if first partition invalid */
-                if (OK_id(pi->id)) {
-                        strlcat(state->pp_buf, " ICD<", PAGE_SIZE);
-                        for (; pi < &rs->icdpart[8] && slot < state->limit; slot++, pi++) {
-                                /* accept only GEM,BGM,RAW,LNX,SWP partitions */
-                                if (!((pi->flg & 1) && OK_id(pi->id)))
-                                        continue;
-                                part_fmt = 2;
-                                put_partition (state, slot,
-                                                be32_to_cpu(pi->st),
-                                                be32_to_cpu(pi->siz));
-                        }
-                        strlcat(state->pp_buf, " >", PAGE_SIZE);
-                }
-        }
-#endif
-        put_dev_sector(sect);
-        strlcat(state->pp_buf, "\n", PAGE_SIZE);
-        return 1;
-}
diff --git a/fs/partitions/atari.h b/fs/partitions/atari.h
deleted file mode 100644
index fe2d32a89f36..000000000000
--- a/fs/partitions/atari.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- *  fs/partitions/atari.h
- *  Moved by Russell King from:
- *
- * linux/include/linux/atari_rootsec.h
- * definitions for Atari Rootsector layout
- * by Andreas Schwab (schwab@ls5.informatik.uni-dortmund.de)
- *
- * modified for ICD/Supra partitioning scheme restricted to at most 12
- * partitions
- * by Guenther Kelleter (guenther@pool.informatik.rwth-aachen.de)
- */
-struct partition_info
-{
-  u8 flg;                       /* bit 0: active; bit 7: bootable */
-  char id[3];                   /* "GEM", "BGM", "XGM", or other */
-  __be32 st;                    /* start of partition */
-  __be32 siz;                   /* length of partition */
-};
-struct rootsector
-{
-  char unused[0x156];           /* room for boot code */
-  struct partition_info icdpart[8];     /* info for ICD-partitions 5..12 */
-  char unused2[0xc];
-  u32 hd_siz;                   /* size of disk in blocks */
-  struct partition_info part[4];
-  u32 bsl_st;                   /* start of bad sector list */
-  u32 bsl_cnt;                  /* length of bad sector list */
-  u16 checksum;                 /* checksum for bootable disks */
-} __attribute__((__packed__));
-int atari_partition(struct parsed_partitions *state);
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
deleted file mode 100644
index e3c63d1c5e13..000000000000
--- a/fs/partitions/check.c
+++ /dev/null
@@ -1,687 +0,0 @@
-/*
- *  fs/partitions/check.c
- *
- *  Code extracted from drivers/block/genhd.c
- *  Copyright (C) 1991-1998  Linus Torvalds
- *  Re-organised Feb 1998 Russell King
- *
- *  We now have independent partition support from the
- *  block drivers, which allows all the partition code to
- *  be grouped in one location, and it to be mostly self
- *  contained.
- *
- *  Added needed MAJORS for new pairs, {hdi,hdj}, {hdk,hdl}
- */
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-#include <linux/kmod.h>
-#include <linux/ctype.h>
-#include <linux/genhd.h>
-#include <linux/blktrace_api.h>
-#include "check.h"
-#include "acorn.h"
-#include "amiga.h"
-#include "atari.h"
-#include "ldm.h"
-#include "mac.h"
-#include "msdos.h"
-#include "osf.h"
-#include "sgi.h"
-#include "sun.h"
-#include "ibm.h"
-#include "ultrix.h"
-#include "efi.h"
-#include "karma.h"
-#include "sysv68.h"
-#ifdef CONFIG_BLK_DEV_MD
-extern void md_autodetect_dev(dev_t dev);
-#endif
-int warn_no_part = 1; /*This is ugly: should make genhd removable media aware*/
-static int (*check_part[])(struct parsed_partitions *) = {
-        /*
-         * Probe partition formats with tables at disk address 0
-         * that also have an ADFS boot block at 0xdc0.
-         */
-#ifdef CONFIG_ACORN_PARTITION_ICS
-        adfspart_check_ICS,
-#endif
-#ifdef CONFIG_ACORN_PARTITION_POWERTEC
-        adfspart_check_POWERTEC,
-#endif
-#ifdef CONFIG_ACORN_PARTITION_EESOX
-        adfspart_check_EESOX,
-#endif
-        /*
-         * Now move on to formats that only have partition info at
-         * disk address 0xdc0.  Since these may also have stale
-         * PC/BIOS partition tables, they need to come before
-         * the msdos entry.
-         */
-#ifdef CONFIG_ACORN_PARTITION_CUMANA
-        adfspart_check_CUMANA,
-#endif
-#ifdef CONFIG_ACORN_PARTITION_ADFS
-        adfspart_check_ADFS,
-#endif
-#ifdef CONFIG_EFI_PARTITION
-        efi_partition,          /* this must come before msdos */
-#endif
-#ifdef CONFIG_SGI_PARTITION
-        sgi_partition,
-#endif
-#ifdef CONFIG_LDM_PARTITION
-        ldm_partition,          /* this must come before msdos */
-#endif
-#ifdef CONFIG_MSDOS_PARTITION
-        msdos_partition,
-#endif
-#ifdef CONFIG_OSF_PARTITION
-        osf_partition,
-#endif
-#ifdef CONFIG_SUN_PARTITION
-        sun_partition,
-#endif
-#ifdef CONFIG_AMIGA_PARTITION
-        amiga_partition,
-#endif
-#ifdef CONFIG_ATARI_PARTITION
-        atari_partition,
-#endif
-#ifdef CONFIG_MAC_PARTITION
-        mac_partition,
-#endif
-#ifdef CONFIG_ULTRIX_PARTITION
-        ultrix_partition,
-#endif
-#ifdef CONFIG_IBM_PARTITION
-        ibm_partition,
-#endif
-#ifdef CONFIG_KARMA_PARTITION
-        karma_partition,
-#endif
-#ifdef CONFIG_SYSV68_PARTITION
-        sysv68_partition,
-#endif
-        NULL
-};
- 
-/*
- * disk_name() is used by partition check code and the genhd driver.
- * It formats the devicename of the indicated disk into
- * the supplied buffer (of size at least 32), and returns
- * a pointer to that same buffer (for convenience).
- */
-char *disk_name(struct gendisk *hd, int partno, char *buf)
-{
-        if (!partno)
-                snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name);
-        else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1]))
-                snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno);
-        else
-                snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno);
-        return buf;
-}
-const char *bdevname(struct block_device *bdev, char *buf)
-{
-        return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf);
-}
-EXPORT_SYMBOL(bdevname);
-/*
- * There's very little reason to use this, you should really
- * have a struct block_device just about everywhere and use
- * bdevname() instead.
- */
-const char *__bdevname(dev_t dev, char *buffer)
-{
-        scnprintf(buffer, BDEVNAME_SIZE, "unknown-block(%u,%u)",
-                                MAJOR(dev), MINOR(dev));
-        return buffer;
-}
-EXPORT_SYMBOL(__bdevname);
-static struct parsed_partitions *
-check_partition(struct gendisk *hd, struct block_device *bdev)
-{
-        struct parsed_partitions *state;
-        int i, res, err;
-        state = kzalloc(sizeof(struct parsed_partitions), GFP_KERNEL);
-        if (!state)
-                return NULL;
-        state->pp_buf = (char *)__get_free_page(GFP_KERNEL);
-        if (!state->pp_buf) {
-                kfree(state);
-                return NULL;
-        }
-        state->pp_buf[0] = '\0';
-        state->bdev = bdev;
-        disk_name(hd, 0, state->name);
-        snprintf(state->pp_buf, PAGE_SIZE, " %s:", state->name);
-        if (isdigit(state->name[strlen(state->name)-1]))
-                sprintf(state->name, "p");
-        state->limit = disk_max_parts(hd);
-        i = res = err = 0;
-        while (!res && check_part[i]) {
-                memset(&state->parts, 0, sizeof(state->parts));
-                res = check_part[i++](state);
-                if (res < 0) {
-                        /* We have hit an I/O error which we don't report now.
-                        * But record it, and let the others do their job.
-                        */
-                        err = res;
-                        res = 0;
-                }
-        }
-        if (res > 0) {
-                printk(KERN_INFO "%s", state->pp_buf);
-                free_page((unsigned long)state->pp_buf);
-                return state;
-        }
-        if (state->access_beyond_eod)
-                err = -ENOSPC;
-        if (err)
-        /* The partition is unrecognized. So report I/O errors if there were any */
-                res = err;
-        if (!res)
-                strlcat(state->pp_buf, " unknown partition table\n", PAGE_SIZE);
-        else if (warn_no_part)
-                strlcat(state->pp_buf, " unable to read partition table\n", PAGE_SIZE);
-        printk(KERN_INFO "%s", state->pp_buf);
-        free_page((unsigned long)state->pp_buf);
-        kfree(state);
-        return ERR_PTR(res);
-}
-static ssize_t part_partition_show(struct device *dev,
-                                   struct device_attribute *attr, char *buf)
-{
-        struct hd_struct *p = dev_to_part(dev);
-        return sprintf(buf, "%d\n", p->partno);
-}
-static ssize_t part_start_show(struct device *dev,
-                               struct device_attribute *attr, char *buf)
-{
-        struct hd_struct *p = dev_to_part(dev);
-        return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect);
-}
-ssize_t part_size_show(struct device *dev,
-                       struct device_attribute *attr, char *buf)
-{
-        struct hd_struct *p = dev_to_part(dev);
-        return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects);
-}
-static ssize_t part_ro_show(struct device *dev,
-                            struct device_attribute *attr, char *buf)
-{
-        struct hd_struct *p = dev_to_part(dev);
-        return sprintf(buf, "%d\n", p->policy ? 1 : 0);
-}
-static ssize_t part_alignment_offset_show(struct device *dev,
-                                          struct device_attribute *attr, char *buf)
-{
-        struct hd_struct *p = dev_to_part(dev);
-        return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset);
-}
-static ssize_t part_discard_alignment_show(struct device *dev,
-                                           struct device_attribute *attr, char *buf)
-{
-        struct hd_struct *p = dev_to_part(dev);
-        return sprintf(buf, "%u\n", p->discard_alignment);
-}
-ssize_t part_stat_show(struct device *dev,
-                       struct device_attribute *attr, char *buf)
-{
-        struct hd_struct *p = dev_to_part(dev);
-        int cpu;
-        cpu = part_stat_lock();
-        part_round_stats(cpu, p);
-        part_stat_unlock();
-        return sprintf(buf,
-                "%8lu %8lu %8llu %8u "
-                "%8lu %8lu %8llu %8u "
-                "%8u %8u %8u"
-                "\n",
-                part_stat_read(p, ios[READ]),
-                part_stat_read(p, merges[READ]),
-                (unsigned long long)part_stat_read(p, sectors[READ]),
-                jiffies_to_msecs(part_stat_read(p, ticks[READ])),
-                part_stat_read(p, ios[WRITE]),
-                part_stat_read(p, merges[WRITE]),
-                (unsigned long long)part_stat_read(p, sectors[WRITE]),
-                jiffies_to_msecs(part_stat_read(p, ticks[WRITE])),
-                part_in_flight(p),
-                jiffies_to_msecs(part_stat_read(p, io_ticks)),
-                jiffies_to_msecs(part_stat_read(p, time_in_queue)));
-}
-ssize_t part_inflight_show(struct device *dev,
-                        struct device_attribute *attr, char *buf)
-{
-        struct hd_struct *p = dev_to_part(dev);
-        return sprintf(buf, "%8u %8u\n", atomic_read(&p->in_flight[0]),
-                atomic_read(&p->in_flight[1]));
-}
-#ifdef CONFIG_FAIL_MAKE_REQUEST
-ssize_t part_fail_show(struct device *dev,
-                       struct device_attribute *attr, char *buf)
-{
-        struct hd_struct *p = dev_to_part(dev);
-        return sprintf(buf, "%d\n", p->make_it_fail);
-}
-ssize_t part_fail_store(struct device *dev,
-                        struct device_attribute *attr,
-                        const char *buf, size_t count)
-{
-        struct hd_struct *p = dev_to_part(dev);
-        int i;
-        if (count > 0 && sscanf(buf, "%d", &i) > 0)
-                p->make_it_fail = (i == 0) ? 0 : 1;
-        return count;
-}
-#endif
-static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL);
-static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
-static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
-static DEVICE_ATTR(ro, S_IRUGO, part_ro_show, NULL);
-static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL);
-static DEVICE_ATTR(discard_alignment, S_IRUGO, part_discard_alignment_show,
-                   NULL);
-static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
-static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
-#ifdef CONFIG_FAIL_MAKE_REQUEST
-static struct device_attribute dev_attr_fail =
-        __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
-#endif
-static struct attribute *part_attrs[] = {
-        &dev_attr_partition.attr,
-        &dev_attr_start.attr,
-        &dev_attr_size.attr,
-        &dev_attr_ro.attr,
-        &dev_attr_alignment_offset.attr,
-        &dev_attr_discard_alignment.attr,
-        &dev_attr_stat.attr,
-        &dev_attr_inflight.attr,
-#ifdef CONFIG_FAIL_MAKE_REQUEST
-        &dev_attr_fail.attr,
-#endif
-        NULL
-};
-static struct attribute_group part_attr_group = {
-        .attrs = part_attrs,
-};
-static const struct attribute_group *part_attr_groups[] = {
-        &part_attr_group,
-#ifdef CONFIG_BLK_DEV_IO_TRACE
-        &blk_trace_attr_group,
-#endif
-        NULL
-};
-static void part_release(struct device *dev)
-{
-        struct hd_struct *p = dev_to_part(dev);
-        free_part_stats(p);
-        free_part_info(p);
-        kfree(p);
-}
-struct device_type part_type = {
-        .name           = "partition",
-        .groups         = part_attr_groups,
-        .release        = part_release,
-};
-static void delete_partition_rcu_cb(struct rcu_head *head)
-{
-        struct hd_struct *part = container_of(head, struct hd_struct, rcu_head);
-        part->start_sect = 0;
-        part->nr_sects = 0;
-        part_stat_set_all(part, 0);
-        put_device(part_to_dev(part));
-}
-void __delete_partition(struct hd_struct *part)
-{
-        call_rcu(&part->rcu_head, delete_partition_rcu_cb);
-}
-void delete_partition(struct gendisk *disk, int partno)
-{
-        struct disk_part_tbl *ptbl = disk->part_tbl;
-        struct hd_struct *part;
-        if (partno >= ptbl->len)
-                return;
-        part = ptbl->part[partno];
-        if (!part)
-                return;
-        blk_free_devt(part_devt(part));
-        rcu_assign_pointer(ptbl->part[partno], NULL);
-        rcu_assign_pointer(ptbl->last_lookup, NULL);
-        kobject_put(part->holder_dir);
-        device_del(part_to_dev(part));
-        hd_struct_put(part);
-}
-static ssize_t whole_disk_show(struct device *dev,
-                               struct device_attribute *attr, char *buf)
-{
-        return 0;
-}
-static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH,
-                   whole_disk_show, NULL);
-struct hd_struct *add_partition(struct gendisk *disk, int partno,
-                                sector_t start, sector_t len, int flags,
-                                struct partition_meta_info *info)
-{
-        struct hd_struct *p;
-        dev_t devt = MKDEV(0, 0);
-        struct device *ddev = disk_to_dev(disk);
-        struct device *pdev;
-        struct disk_part_tbl *ptbl;
-        const char *dname;
-        int err;
-        err = disk_expand_part_tbl(disk, partno);
-        if (err)
-                return ERR_PTR(err);
-        ptbl = disk->part_tbl;
-        if (ptbl->part[partno])
-                return ERR_PTR(-EBUSY);
-        p = kzalloc(sizeof(*p), GFP_KERNEL);
-        if (!p)
-                return ERR_PTR(-EBUSY);
-        if (!init_part_stats(p)) {
-                err = -ENOMEM;
-                goto out_free;
-        }
-        pdev = part_to_dev(p);
-        p->start_sect = start;
-        p->alignment_offset =
-                queue_limit_alignment_offset(&disk->queue->limits, start);
-        p->discard_alignment =
-                queue_limit_discard_alignment(&disk->queue->limits, start);
-        p->nr_sects = len;
-        p->partno = partno;
-        p->policy = get_disk_ro(disk);
-        if (info) {
-                struct partition_meta_info *pinfo = alloc_part_info(disk);
-                if (!pinfo)
-                        goto out_free_stats;
-                memcpy(pinfo, info, sizeof(*info));
-                p->info = pinfo;
-        }
-        dname = dev_name(ddev);
-        if (isdigit(dname[strlen(dname) - 1]))
-                dev_set_name(pdev, "%sp%d", dname, partno);
-        else
-                dev_set_name(pdev, "%s%d", dname, partno);
-        device_initialize(pdev);
-        pdev->class = &block_class;
-        pdev->type = &part_type;
-        pdev->parent = ddev;
-        err = blk_alloc_devt(p, &devt);
-        if (err)
-                goto out_free_info;
-        pdev->devt = devt;
-        /* delay uevent until 'holders' subdir is created */
-        dev_set_uevent_suppress(pdev, 1);
-        err = device_add(pdev);
-        if (err)
-                goto out_put;
-        err = -ENOMEM;
-        p->holder_dir = kobject_create_and_add("holders", &pdev->kobj);
-        if (!p->holder_dir)
-                goto out_del;
-        dev_set_uevent_suppress(pdev, 0);
-        if (flags & ADDPART_FLAG_WHOLEDISK) {
-                err = device_create_file(pdev, &dev_attr_whole_disk);
-                if (err)
-                        goto out_del;
-        }
-        /* everything is up and running, commence */
-        rcu_assign_pointer(ptbl->part[partno], p);
-        /* suppress uevent if the disk suppresses it */
-        if (!dev_get_uevent_suppress(ddev))
-                kobject_uevent(&pdev->kobj, KOBJ_ADD);
-        hd_ref_init(p);
-        return p;
-out_free_info:
-        free_part_info(p);
-out_free_stats:
-        free_part_stats(p);
-out_free:
-        kfree(p);
-        return ERR_PTR(err);
-out_del:
-        kobject_put(p->holder_dir);
-        device_del(pdev);
-out_put:
-        put_device(pdev);
-        blk_free_devt(devt);
-        return ERR_PTR(err);
-}
-static bool disk_unlock_native_capacity(struct gendisk *disk)
-{
-        const struct block_device_operations *bdops = disk->fops;
-        if (bdops->unlock_native_capacity &&
-            !(disk->flags & GENHD_FL_NATIVE_CAPACITY)) {
-                printk(KERN_CONT "enabling native capacity\n");
-                bdops->unlock_native_capacity(disk);
-                disk->flags |= GENHD_FL_NATIVE_CAPACITY;
-                return true;
-        } else {
-                printk(KERN_CONT "truncated\n");
-                return false;
-        }
-}
-int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
-{
-        struct parsed_partitions *state = NULL;
-        struct disk_part_iter piter;
-        struct hd_struct *part;
-        int p, highest, res;
-rescan:
-        if (state && !IS_ERR(state)) {
-                kfree(state);
-                state = NULL;
-        }
-        if (bdev->bd_part_count)
-                return -EBUSY;
-        res = invalidate_partition(disk, 0);
-        if (res)
-                return res;
-        disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
-        while ((part = disk_part_iter_next(&piter)))
-                delete_partition(disk, part->partno);
-        disk_part_iter_exit(&piter);
-        if (disk->fops->revalidate_disk)
-                disk->fops->revalidate_disk(disk);
-        check_disk_size_change(disk, bdev);
-        bdev->bd_invalidated = 0;
-        if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
-                return 0;
-        if (IS_ERR(state)) {
-                /*
-                 * I/O error reading the partition table.  If any
-                 * partition code tried to read beyond EOD, retry
-                 * after unlocking native capacity.
-                 */
-                if (PTR_ERR(state) == -ENOSPC) {
-                        printk(KERN_WARNING "%s: partition table beyond EOD, ",
-                               disk->disk_name);
-                        if (disk_unlock_native_capacity(disk))
-                                goto rescan;
-                }
-                return -EIO;
-        }
-        /*
-         * If any partition code tried to read beyond EOD, try
-         * unlocking native capacity even if partition table is
-         * successfully read as we could be missing some partitions.
-         */
-        if (state->access_beyond_eod) {
-                printk(KERN_WARNING
-                       "%s: partition table partially beyond EOD, ",
-                       disk->disk_name);
-                if (disk_unlock_native_capacity(disk))
-                        goto rescan;
-        }
-        /* tell userspace that the media / partition table may have changed */
-        kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
-        /* Detect the highest partition number and preallocate
-         * disk->part_tbl.  This is an optimization and not strictly
-         * necessary.
-         */
-        for (p = 1, highest = 0; p < state->limit; p++)
-                if (state->parts[p].size)
-                        highest = p;
-        disk_expand_part_tbl(disk, highest);
-        /* add partitions */
-        for (p = 1; p < state->limit; p++) {
-                sector_t size, from;
-                struct partition_meta_info *info = NULL;
-                size = state->parts[p].size;
-                if (!size)
-                        continue;
-                from = state->parts[p].from;
-                if (from >= get_capacity(disk)) {
-                        printk(KERN_WARNING
-                               "%s: p%d start %llu is beyond EOD, ",
-                               disk->disk_name, p, (unsigned long long) from);
-                        if (disk_unlock_native_capacity(disk))
-                                goto rescan;
-                        continue;
-                }
-                if (from + size > get_capacity(disk)) {
-                        printk(KERN_WARNING
-                               "%s: p%d size %llu extends beyond EOD, ",
-                               disk->disk_name, p, (unsigned long long) size);
-                        if (disk_unlock_native_capacity(disk)) {
-                                /* free state and restart */
-                                goto rescan;
-                        } else {
-                                /*
-                                 * we can not ignore partitions of broken tables
-                                 * created by for example camera firmware, but
-                                 * we limit them to the end of the disk to avoid
-                                 * creating invalid block devices
-                                 */
-                                size = get_capacity(disk) - from;
-                        }
-                }
-                if (state->parts[p].has_info)
-                        info = &state->parts[p].info;
-                part = add_partition(disk, p, from, size,
-                                     state->parts[p].flags,
-                                     &state->parts[p].info);
-                if (IS_ERR(part)) {
-                        printk(KERN_ERR " %s: p%d could not be added: %ld\n",
-                               disk->disk_name, p, -PTR_ERR(part));
-                        continue;
-                }
-#ifdef CONFIG_BLK_DEV_MD
-                if (state->parts[p].flags & ADDPART_FLAG_RAID)
-                        md_autodetect_dev(part_to_dev(part)->devt);
-#endif
-        }
-        kfree(state);
-        return 0;
-}
-unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
-{
-        struct address_space *mapping = bdev->bd_inode->i_mapping;
-        struct page *page;
-        page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)),
-                                 NULL);
-        if (!IS_ERR(page)) {
-                if (PageError(page))
-                        goto fail;
-                p->v = page;
-                return (unsigned char *)page_address(page) +  ((n & ((1 << (PAGE_CACHE_SHIFT - 9)) - 1)) << 9);
-fail:
-                page_cache_release(page);
-        }
-        p->v = NULL;
-        return NULL;
-}
-EXPORT_SYMBOL(read_dev_sector);
diff --git a/fs/partitions/check.h b/fs/partitions/check.h
deleted file mode 100644
index d68bf4dc3bc2..000000000000
--- a/fs/partitions/check.h
+++ /dev/null
@@ -1,49 +0,0 @@
-#include <linux/pagemap.h>
-#include <linux/blkdev.h>
-#include <linux/genhd.h>
-/*
- * add_gd_partition adds a partitions details to the devices partition
- * description.
- */
-struct parsed_partitions {
-        struct block_device *bdev;
-        char name[BDEVNAME_SIZE];
-        struct {
-                sector_t from;
-                sector_t size;
-                int flags;
-                bool has_info;
-                struct partition_meta_info info;
-        } parts[DISK_MAX_PARTS];
-        int next;
-        int limit;
-        bool access_beyond_eod;
-        char *pp_buf;
-};
-static inline void *read_part_sector(struct parsed_partitions *state,
-                                     sector_t n, Sector *p)
-{
-        if (n >= get_capacity(state->bdev->bd_disk)) {
-                state->access_beyond_eod = true;
-                return NULL;
-        }
-        return read_dev_sector(state->bdev, n, p);
-}
-static inline void
-put_partition(struct parsed_partitions *p, int n, sector_t from, sector_t size)
-{
-        if (n < p->limit) {
-                char tmp[1 + BDEVNAME_SIZE + 10 + 1];
-                p->parts[n].from = from;
-                p->parts[n].size = size;
-                snprintf(tmp, sizeof(tmp), " %s%d", p->name, n);
-                strlcat(p->pp_buf, tmp, PAGE_SIZE);
-        }
-}
-extern int warn_no_part;
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c
deleted file mode 100644
index 6296b403c67a..000000000000
--- a/fs/partitions/efi.c
+++ /dev/null
@@ -1,675 +0,0 @@
-/************************************************************
- * EFI GUID Partition Table handling
- *
- * http://www.uefi.org/specs/
- * http://www.intel.com/technology/efi/
- *
- * efi.[ch] by Matt Domsch <Matt_Domsch@dell.com>
- *   Copyright 2000,2001,2002,2004 Dell Inc.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- *
- * TODO:
- *
- * Changelog:
- * Mon Nov 09 2004 Matt Domsch <Matt_Domsch@dell.com>
- * - test for valid PMBR and valid PGPT before ever reading
- *   AGPT, allow override with 'gpt' kernel command line option.
- * - check for first/last_usable_lba outside of size of disk
- *
- * Tue  Mar 26 2002 Matt Domsch <Matt_Domsch@dell.com>
- * - Ported to 2.5.7-pre1 and 2.5.7-dj2
- * - Applied patch to avoid fault in alternate header handling
- * - cleaned up find_valid_gpt
- * - On-disk structure and copy in memory is *always* LE now - 
- *   swab fields as needed
- * - remove print_gpt_header()
- * - only use first max_p partition entries, to keep the kernel minor number
- *   and partition numbers tied.
- *
- * Mon  Feb 04 2002 Matt Domsch <Matt_Domsch@dell.com>
- * - Removed __PRIPTR_PREFIX - not being used
- *
- * Mon  Jan 14 2002 Matt Domsch <Matt_Domsch@dell.com>
- * - Ported to 2.5.2-pre11 + library crc32 patch Linus applied
- *
- * Thu Dec 6 2001 Matt Domsch <Matt_Domsch@dell.com>
- * - Added compare_gpts().
- * - moved le_efi_guid_to_cpus() back into this file.  GPT is the only
- *   thing that keeps EFI GUIDs on disk.
- * - Changed gpt structure names and members to be simpler and more Linux-like.
- * 
- * Wed Oct 17 2001 Matt Domsch <Matt_Domsch@dell.com>
- * - Removed CONFIG_DEVFS_VOLUMES_UUID code entirely per Martin Wilck
- *
- * Wed Oct 10 2001 Matt Domsch <Matt_Domsch@dell.com>
- * - Changed function comments to DocBook style per Andreas Dilger suggestion.
- *
- * Mon Oct 08 2001 Matt Domsch <Matt_Domsch@dell.com>
- * - Change read_lba() to use the page cache per Al Viro's work.
- * - print u64s properly on all architectures
- * - fixed debug_printk(), now Dprintk()
- *
- * Mon Oct 01 2001 Matt Domsch <Matt_Domsch@dell.com>
- * - Style cleanups
- * - made most functions static
- * - Endianness addition
- * - remove test for second alternate header, as it's not per spec,
- *   and is unnecessary.  There's now a method to read/write the last
- *   sector of an odd-sized disk from user space.  No tools have ever
- *   been released which used this code, so it's effectively dead.
- * - Per Asit Mallick of Intel, added a test for a valid PMBR.
- * - Added kernel command line option 'gpt' to override valid PMBR test.
- *
- * Wed Jun  6 2001 Martin Wilck <Martin.Wilck@Fujitsu-Siemens.com>
- * - added devfs volume UUID support (/dev/volumes/uuids) for
- *   mounting file systems by the partition GUID. 
- *
- * Tue Dec  5 2000 Matt Domsch <Matt_Domsch@dell.com>
- * - Moved crc32() to linux/lib, added efi_crc32().
- *
- * Thu Nov 30 2000 Matt Domsch <Matt_Domsch@dell.com>
- * - Replaced Intel's CRC32 function with an equivalent
- *   non-license-restricted version.
- *
- * Wed Oct 25 2000 Matt Domsch <Matt_Domsch@dell.com>
- * - Fixed the last_lba() call to return the proper last block
- *
- * Thu Oct 12 2000 Matt Domsch <Matt_Domsch@dell.com>
- * - Thanks to Andries Brouwer for his debugging assistance.
- * - Code works, detects all the partitions.
- *
- ************************************************************/
-#include <linux/crc32.h>
-#include <linux/ctype.h>
-#include <linux/math64.h>
-#include <linux/slab.h>
-#include "check.h"
-#include "efi.h"
-/* This allows a kernel command line option 'gpt' to override
- * the test for invalid PMBR.  Not __initdata because reloading
- * the partition tables happens after init too.
- */
-static int force_gpt;
-static int __init
-force_gpt_fn(char *str)
-{
-        force_gpt = 1;
-        return 1;
-}
-__setup("gpt", force_gpt_fn);
-/**
- * efi_crc32() - EFI version of crc32 function
- * @buf: buffer to calculate crc32 of
- * @len - length of buf
- *
- * Description: Returns EFI-style CRC32 value for @buf
- * 
- * This function uses the little endian Ethernet polynomial
- * but seeds the function with ~0, and xor's with ~0 at the end.
- * Note, the EFI Specification, v1.02, has a reference to
- * Dr. Dobbs Journal, May 1994 (actually it's in May 1992).
- */
-static inline u32
-efi_crc32(const void *buf, unsigned long len)
-{
-        return (crc32(~0L, buf, len) ^ ~0L);
-}
-/**
- * last_lba(): return number of last logical block of device
- * @bdev: block device
- * 
- * Description: Returns last LBA value on success, 0 on error.
- * This is stored (by sd and ide-geometry) in
- *  the part[0] entry for this disk, and is the number of
- *  physical sectors available on the disk.
- */
-static u64 last_lba(struct block_device *bdev)
-{
-        if (!bdev || !bdev->bd_inode)
-                return 0;
-        return div_u64(bdev->bd_inode->i_size,
-                       bdev_logical_block_size(bdev)) - 1ULL;
-}
-static inline int
-pmbr_part_valid(struct partition *part)
-{
-        if (part->sys_ind == EFI_PMBR_OSTYPE_EFI_GPT &&
-            le32_to_cpu(part->start_sect) == 1UL)
-                return 1;
-        return 0;
-}
-/**
- * is_pmbr_valid(): test Protective MBR for validity
- * @mbr: pointer to a legacy mbr structure
- *
- * Description: Returns 1 if PMBR is valid, 0 otherwise.
- * Validity depends on two things:
- *  1) MSDOS signature is in the last two bytes of the MBR
- *  2) One partition of type 0xEE is found
- */
-static int
-is_pmbr_valid(legacy_mbr *mbr)
-{
-        int i;
-        if (!mbr || le16_to_cpu(mbr->signature) != MSDOS_MBR_SIGNATURE)
-                return 0;
-        for (i = 0; i < 4; i++)
-                if (pmbr_part_valid(&mbr->partition_record[i]))
-                        return 1;
-        return 0;
-}
-/**
- * read_lba(): Read bytes from disk, starting at given LBA
- * @state
- * @lba
- * @buffer
- * @size_t
- *
- * Description: Reads @count bytes from @state->bdev into @buffer.
- * Returns number of bytes read on success, 0 on error.
- */
-static size_t read_lba(struct parsed_partitions *state,
-                       u64 lba, u8 *buffer, size_t count)
-{
-        size_t totalreadcount = 0;
-        struct block_device *bdev = state->bdev;
-        sector_t n = lba * (bdev_logical_block_size(bdev) / 512);
-        if (!buffer || lba > last_lba(bdev))
-                return 0;
-        while (count) {
-                int copied = 512;
-                Sector sect;
-                unsigned char *data = read_part_sector(state, n++, &sect);
-                if (!data)
-                        break;
-                if (copied > count)
-                        copied = count;
-                memcpy(buffer, data, copied);
-                put_dev_sector(sect);
-                buffer += copied;
-                totalreadcount +=copied;
-                count -= copied;
-        }
-        return totalreadcount;
-}
-/**
- * alloc_read_gpt_entries(): reads partition entries from disk
- * @state
- * @gpt - GPT header
- * 
- * Description: Returns ptes on success,  NULL on error.
- * Allocates space for PTEs based on information found in @gpt.
- * Notes: remember to free pte when you're done!
- */
-static gpt_entry *alloc_read_gpt_entries(struct parsed_partitions *state,
-                                         gpt_header *gpt)
-{
-        size_t count;
-        gpt_entry *pte;
-        if (!gpt)
-                return NULL;
-        count = le32_to_cpu(gpt->num_partition_entries) *
-                le32_to_cpu(gpt->sizeof_partition_entry);
-        if (!count)
-                return NULL;
-        pte = kzalloc(count, GFP_KERNEL);
-        if (!pte)
-                return NULL;
-        if (read_lba(state, le64_to_cpu(gpt->partition_entry_lba),
-                     (u8 *) pte,
-                     count) < count) {
-                kfree(pte);
-                pte=NULL;
-                return NULL;
-        }
-        return pte;
-}
-/**
- * alloc_read_gpt_header(): Allocates GPT header, reads into it from disk
- * @state
- * @lba is the Logical Block Address of the partition table
- * 
- * Description: returns GPT header on success, NULL on error.   Allocates
- * and fills a GPT header starting at @ from @state->bdev.
- * Note: remember to free gpt when finished with it.
- */
-static gpt_header *alloc_read_gpt_header(struct parsed_partitions *state,
-                                         u64 lba)
-{
-        gpt_header *gpt;
-        unsigned ssz = bdev_logical_block_size(state->bdev);
-        gpt = kzalloc(ssz, GFP_KERNEL);
-        if (!gpt)
-                return NULL;
-        if (read_lba(state, lba, (u8 *) gpt, ssz) < ssz) {
-                kfree(gpt);
-                gpt=NULL;
-                return NULL;
-        }
-        return gpt;
-}
-/**
- * is_gpt_valid() - tests one GPT header and PTEs for validity
- * @state
- * @lba is the logical block address of the GPT header to test
- * @gpt is a GPT header ptr, filled on return.
- * @ptes is a PTEs ptr, filled on return.
- *
- * Description: returns 1 if valid,  0 on error.
- * If valid, returns pointers to newly allocated GPT header and PTEs.
- */
-static int is_gpt_valid(struct parsed_partitions *state, u64 lba,
-                        gpt_header **gpt, gpt_entry **ptes)
-{
-        u32 crc, origcrc;
-        u64 lastlba;
-        if (!ptes)
-                return 0;
-        if (!(*gpt = alloc_read_gpt_header(state, lba)))
-                return 0;
-        /* Check the GUID Partition Table signature */
-        if (le64_to_cpu((*gpt)->signature) != GPT_HEADER_SIGNATURE) {
-                pr_debug("GUID Partition Table Header signature is wrong:"
-                         "%lld != %lld\n",
-                         (unsigned long long)le64_to_cpu((*gpt)->signature),
-                         (unsigned long long)GPT_HEADER_SIGNATURE);
-                goto fail;
-        }
-        /* Check the GUID Partition Table header size */
-        if (le32_to_cpu((*gpt)->header_size) >
-                        bdev_logical_block_size(state->bdev)) {
-                pr_debug("GUID Partition Table Header size is wrong: %u > %u\n",
-                        le32_to_cpu((*gpt)->header_size),
-                        bdev_logical_block_size(state->bdev));
-                goto fail;
-        }
-        /* Check the GUID Partition Table CRC */
-        origcrc = le32_to_cpu((*gpt)->header_crc32);
-        (*gpt)->header_crc32 = 0;
-        crc = efi_crc32((const unsigned char *) (*gpt), le32_to_cpu((*gpt)->header_size));
-        if (crc != origcrc) {
-                pr_debug("GUID Partition Table Header CRC is wrong: %x != %x\n",
-                         crc, origcrc);
-                goto fail;
-        }
-        (*gpt)->header_crc32 = cpu_to_le32(origcrc);
-        /* Check that the my_lba entry points to the LBA that contains
-         * the GUID Partition Table */
-        if (le64_to_cpu((*gpt)->my_lba) != lba) {
-                pr_debug("GPT my_lba incorrect: %lld != %lld\n",
-                         (unsigned long long)le64_to_cpu((*gpt)->my_lba),
-                         (unsigned long long)lba);
-                goto fail;
-        }
-        /* Check the first_usable_lba and last_usable_lba are
-         * within the disk.
-         */
-        lastlba = last_lba(state->bdev);
-        if (le64_to_cpu((*gpt)->first_usable_lba) > lastlba) {
-                pr_debug("GPT: first_usable_lba incorrect: %lld > %lld\n",
-                         (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba),
-                         (unsigned long long)lastlba);
-                goto fail;
-        }
-        if (le64_to_cpu((*gpt)->last_usable_lba) > lastlba) {
-                pr_debug("GPT: last_usable_lba incorrect: %lld > %lld\n",
-                         (unsigned long long)le64_to_cpu((*gpt)->last_usable_lba),
-                         (unsigned long long)lastlba);
-                goto fail;
-        }
-        /* Check that sizeof_partition_entry has the correct value */
-        if (le32_to_cpu((*gpt)->sizeof_partition_entry) != sizeof(gpt_entry)) {
-                pr_debug("GUID Partitition Entry Size check failed.\n");
-                goto fail;
-        }
-        if (!(*ptes = alloc_read_gpt_entries(state, *gpt)))
-                goto fail;
-        /* Check the GUID Partition Entry Array CRC */
-        crc = efi_crc32((const unsigned char *) (*ptes),
-                        le32_to_cpu((*gpt)->num_partition_entries) *
-                        le32_to_cpu((*gpt)->sizeof_partition_entry));
-        if (crc != le32_to_cpu((*gpt)->partition_entry_array_crc32)) {
-                pr_debug("GUID Partitition Entry Array CRC check failed.\n");
-                goto fail_ptes;
-        }
-        /* We're done, all's well */
-        return 1;
- fail_ptes:
-        kfree(*ptes);
-        *ptes = NULL;
- fail:
-        kfree(*gpt);
-        *gpt = NULL;
-        return 0;
-}
-/**
- * is_pte_valid() - tests one PTE for validity
- * @pte is the pte to check
- * @lastlba is last lba of the disk
- *
- * Description: returns 1 if valid,  0 on error.
- */
-static inline int
-is_pte_valid(const gpt_entry *pte, const u64 lastlba)
-{
-        if ((!efi_guidcmp(pte->partition_type_guid, NULL_GUID)) ||
-            le64_to_cpu(pte->starting_lba) > lastlba         ||
-            le64_to_cpu(pte->ending_lba)   > lastlba)
-                return 0;
-        return 1;
-}
-/**
- * compare_gpts() - Search disk for valid GPT headers and PTEs
- * @pgpt is the primary GPT header
- * @agpt is the alternate GPT header
- * @lastlba is the last LBA number
- * Description: Returns nothing.  Sanity checks pgpt and agpt fields
- * and prints warnings on discrepancies.
- * 
- */
-static void
-compare_gpts(gpt_header *pgpt, gpt_header *agpt, u64 lastlba)
-{
-        int error_found = 0;
-        if (!pgpt || !agpt)
-                return;
-        if (le64_to_cpu(pgpt->my_lba) != le64_to_cpu(agpt->alternate_lba)) {
-                printk(KERN_WARNING
-                       "GPT:Primary header LBA != Alt. header alternate_lba\n");
-                printk(KERN_WARNING "GPT:%lld != %lld\n",
-                       (unsigned long long)le64_to_cpu(pgpt->my_lba),
-                       (unsigned long long)le64_to_cpu(agpt->alternate_lba));
-                error_found++;
-        }
-        if (le64_to_cpu(pgpt->alternate_lba) != le64_to_cpu(agpt->my_lba)) {
-                printk(KERN_WARNING
-                       "GPT:Primary header alternate_lba != Alt. header my_lba\n");
-                printk(KERN_WARNING "GPT:%lld != %lld\n",
-                       (unsigned long long)le64_to_cpu(pgpt->alternate_lba),
-                       (unsigned long long)le64_to_cpu(agpt->my_lba));
-                error_found++;
-        }
-        if (le64_to_cpu(pgpt->first_usable_lba) !=
-            le64_to_cpu(agpt->first_usable_lba)) {
-                printk(KERN_WARNING "GPT:first_usable_lbas don't match.\n");
-                printk(KERN_WARNING "GPT:%lld != %lld\n",
-                       (unsigned long long)le64_to_cpu(pgpt->first_usable_lba),
-                       (unsigned long long)le64_to_cpu(agpt->first_usable_lba));
-                error_found++;
-        }
-        if (le64_to_cpu(pgpt->last_usable_lba) !=
-            le64_to_cpu(agpt->last_usable_lba)) {
-                printk(KERN_WARNING "GPT:last_usable_lbas don't match.\n");
-                printk(KERN_WARNING "GPT:%lld != %lld\n",
-                       (unsigned long long)le64_to_cpu(pgpt->last_usable_lba),
-                       (unsigned long long)le64_to_cpu(agpt->last_usable_lba));
-                error_found++;
-        }
-        if (efi_guidcmp(pgpt->disk_guid, agpt->disk_guid)) {
-                printk(KERN_WARNING "GPT:disk_guids don't match.\n");
-                error_found++;
-        }
-        if (le32_to_cpu(pgpt->num_partition_entries) !=
-            le32_to_cpu(agpt->num_partition_entries)) {
-                printk(KERN_WARNING "GPT:num_partition_entries don't match: "
-                       "0x%x != 0x%x\n",
-                       le32_to_cpu(pgpt->num_partition_entries),
-                       le32_to_cpu(agpt->num_partition_entries));
-                error_found++;
-        }
-        if (le32_to_cpu(pgpt->sizeof_partition_entry) !=
-            le32_to_cpu(agpt->sizeof_partition_entry)) {
-                printk(KERN_WARNING
-                       "GPT:sizeof_partition_entry values don't match: "
-                       "0x%x != 0x%x\n",
-                       le32_to_cpu(pgpt->sizeof_partition_entry),
-                       le32_to_cpu(agpt->sizeof_partition_entry));
-                error_found++;
-        }
-        if (le32_to_cpu(pgpt->partition_entry_array_crc32) !=
-            le32_to_cpu(agpt->partition_entry_array_crc32)) {
-                printk(KERN_WARNING
-                       "GPT:partition_entry_array_crc32 values don't match: "
-                       "0x%x != 0x%x\n",
-                       le32_to_cpu(pgpt->partition_entry_array_crc32),
-                       le32_to_cpu(agpt->partition_entry_array_crc32));
-                error_found++;
-        }
-        if (le64_to_cpu(pgpt->alternate_lba) != lastlba) {
-                printk(KERN_WARNING
-                       "GPT:Primary header thinks Alt. header is not at the end of the disk.\n");
-                printk(KERN_WARNING "GPT:%lld != %lld\n",
-                        (unsigned long long)le64_to_cpu(pgpt->alternate_lba),
-                        (unsigned long long)lastlba);
-                error_found++;
-        }
-        if (le64_to_cpu(agpt->my_lba) != lastlba) {
-                printk(KERN_WARNING
-                       "GPT:Alternate GPT header not at the end of the disk.\n");
-                printk(KERN_WARNING "GPT:%lld != %lld\n",
-                        (unsigned long long)le64_to_cpu(agpt->my_lba),
-                        (unsigned long long)lastlba);
-                error_found++;
-        }
-        if (error_found)
-                printk(KERN_WARNING
-                       "GPT: Use GNU Parted to correct GPT errors.\n");
-        return;
-}
-/**
- * find_valid_gpt() - Search disk for valid GPT headers and PTEs
- * @state
- * @gpt is a GPT header ptr, filled on return.
- * @ptes is a PTEs ptr, filled on return.
- * Description: Returns 1 if valid, 0 on error.
- * If valid, returns pointers to newly allocated GPT header and PTEs.
- * Validity depends on PMBR being valid (or being overridden by the
- * 'gpt' kernel command line option) and finding either the Primary
- * GPT header and PTEs valid, or the Alternate GPT header and PTEs
- * valid.  If the Primary GPT header is not valid, the Alternate GPT header
- * is not checked unless the 'gpt' kernel command line option is passed.
- * This protects against devices which misreport their size, and forces
- * the user to decide to use the Alternate GPT.
- */
-static int find_valid_gpt(struct parsed_partitions *state, gpt_header **gpt,
-                          gpt_entry **ptes)
-{
-        int good_pgpt = 0, good_agpt = 0, good_pmbr = 0;
-        gpt_header *pgpt = NULL, *agpt = NULL;
-        gpt_entry *pptes = NULL, *aptes = NULL;
-        legacy_mbr *legacymbr;
-        u64 lastlba;
-        if (!ptes)
-                return 0;
-        lastlba = last_lba(state->bdev);
-        if (!force_gpt) {
-                /* This will be added to the EFI Spec. per Intel after v1.02. */
-                legacymbr = kzalloc(sizeof (*legacymbr), GFP_KERNEL);
-                if (legacymbr) {
-                        read_lba(state, 0, (u8 *) legacymbr,
-                                 sizeof (*legacymbr));
-                        good_pmbr = is_pmbr_valid(legacymbr);
-                        kfree(legacymbr);
-                }
-                if (!good_pmbr)
-                        goto fail;
-        }
-        good_pgpt = is_gpt_valid(state, GPT_PRIMARY_PARTITION_TABLE_LBA,
-                                 &pgpt, &pptes);
-        if (good_pgpt)
-                good_agpt = is_gpt_valid(state,
-                                         le64_to_cpu(pgpt->alternate_lba),
-                                         &agpt, &aptes);
-        if (!good_agpt && force_gpt)
-                good_agpt = is_gpt_valid(state, lastlba, &agpt, &aptes);
-        /* The obviously unsuccessful case */
-        if (!good_pgpt && !good_agpt)
-                goto fail;
-        compare_gpts(pgpt, agpt, lastlba);
-        /* The good cases */
-        if (good_pgpt) {
-                *gpt  = pgpt;
-                *ptes = pptes;
-                kfree(agpt);
-                kfree(aptes);
-                if (!good_agpt) {
-                        printk(KERN_WARNING 
-                               "Alternate GPT is invalid, "
-                               "using primary GPT.\n");
-                }
-                return 1;
-        }
-        else if (good_agpt) {
-                *gpt  = agpt;
-                *ptes = aptes;
-                kfree(pgpt);
-                kfree(pptes);
-                printk(KERN_WARNING 
-                       "Primary GPT is invalid, using alternate GPT.\n");
-                return 1;
-        }
- fail:
-        kfree(pgpt);
-        kfree(agpt);
-        kfree(pptes);
-        kfree(aptes);
-        *gpt = NULL;
-        *ptes = NULL;
-        return 0;
-}
-/**
- * efi_partition(struct parsed_partitions *state)
- * @state
- *
- * Description: called from check.c, if the disk contains GPT
- * partitions, sets up partition entries in the kernel.
- *
- * If the first block on the disk is a legacy MBR,
- * it will get handled by msdos_partition().
- * If it's a Protective MBR, we'll handle it here.
- *
- * We do not create a Linux partition for GPT, but
- * only for the actual data partitions.
- * Returns:
- * -1 if unable to read the partition table
- *  0 if this isn't our partition table
- *  1 if successful
- *
- */
-int efi_partition(struct parsed_partitions *state)
-{
-        gpt_header *gpt = NULL;
-        gpt_entry *ptes = NULL;
-        u32 i;
-        unsigned ssz = bdev_logical_block_size(state->bdev) / 512;
-        u8 unparsed_guid[37];
-        if (!find_valid_gpt(state, &gpt, &ptes) || !gpt || !ptes) {
-                kfree(gpt);
-                kfree(ptes);
-                return 0;
-        }
-        pr_debug("GUID Partition Table is valid!  Yea!\n");
-        for (i = 0; i < le32_to_cpu(gpt->num_partition_entries) && i < state->limit-1; i++) {
-                struct partition_meta_info *info;
-                unsigned label_count = 0;
-                unsigned label_max;
-                u64 start = le64_to_cpu(ptes[i].starting_lba);
-                u64 size = le64_to_cpu(ptes[i].ending_lba) -
-                           le64_to_cpu(ptes[i].starting_lba) + 1ULL;
-                if (!is_pte_valid(&ptes[i], last_lba(state->bdev)))
-                        continue;
-                put_partition(state, i+1, start * ssz, size * ssz);
-                /* If this is a RAID volume, tell md */
-                if (!efi_guidcmp(ptes[i].partition_type_guid,
-                                 PARTITION_LINUX_RAID_GUID))
-                        state->parts[i + 1].flags = ADDPART_FLAG_RAID;
-                info = &state->parts[i + 1].info;
-                /* Instead of doing a manual swap to big endian, reuse the
-                 * common ASCII hex format as the interim.
-                 */
-                efi_guid_unparse(&ptes[i].unique_partition_guid, unparsed_guid);
-                part_pack_uuid(unparsed_guid, info->uuid);
-                /* Naively convert UTF16-LE to 7 bits. */
-                label_max = min(sizeof(info->volname) - 1,
-                                sizeof(ptes[i].partition_name));
-                info->volname[label_max] = 0;
-                while (label_count < label_max) {
-                        u8 c = ptes[i].partition_name[label_count] & 0xff;
-                        if (c && !isprint(c))
-                                c = '!';
-                        info->volname[label_count] = c;
-                        label_count++;
-                }
-                state->parts[i + 1].has_info = true;
-        }
-        kfree(ptes);
-        kfree(gpt);
-        strlcat(state->pp_buf, "\n", PAGE_SIZE);
-        return 1;
-}
diff --git a/fs/partitions/efi.h b/fs/partitions/efi.h
deleted file mode 100644
index b69ab729558f..000000000000
--- a/fs/partitions/efi.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/************************************************************
- * EFI GUID Partition Table
- * Per Intel EFI Specification v1.02
- * http://developer.intel.com/technology/efi/efi.htm
- *
- * By Matt Domsch <Matt_Domsch@dell.com>  Fri Sep 22 22:15:56 CDT 2000  
- *   Copyright 2000,2001 Dell Inc.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- * 
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- * 
- ************************************************************/
-#ifndef FS_PART_EFI_H_INCLUDED
-#define FS_PART_EFI_H_INCLUDED
-#include <linux/types.h>
-#include <linux/fs.h>
-#include <linux/genhd.h>
-#include <linux/kernel.h>
-#include <linux/major.h>
-#include <linux/string.h>
-#include <linux/efi.h>
-#define MSDOS_MBR_SIGNATURE 0xaa55
-#define EFI_PMBR_OSTYPE_EFI 0xEF
-#define EFI_PMBR_OSTYPE_EFI_GPT 0xEE
-#define GPT_HEADER_SIGNATURE 0x5452415020494645ULL
-#define GPT_HEADER_REVISION_V1 0x00010000
-#define GPT_PRIMARY_PARTITION_TABLE_LBA 1
-#define PARTITION_SYSTEM_GUID \
-    EFI_GUID( 0xC12A7328, 0xF81F, 0x11d2, \
-              0xBA, 0x4B, 0x00, 0xA0, 0xC9, 0x3E, 0xC9, 0x3B) 
-#define LEGACY_MBR_PARTITION_GUID \
-    EFI_GUID( 0x024DEE41, 0x33E7, 0x11d3, \
-              0x9D, 0x69, 0x00, 0x08, 0xC7, 0x81, 0xF3, 0x9F)
-#define PARTITION_MSFT_RESERVED_GUID \
-    EFI_GUID( 0xE3C9E316, 0x0B5C, 0x4DB8, \
-              0x81, 0x7D, 0xF9, 0x2D, 0xF0, 0x02, 0x15, 0xAE)
-#define PARTITION_BASIC_DATA_GUID \
-    EFI_GUID( 0xEBD0A0A2, 0xB9E5, 0x4433, \
-              0x87, 0xC0, 0x68, 0xB6, 0xB7, 0x26, 0x99, 0xC7)
-#define PARTITION_LINUX_RAID_GUID \
-    EFI_GUID( 0xa19d880f, 0x05fc, 0x4d3b, \
-              0xa0, 0x06, 0x74, 0x3f, 0x0f, 0x84, 0x91, 0x1e)
-#define PARTITION_LINUX_SWAP_GUID \
-    EFI_GUID( 0x0657fd6d, 0xa4ab, 0x43c4, \
-              0x84, 0xe5, 0x09, 0x33, 0xc8, 0x4b, 0x4f, 0x4f)
-#define PARTITION_LINUX_LVM_GUID \
-    EFI_GUID( 0xe6d6d379, 0xf507, 0x44c2, \
-              0xa2, 0x3c, 0x23, 0x8f, 0x2a, 0x3d, 0xf9, 0x28)
-typedef struct _gpt_header {
-        __le64 signature;
-        __le32 revision;
-        __le32 header_size;
-        __le32 header_crc32;
-        __le32 reserved1;
-        __le64 my_lba;
-        __le64 alternate_lba;
-        __le64 first_usable_lba;
-        __le64 last_usable_lba;
-        efi_guid_t disk_guid;
-        __le64 partition_entry_lba;
-        __le32 num_partition_entries;
-        __le32 sizeof_partition_entry;
-        __le32 partition_entry_array_crc32;
-        /* The rest of the logical block is reserved by UEFI and must be zero.
-         * EFI standard handles this by:
-         *
-         * uint8_t              reserved2[ BlockSize - 92 ];
-         */
-} __attribute__ ((packed)) gpt_header;
-typedef struct _gpt_entry_attributes {
-        u64 required_to_function:1;
-        u64 reserved:47;
-        u64 type_guid_specific:16;
-} __attribute__ ((packed)) gpt_entry_attributes;
-typedef struct _gpt_entry {
-        efi_guid_t partition_type_guid;
-        efi_guid_t unique_partition_guid;
-        __le64 starting_lba;
-        __le64 ending_lba;
-        gpt_entry_attributes attributes;
-        efi_char16_t partition_name[72 / sizeof (efi_char16_t)];
-} __attribute__ ((packed)) gpt_entry;
-typedef struct _legacy_mbr {
-        u8 boot_code[440];
-        __le32 unique_mbr_signature;
-        __le16 unknown;
-        struct partition partition_record[4];
-        __le16 signature;
-} __attribute__ ((packed)) legacy_mbr;
-/* Functions */
-extern int efi_partition(struct parsed_partitions *state);
-#endif
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * --------------------------------------------------------------------------
- * Local variables:
- * c-indent-level: 4 
- * c-brace-imaginary-offset: 0
- * c-brace-offset: -4
- * c-argdecl-indent: 4
- * c-label-offset: -4
- * c-continued-statement-offset: 4
- * c-continued-brace-offset: 0
- * indent-tabs-mode: nil
- * tab-width: 8
- * End:
- */
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c
deleted file mode 100644
index d513a07f44bb..000000000000
--- a/fs/partitions/ibm.c
+++ /dev/null
@@ -1,275 +0,0 @@
-/*
- * File...........: linux/fs/partitions/ibm.c
- * Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com>
- *                  Volker Sameske <sameske@de.ibm.com>
- * Bugreports.to..: <Linux390@de.ibm.com>
- * (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999,2000
- */
-#include <linux/buffer_head.h>
-#include <linux/hdreg.h>
-#include <linux/slab.h>
-#include <asm/dasd.h>
-#include <asm/ebcdic.h>
-#include <asm/uaccess.h>
-#include <asm/vtoc.h>
-#include "check.h"
-#include "ibm.h"
-/*
- * compute the block number from a
- * cyl-cyl-head-head structure
- */
-static sector_t
-cchh2blk (struct vtoc_cchh *ptr, struct hd_geometry *geo) {
-        sector_t cyl;
-        __u16 head;
-        /*decode cylinder and heads for large volumes */
-        cyl = ptr->hh & 0xFFF0;
-        cyl <<= 12;
-        cyl |= ptr->cc;
-        head = ptr->hh & 0x000F;
-        return cyl * geo->heads * geo->sectors +
-               head * geo->sectors;
-}
-/*
- * compute the block number from a
- * cyl-cyl-head-head-block structure
- */
-static sector_t
-cchhb2blk (struct vtoc_cchhb *ptr, struct hd_geometry *geo) {
-        sector_t cyl;
-        __u16 head;
-        /*decode cylinder and heads for large volumes */
-        cyl = ptr->hh & 0xFFF0;
-        cyl <<= 12;
-        cyl |= ptr->cc;
-        head = ptr->hh & 0x000F;
-        return  cyl * geo->heads * geo->sectors +
-                head * geo->sectors +
-                ptr->b;
-}
-/*
- */
-int ibm_partition(struct parsed_partitions *state)
-{
-        struct block_device *bdev = state->bdev;
-        int blocksize, res;
-        loff_t i_size, offset, size, fmt_size;
-        dasd_information2_t *info;
-        struct hd_geometry *geo;
-        char type[5] = {0,};
-        char name[7] = {0,};
-        union label_t {
-                struct vtoc_volume_label_cdl vol;
-                struct vtoc_volume_label_ldl lnx;
-                struct vtoc_cms_label cms;
-        } *label;
-        unsigned char *data;
-        Sector sect;
-        sector_t labelsect;
-        char tmp[64];
-        res = 0;
-        blocksize = bdev_logical_block_size(bdev);
-        if (blocksize <= 0)
-                goto out_exit;
-        i_size = i_size_read(bdev->bd_inode);
-        if (i_size == 0)
-                goto out_exit;
-        info = kmalloc(sizeof(dasd_information2_t), GFP_KERNEL);
-        if (info == NULL)
-                goto out_exit;
-        geo = kmalloc(sizeof(struct hd_geometry), GFP_KERNEL);
-        if (geo == NULL)
-                goto out_nogeo;
-        label = kmalloc(sizeof(union label_t), GFP_KERNEL);
-        if (label == NULL)
-                goto out_nolab;
-        if (ioctl_by_bdev(bdev, BIODASDINFO2, (unsigned long)info) != 0 ||
-            ioctl_by_bdev(bdev, HDIO_GETGEO, (unsigned long)geo) != 0)
-                goto out_freeall;
-        /*
-         * Special case for FBA disks: label sector does not depend on
-         * blocksize.
-         */
-        if ((info->cu_type == 0x6310 && info->dev_type == 0x9336) ||
-            (info->cu_type == 0x3880 && info->dev_type == 0x3370))
-                labelsect = info->label_block;
-        else
-                labelsect = info->label_block * (blocksize >> 9);
-        /*
-         * Get volume label, extract name and type.
-         */
-        data = read_part_sector(state, labelsect, &sect);
-        if (data == NULL)
-                goto out_readerr;
-        memcpy(label, data, sizeof(union label_t));
-        put_dev_sector(sect);
-        if ((!info->FBA_layout) && (!strcmp(info->type, "ECKD"))) {
-                strncpy(type, label->vol.vollbl, 4);
-                strncpy(name, label->vol.volid, 6);
-        } else {
-                strncpy(type, label->lnx.vollbl, 4);
-                strncpy(name, label->lnx.volid, 6);
-        }
-        EBCASC(type, 4);
-        EBCASC(name, 6);
-        res = 1;
-        /*
-         * Three different formats: LDL, CDL and unformated disk
-         *
-         * identified by info->format
-         *
-         * unformated disks we do not have to care about
-         */
-        if (info->format == DASD_FORMAT_LDL) {
-                if (strncmp(type, "CMS1", 4) == 0) {
-                        /*
-                         * VM style CMS1 labeled disk
-                         */
-                        blocksize = label->cms.block_size;
-                        if (label->cms.disk_offset != 0) {
-                                snprintf(tmp, sizeof(tmp), "CMS1/%8s(MDSK):", name);
-                                strlcat(state->pp_buf, tmp, PAGE_SIZE);
-                                /* disk is reserved minidisk */
-                                offset = label->cms.disk_offset;
-                                size = (label->cms.block_count - 1)
-                                        * (blocksize >> 9);
-                        } else {
-                                snprintf(tmp, sizeof(tmp), "CMS1/%8s:", name);
-                                strlcat(state->pp_buf, tmp, PAGE_SIZE);
-                                offset = (info->label_block + 1);
-                                size = label->cms.block_count
-                                        * (blocksize >> 9);
-                        }
-                        put_partition(state, 1, offset*(blocksize >> 9),
-                                      size-offset*(blocksize >> 9));
-                } else {
-                        if (strncmp(type, "LNX1", 4) == 0) {
-                                snprintf(tmp, sizeof(tmp), "LNX1/%8s:", name);
-                                strlcat(state->pp_buf, tmp, PAGE_SIZE);
-                                if (label->lnx.ldl_version == 0xf2) {
-                                        fmt_size = label->lnx.formatted_blocks
-                                                * (blocksize >> 9);
-                                } else if (!strcmp(info->type, "ECKD")) {
-                                        /* formated w/o large volume support */
-                                        fmt_size = geo->cylinders * geo->heads
-                                              * geo->sectors * (blocksize >> 9);
-                                } else {
-                                        /* old label and no usable disk geometry
-                                         * (e.g. DIAG) */
-                                        fmt_size = i_size >> 9;
-                                }
-                                size = i_size >> 9;
-                                if (fmt_size < size)
-                                        size = fmt_size;
-                                offset = (info->label_block + 1);
-                        } else {
-                                /* unlabeled disk */
-                                strlcat(state->pp_buf, "(nonl)", PAGE_SIZE);
-                                size = i_size >> 9;
-                                offset = (info->label_block + 1);
-                        }
-                        put_partition(state, 1, offset*(blocksize >> 9),
-                                      size-offset*(blocksize >> 9));
-                }
-        } else if (info->format == DASD_FORMAT_CDL) {
-                /*
-                 * New style CDL formatted disk
-                 */
-                sector_t blk;
-                int counter;
-                /*
-                 * check if VOL1 label is available
-                 * if not, something is wrong, skipping partition detection
-                 */
-                if (strncmp(type, "VOL1",  4) == 0) {
-                        snprintf(tmp, sizeof(tmp), "VOL1/%8s:", name);
-                        strlcat(state->pp_buf, tmp, PAGE_SIZE);
-                        /*
-                         * get block number and read then go through format1
-                         * labels
-                         */
-                        blk = cchhb2blk(&label->vol.vtoc, geo) + 1;
-                        counter = 0;
-                        data = read_part_sector(state, blk * (blocksize/512),
-                                                &sect);
-                        while (data != NULL) {
-                                struct vtoc_format1_label f1;
-                                memcpy(&f1, data,
-                                       sizeof(struct vtoc_format1_label));
-                                put_dev_sector(sect);
-                                /* skip FMT4 / FMT5 / FMT7 labels */
-                                if (f1.DS1FMTID == _ascebc['4']
-                                    || f1.DS1FMTID == _ascebc['5']
-                                    || f1.DS1FMTID == _ascebc['7']
-                                    || f1.DS1FMTID == _ascebc['9']) {
-                                        blk++;
-                                        data = read_part_sector(state,
-                                                blk * (blocksize/512), &sect);
-                                        continue;
-                                }
-                                /* only FMT1 and 8 labels valid at this point */
-                                if (f1.DS1FMTID != _ascebc['1'] &&
-                                    f1.DS1FMTID != _ascebc['8'])
-                                        break;
-                                /* OK, we got valid partition data */
-                                offset = cchh2blk(&f1.DS1EXT1.llimit, geo);
-                                size  = cchh2blk(&f1.DS1EXT1.ulimit, geo) -
-                                        offset + geo->sectors;
-                                if (counter >= state->limit)
-                                        break;
-                                put_partition(state, counter + 1,
-                                              offset * (blocksize >> 9),
-                                              size * (blocksize >> 9));
-                                counter++;
-                                blk++;
-                                data = read_part_sector(state,
-                                                blk * (blocksize/512), &sect);
-                        }
-                        if (!data)
-                                /* Are we not supposed to report this ? */
-                                goto out_readerr;
-                } else
-                        printk(KERN_WARNING "Warning, expected Label VOL1 not "
-                               "found, treating as CDL formated Disk");
-        }
-        strlcat(state->pp_buf, "\n", PAGE_SIZE);
-        goto out_freeall;
-out_readerr:
-        res = -1;
-out_freeall:
-        kfree(label);
-out_nolab:
-        kfree(geo);
-out_nogeo:
-        kfree(info);
-out_exit:
-        return res;
-}
diff --git a/fs/partitions/ibm.h b/fs/partitions/ibm.h
deleted file mode 100644
index 08fb0804a812..000000000000
--- a/fs/partitions/ibm.h
+++ /dev/null
@@ -1 +0,0 @@
-int ibm_partition(struct parsed_partitions *);
diff --git a/fs/partitions/karma.c b/fs/partitions/karma.c
deleted file mode 100644
index 0ea19312706b..000000000000
--- a/fs/partitions/karma.c
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- *  fs/partitions/karma.c
- *  Rio Karma partition info.
- *
- *  Copyright (C) 2006 Bob Copeland (me@bobcopeland.com)
- *  based on osf.c
- */
-#include "check.h"
-#include "karma.h"
-int karma_partition(struct parsed_partitions *state)
-{
-        int i;
-        int slot = 1;
-        Sector sect;
-        unsigned char *data;
-        struct disklabel {
-                u8 d_reserved[270];
-                struct d_partition {
-                        __le32 p_res;
-                        u8 p_fstype;
-                        u8 p_res2[3];
-                        __le32 p_offset;
-                        __le32 p_size;
-                } d_partitions[2];
-                u8 d_blank[208];
-                __le16 d_magic;
-        } __attribute__((packed)) *label;
-        struct d_partition *p;
-        data = read_part_sector(state, 0, &sect);
-        if (!data)
-                return -1;
-        label = (struct disklabel *)data;
-        if (le16_to_cpu(label->d_magic) != KARMA_LABEL_MAGIC) {
-                put_dev_sector(sect);
-                return 0;
-        }
-        p = label->d_partitions;
-        for (i = 0 ; i < 2; i++, p++) {
-                if (slot == state->limit)
-                        break;
-                if (p->p_fstype == 0x4d && le32_to_cpu(p->p_size)) {
-                        put_partition(state, slot, le32_to_cpu(p->p_offset),
-                                le32_to_cpu(p->p_size));
-                }
-                slot++;
-        }
-        strlcat(state->pp_buf, "\n", PAGE_SIZE);
-        put_dev_sector(sect);
-        return 1;
-}
diff --git a/fs/partitions/karma.h b/fs/partitions/karma.h
deleted file mode 100644
index c764b2e9df21..000000000000
--- a/fs/partitions/karma.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/*
- *  fs/partitions/karma.h
- */
-#define KARMA_LABEL_MAGIC               0xAB56
-int karma_partition(struct parsed_partitions *state);
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
deleted file mode 100644
index bd8ae788f689..000000000000
--- a/fs/partitions/ldm.c
+++ /dev/null
@@ -1,1570 +0,0 @@
-/**
- * ldm - Support for Windows Logical Disk Manager (Dynamic Disks)
- *
- * Copyright (C) 2001,2002 Richard Russon <ldm@flatcap.org>
- * Copyright (c) 2001-2007 Anton Altaparmakov
- * Copyright (C) 2001,2002 Jakob Kemi <jakob.kemi@telia.com>
- *
- * Documentation is available at http://www.linux-ntfs.org/doku.php?id=downloads 
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License as published by the Free Software
- * Foundation; either version 2 of the License, or (at your option) any later
- * version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program (in the main directory of the source in the file COPYING); if
- * not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
- * Boston, MA  02111-1307  USA
- */
-#include <linux/slab.h>
-#include <linux/pagemap.h>
-#include <linux/stringify.h>
-#include <linux/kernel.h>
-#include "ldm.h"
-#include "check.h"
-#include "msdos.h"
-/**
- * ldm_debug/info/error/crit - Output an error message
- * @f:    A printf format string containing the message
- * @...:  Variables to substitute into @f
- *
- * ldm_debug() writes a DEBUG level message to the syslog but only if the
- * driver was compiled with debug enabled. Otherwise, the call turns into a NOP.
- */
-#ifndef CONFIG_LDM_DEBUG
-#define ldm_debug(...)  do {} while (0)
-#else
-#define ldm_debug(f, a...) _ldm_printk (KERN_DEBUG, __func__, f, ##a)
-#endif
-#define ldm_crit(f, a...)  _ldm_printk (KERN_CRIT,  __func__, f, ##a)
-#define ldm_error(f, a...) _ldm_printk (KERN_ERR,   __func__, f, ##a)
-#define ldm_info(f, a...)  _ldm_printk (KERN_INFO,  __func__, f, ##a)
-static __printf(3, 4)
-void _ldm_printk(const char *level, const char *function, const char *fmt, ...)
-{
-        struct va_format vaf;
-        va_list args;
-        va_start (args, fmt);
-        vaf.fmt = fmt;
-        vaf.va = &args;
-        printk("%s%s(): %pV\n", level, function, &vaf);
-        va_end(args);
-}
-/**
- * ldm_parse_hexbyte - Convert a ASCII hex number to a byte
- * @src:  Pointer to at least 2 characters to convert.
- *
- * Convert a two character ASCII hex string to a number.
- *
- * Return:  0-255  Success, the byte was parsed correctly
- *          -1     Error, an invalid character was supplied
- */
-static int ldm_parse_hexbyte (const u8 *src)
-{
-        unsigned int x;         /* For correct wrapping */
-        int h;
-        /* high part */
-        x = h = hex_to_bin(src[0]);
-        if (h < 0)
-                return -1;
-        /* low part */
-        h = hex_to_bin(src[1]);
-        if (h < 0)
-                return -1;
-        return (x << 4) + h;
-}
-/**
- * ldm_parse_guid - Convert GUID from ASCII to binary
- * @src:   36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba
- * @dest:  Memory block to hold binary GUID (16 bytes)
- *
- * N.B. The GUID need not be NULL terminated.
- *
- * Return:  'true'   @dest contains binary GUID
- *          'false'  @dest contents are undefined
- */
-static bool ldm_parse_guid (const u8 *src, u8 *dest)
-{
-        static const int size[] = { 4, 2, 2, 2, 6 };
-        int i, j, v;
-        if (src[8]  != '-' || src[13] != '-' ||
-            src[18] != '-' || src[23] != '-')
-                return false;
-        for (j = 0; j < 5; j++, src++)
-                for (i = 0; i < size[j]; i++, src+=2, *dest++ = v)
-                        if ((v = ldm_parse_hexbyte (src)) < 0)
-                                return false;
-        return true;
-}
-/**
- * ldm_parse_privhead - Read the LDM Database PRIVHEAD structure
- * @data:  Raw database PRIVHEAD structure loaded from the device
- * @ph:    In-memory privhead structure in which to return parsed information
- *
- * This parses the LDM database PRIVHEAD structure supplied in @data and
- * sets up the in-memory privhead structure @ph with the obtained information.
- *
- * Return:  'true'   @ph contains the PRIVHEAD data
- *          'false'  @ph contents are undefined
- */
-static bool ldm_parse_privhead(const u8 *data, struct privhead *ph)
-{
-        bool is_vista = false;
-        BUG_ON(!data || !ph);
-        if (MAGIC_PRIVHEAD != get_unaligned_be64(data)) {
-                ldm_error("Cannot find PRIVHEAD structure. LDM database is"
-                        " corrupt. Aborting.");
-                return false;
-        }
-        ph->ver_major = get_unaligned_be16(data + 0x000C);
-        ph->ver_minor = get_unaligned_be16(data + 0x000E);
-        ph->logical_disk_start = get_unaligned_be64(data + 0x011B);
-        ph->logical_disk_size = get_unaligned_be64(data + 0x0123);
-        ph->config_start = get_unaligned_be64(data + 0x012B);
-        ph->config_size = get_unaligned_be64(data + 0x0133);
-        /* Version 2.11 is Win2k/XP and version 2.12 is Vista. */
-        if (ph->ver_major == 2 && ph->ver_minor == 12)
-                is_vista = true;
-        if (!is_vista && (ph->ver_major != 2 || ph->ver_minor != 11)) {
-                ldm_error("Expected PRIVHEAD version 2.11 or 2.12, got %d.%d."
-                        " Aborting.", ph->ver_major, ph->ver_minor);
-                return false;
-        }
-        ldm_debug("PRIVHEAD version %d.%d (Windows %s).", ph->ver_major,
-                        ph->ver_minor, is_vista ? "Vista" : "2000/XP");
-        if (ph->config_size != LDM_DB_SIZE) {   /* 1 MiB in sectors. */
-                /* Warn the user and continue, carefully. */
-                ldm_info("Database is normally %u bytes, it claims to "
-                        "be %llu bytes.", LDM_DB_SIZE,
-                        (unsigned long long)ph->config_size);
-        }
-        if ((ph->logical_disk_size == 0) || (ph->logical_disk_start +
-                        ph->logical_disk_size > ph->config_start)) {
-                ldm_error("PRIVHEAD disk size doesn't match real disk size");
-                return false;
-        }
-        if (!ldm_parse_guid(data + 0x0030, ph->disk_id)) {
-                ldm_error("PRIVHEAD contains an invalid GUID.");
-                return false;
-        }
-        ldm_debug("Parsed PRIVHEAD successfully.");
-        return true;
-}
-/**
- * ldm_parse_tocblock - Read the LDM Database TOCBLOCK structure
- * @data:  Raw database TOCBLOCK structure loaded from the device
- * @toc:   In-memory toc structure in which to return parsed information
- *
- * This parses the LDM Database TOCBLOCK (table of contents) structure supplied
- * in @data and sets up the in-memory tocblock structure @toc with the obtained
- * information.
- *
- * N.B.  The *_start and *_size values returned in @toc are not range-checked.
- *
- * Return:  'true'   @toc contains the TOCBLOCK data
- *          'false'  @toc contents are undefined
- */
-static bool ldm_parse_tocblock (const u8 *data, struct tocblock *toc)
-{
-        BUG_ON (!data || !toc);
-        if (MAGIC_TOCBLOCK != get_unaligned_be64(data)) {
-                ldm_crit ("Cannot find TOCBLOCK, database may be corrupt.");
-                return false;
-        }
-        strncpy (toc->bitmap1_name, data + 0x24, sizeof (toc->bitmap1_name));
-        toc->bitmap1_name[sizeof (toc->bitmap1_name) - 1] = 0;
-        toc->bitmap1_start = get_unaligned_be64(data + 0x2E);
-        toc->bitmap1_size  = get_unaligned_be64(data + 0x36);
-        if (strncmp (toc->bitmap1_name, TOC_BITMAP1,
-                        sizeof (toc->bitmap1_name)) != 0) {
-                ldm_crit ("TOCBLOCK's first bitmap is '%s', should be '%s'.",
-                                TOC_BITMAP1, toc->bitmap1_name);
-                return false;
-        }
-        strncpy (toc->bitmap2_name, data + 0x46, sizeof (toc->bitmap2_name));
-        toc->bitmap2_name[sizeof (toc->bitmap2_name) - 1] = 0;
-        toc->bitmap2_start = get_unaligned_be64(data + 0x50);
-        toc->bitmap2_size  = get_unaligned_be64(data + 0x58);
-        if (strncmp (toc->bitmap2_name, TOC_BITMAP2,
-                        sizeof (toc->bitmap2_name)) != 0) {
-                ldm_crit ("TOCBLOCK's second bitmap is '%s', should be '%s'.",
-                                TOC_BITMAP2, toc->bitmap2_name);
-                return false;
-        }
-        ldm_debug ("Parsed TOCBLOCK successfully.");
-        return true;
-}
-/**
- * ldm_parse_vmdb - Read the LDM Database VMDB structure
- * @data:  Raw database VMDB structure loaded from the device
- * @vm:    In-memory vmdb structure in which to return parsed information
- *
- * This parses the LDM Database VMDB structure supplied in @data and sets up
- * the in-memory vmdb structure @vm with the obtained information.
- *
- * N.B.  The *_start, *_size and *_seq values will be range-checked later.
- *
- * Return:  'true'   @vm contains VMDB info
- *          'false'  @vm contents are undefined
- */
-static bool ldm_parse_vmdb (const u8 *data, struct vmdb *vm)
-{
-        BUG_ON (!data || !vm);
-        if (MAGIC_VMDB != get_unaligned_be32(data)) {
-                ldm_crit ("Cannot find the VMDB, database may be corrupt.");
-                return false;
-        }
-        vm->ver_major = get_unaligned_be16(data + 0x12);
-        vm->ver_minor = get_unaligned_be16(data + 0x14);
-        if ((vm->ver_major != 4) || (vm->ver_minor != 10)) {
-                ldm_error ("Expected VMDB version %d.%d, got %d.%d. "
-                        "Aborting.", 4, 10, vm->ver_major, vm->ver_minor);
-                return false;
-        }
-        vm->vblk_size     = get_unaligned_be32(data + 0x08);
-        if (vm->vblk_size == 0) {
-                ldm_error ("Illegal VBLK size");
-                return false;
-        }
-        vm->vblk_offset   = get_unaligned_be32(data + 0x0C);
-        vm->last_vblk_seq = get_unaligned_be32(data + 0x04);
-        ldm_debug ("Parsed VMDB successfully.");
-        return true;
-}
-/**
- * ldm_compare_privheads - Compare two privhead objects
- * @ph1:  First privhead
- * @ph2:  Second privhead
- *
- * This compares the two privhead structures @ph1 and @ph2.
- *
- * Return:  'true'   Identical
- *          'false'  Different
- */
-static bool ldm_compare_privheads (const struct privhead *ph1,
-                                   const struct privhead *ph2)
-{
-        BUG_ON (!ph1 || !ph2);
-        return ((ph1->ver_major          == ph2->ver_major)             &&
-                (ph1->ver_minor          == ph2->ver_minor)             &&
-                (ph1->logical_disk_start == ph2->logical_disk_start)    &&
-                (ph1->logical_disk_size  == ph2->logical_disk_size)     &&
-                (ph1->config_start       == ph2->config_start)          &&
-                (ph1->config_size        == ph2->config_size)           &&
-                !memcmp (ph1->disk_id, ph2->disk_id, GUID_SIZE));
-}
-/**
- * ldm_compare_tocblocks - Compare two tocblock objects
- * @toc1:  First toc
- * @toc2:  Second toc
- *
- * This compares the two tocblock structures @toc1 and @toc2.
- *
- * Return:  'true'   Identical
- *          'false'  Different
- */
-static bool ldm_compare_tocblocks (const struct tocblock *toc1,
-                                   const struct tocblock *toc2)
-{
-        BUG_ON (!toc1 || !toc2);
-        return ((toc1->bitmap1_start == toc2->bitmap1_start)    &&
-                (toc1->bitmap1_size  == toc2->bitmap1_size)     &&
-                (toc1->bitmap2_start == toc2->bitmap2_start)    &&
-                (toc1->bitmap2_size  == toc2->bitmap2_size)     &&
-                !strncmp (toc1->bitmap1_name, toc2->bitmap1_name,
-                        sizeof (toc1->bitmap1_name))            &&
-                !strncmp (toc1->bitmap2_name, toc2->bitmap2_name,
-                        sizeof (toc1->bitmap2_name)));
-}
-/**
- * ldm_validate_privheads - Compare the primary privhead with its backups
- * @state: Partition check state including device holding the LDM Database
- * @ph1:   Memory struct to fill with ph contents
- *
- * Read and compare all three privheads from disk.
- *
- * The privheads on disk show the size and location of the main disk area and
- * the configuration area (the database).  The values are range-checked against
- * @hd, which contains the real size of the disk.
- *
- * Return:  'true'   Success
- *          'false'  Error
- */
-static bool ldm_validate_privheads(struct parsed_partitions *state,
-                                   struct privhead *ph1)
-{
-        static const int off[3] = { OFF_PRIV1, OFF_PRIV2, OFF_PRIV3 };
-        struct privhead *ph[3] = { ph1 };
-        Sector sect;
-        u8 *data;
-        bool result = false;
-        long num_sects;
-        int i;
-        BUG_ON (!state || !ph1);
-        ph[1] = kmalloc (sizeof (*ph[1]), GFP_KERNEL);
-        ph[2] = kmalloc (sizeof (*ph[2]), GFP_KERNEL);
-        if (!ph[1] || !ph[2]) {
-                ldm_crit ("Out of memory.");
-                goto out;
-        }
-        /* off[1 & 2] are relative to ph[0]->config_start */
-        ph[0]->config_start = 0;
-        /* Read and parse privheads */
-        for (i = 0; i < 3; i++) {
-                data = read_part_sector(state, ph[0]->config_start + off[i],
-                                        &sect);
-                if (!data) {
-                        ldm_crit ("Disk read failed.");
-                        goto out;
-                }
-                result = ldm_parse_privhead (data, ph[i]);
-                put_dev_sector (sect);
-                if (!result) {
-                        ldm_error ("Cannot find PRIVHEAD %d.", i+1); /* Log again */
-                        if (i < 2)
-                                goto out;       /* Already logged */
-                        else
-                                break;  /* FIXME ignore for now, 3rd PH can fail on odd-sized disks */
-                }
-        }
-        num_sects = state->bdev->bd_inode->i_size >> 9;
-        if ((ph[0]->config_start > num_sects) ||
-           ((ph[0]->config_start + ph[0]->config_size) > num_sects)) {
-                ldm_crit ("Database extends beyond the end of the disk.");
-                goto out;
-        }
-        if ((ph[0]->logical_disk_start > ph[0]->config_start) ||
-           ((ph[0]->logical_disk_start + ph[0]->logical_disk_size)
-                    > ph[0]->config_start)) {
-                ldm_crit ("Disk and database overlap.");
-                goto out;
-        }
-        if (!ldm_compare_privheads (ph[0], ph[1])) {
-                ldm_crit ("Primary and backup PRIVHEADs don't match.");
-                goto out;
-        }
-        /* FIXME ignore this for now
-        if (!ldm_compare_privheads (ph[0], ph[2])) {
-                ldm_crit ("Primary and backup PRIVHEADs don't match.");
-                goto out;
-        }*/
-        ldm_debug ("Validated PRIVHEADs successfully.");
-        result = true;
-out:
-        kfree (ph[1]);
-        kfree (ph[2]);
-        return result;
-}
-/**
- * ldm_validate_tocblocks - Validate the table of contents and its backups
- * @state: Partition check state including device holding the LDM Database
- * @base:  Offset, into @state->bdev, of the database
- * @ldb:   Cache of the database structures
- *
- * Find and compare the four tables of contents of the LDM Database stored on
- * @state->bdev and return the parsed information into @toc1.
- *
- * The offsets and sizes of the configs are range-checked against a privhead.
- *
- * Return:  'true'   @toc1 contains validated TOCBLOCK info
- *          'false'  @toc1 contents are undefined
- */
-static bool ldm_validate_tocblocks(struct parsed_partitions *state,
-                                   unsigned long base, struct ldmdb *ldb)
-{
-        static const int off[4] = { OFF_TOCB1, OFF_TOCB2, OFF_TOCB3, OFF_TOCB4};
-        struct tocblock *tb[4];
-        struct privhead *ph;
-        Sector sect;
-        u8 *data;
-        int i, nr_tbs;
-        bool result = false;
-        BUG_ON(!state || !ldb);
-        ph = &ldb->ph;
-        tb[0] = &ldb->toc;
-        tb[1] = kmalloc(sizeof(*tb[1]) * 3, GFP_KERNEL);
-        if (!tb[1]) {
-                ldm_crit("Out of memory.");
-                goto err;
-        }
-        tb[2] = (struct tocblock*)((u8*)tb[1] + sizeof(*tb[1]));
-        tb[3] = (struct tocblock*)((u8*)tb[2] + sizeof(*tb[2]));
-        /*
-         * Try to read and parse all four TOCBLOCKs.
-         *
-         * Windows Vista LDM v2.12 does not always have all four TOCBLOCKs so
-         * skip any that fail as long as we get at least one valid TOCBLOCK.
-         */
-        for (nr_tbs = i = 0; i < 4; i++) {
-                data = read_part_sector(state, base + off[i], &sect);
-                if (!data) {
-                        ldm_error("Disk read failed for TOCBLOCK %d.", i);
-                        continue;
-                }
-                if (ldm_parse_tocblock(data, tb[nr_tbs]))
-                        nr_tbs++;
-                put_dev_sector(sect);
-        }
-        if (!nr_tbs) {
-                ldm_crit("Failed to find a valid TOCBLOCK.");
-                goto err;
-        }
-        /* Range check the TOCBLOCK against a privhead. */
-        if (((tb[0]->bitmap1_start + tb[0]->bitmap1_size) > ph->config_size) ||
-                        ((tb[0]->bitmap2_start + tb[0]->bitmap2_size) >
-                        ph->config_size)) {
-                ldm_crit("The bitmaps are out of range.  Giving up.");
-                goto err;
-        }
-        /* Compare all loaded TOCBLOCKs. */
-        for (i = 1; i < nr_tbs; i++) {
-                if (!ldm_compare_tocblocks(tb[0], tb[i])) {
-                        ldm_crit("TOCBLOCKs 0 and %d do not match.", i);
-                        goto err;
-                }
-        }
-        ldm_debug("Validated %d TOCBLOCKs successfully.", nr_tbs);
-        result = true;
-err:
-        kfree(tb[1]);
-        return result;
-}
-/**
- * ldm_validate_vmdb - Read the VMDB and validate it
- * @state: Partition check state including device holding the LDM Database
- * @base:  Offset, into @bdev, of the database
- * @ldb:   Cache of the database structures
- *
- * Find the vmdb of the LDM Database stored on @bdev and return the parsed
- * information in @ldb.
- *
- * Return:  'true'   @ldb contains validated VBDB info
- *          'false'  @ldb contents are undefined
- */
-static bool ldm_validate_vmdb(struct parsed_partitions *state,
-                              unsigned long base, struct ldmdb *ldb)
-{
-        Sector sect;
-        u8 *data;
-        bool result = false;
-        struct vmdb *vm;
-        struct tocblock *toc;
-        BUG_ON (!state || !ldb);
-        vm  = &ldb->vm;
-        toc = &ldb->toc;
-        data = read_part_sector(state, base + OFF_VMDB, &sect);
-        if (!data) {
-                ldm_crit ("Disk read failed.");
-                return false;
-        }
-        if (!ldm_parse_vmdb (data, vm))
-                goto out;                               /* Already logged */
-        /* Are there uncommitted transactions? */
-        if (get_unaligned_be16(data + 0x10) != 0x01) {
-                ldm_crit ("Database is not in a consistent state.  Aborting.");
-                goto out;
-        }
-        if (vm->vblk_offset != 512)
-                ldm_info ("VBLKs start at offset 0x%04x.", vm->vblk_offset);
-        /*
-         * The last_vblkd_seq can be before the end of the vmdb, just make sure
-         * it is not out of bounds.
-         */
-        if ((vm->vblk_size * vm->last_vblk_seq) > (toc->bitmap1_size << 9)) {
-                ldm_crit ("VMDB exceeds allowed size specified by TOCBLOCK.  "
-                                "Database is corrupt.  Aborting.");
-                goto out;
-        }
-        result = true;
-out:
-        put_dev_sector (sect);
-        return result;
-}
-/**
- * ldm_validate_partition_table - Determine whether bdev might be a dynamic disk
- * @state: Partition check state including device holding the LDM Database
- *
- * This function provides a weak test to decide whether the device is a dynamic
- * disk or not.  It looks for an MS-DOS-style partition table containing at
- * least one partition of type 0x42 (formerly SFS, now used by Windows for
- * dynamic disks).
- *
- * N.B.  The only possible error can come from the read_part_sector and that is
- *       only likely to happen if the underlying device is strange.  If that IS
- *       the case we should return zero to let someone else try.
- *
- * Return:  'true'   @state->bdev is a dynamic disk
- *          'false'  @state->bdev is not a dynamic disk, or an error occurred
- */
-static bool ldm_validate_partition_table(struct parsed_partitions *state)
-{
-        Sector sect;
-        u8 *data;
-        struct partition *p;
-        int i;
-        bool result = false;
-        BUG_ON(!state);
-        data = read_part_sector(state, 0, &sect);
-        if (!data) {
-                ldm_info ("Disk read failed.");
-                return false;
-        }
-        if (*(__le16*) (data + 0x01FE) != cpu_to_le16 (MSDOS_LABEL_MAGIC))
-                goto out;
-        p = (struct partition*)(data + 0x01BE);
-        for (i = 0; i < 4; i++, p++)
-                if (SYS_IND (p) == LDM_PARTITION) {
-                        result = true;
-                        break;
-                }
-        if (result)
-                ldm_debug ("Found W2K dynamic disk partition type.");
-out:
-        put_dev_sector (sect);
-        return result;
-}
-/**
- * ldm_get_disk_objid - Search a linked list of vblk's for a given Disk Id
- * @ldb:  Cache of the database structures
- *
- * The LDM Database contains a list of all partitions on all dynamic disks.
- * The primary PRIVHEAD, at the beginning of the physical disk, tells us
- * the GUID of this disk.  This function searches for the GUID in a linked
- * list of vblk's.
- *
- * Return:  Pointer, A matching vblk was found
- *          NULL,    No match, or an error
- */
-static struct vblk * ldm_get_disk_objid (const struct ldmdb *ldb)
-{
-        struct list_head *item;
-        BUG_ON (!ldb);
-        list_for_each (item, &ldb->v_disk) {
-                struct vblk *v = list_entry (item, struct vblk, list);
-                if (!memcmp (v->vblk.disk.disk_id, ldb->ph.disk_id, GUID_SIZE))
-                        return v;
-        }
-        return NULL;
-}
-/**
- * ldm_create_data_partitions - Create data partitions for this device
- * @pp:   List of the partitions parsed so far
- * @ldb:  Cache of the database structures
- *
- * The database contains ALL the partitions for ALL disk groups, so we need to
- * filter out this specific disk. Using the disk's object id, we can find all
- * the partitions in the database that belong to this disk.
- *
- * Add each partition in our database, to the parsed_partitions structure.
- *
- * N.B.  This function creates the partitions in the order it finds partition
- *       objects in the linked list.
- *
- * Return:  'true'   Partition created
- *          'false'  Error, probably a range checking problem
- */
-static bool ldm_create_data_partitions (struct parsed_partitions *pp,
-                                        const struct ldmdb *ldb)
-{
-        struct list_head *item;
-        struct vblk *vb;
-        struct vblk *disk;
-        struct vblk_part *part;
-        int part_num = 1;
-        BUG_ON (!pp || !ldb);
-        disk = ldm_get_disk_objid (ldb);
-        if (!disk) {
-                ldm_crit ("Can't find the ID of this disk in the database.");
-                return false;
-        }
-        strlcat(pp->pp_buf, " [LDM]", PAGE_SIZE);
-        /* Create the data partitions */
-        list_for_each (item, &ldb->v_part) {
-                vb = list_entry (item, struct vblk, list);
-                part = &vb->vblk.part;
-                if (part->disk_id != disk->obj_id)
-                        continue;
-                put_partition (pp, part_num, ldb->ph.logical_disk_start +
-                                part->start, part->size);
-                part_num++;
-        }
-        strlcat(pp->pp_buf, "\n", PAGE_SIZE);
-        return true;
-}
-/**
- * ldm_relative - Calculate the next relative offset
- * @buffer:  Block of data being worked on
- * @buflen:  Size of the block of data
- * @base:    Size of the previous fixed width fields
- * @offset:  Cumulative size of the previous variable-width fields
- *
- * Because many of the VBLK fields are variable-width, it's necessary
- * to calculate each offset based on the previous one and the length
- * of the field it pointed to.
- *
- * Return:  -1 Error, the calculated offset exceeded the size of the buffer
- *           n OK, a range-checked offset into buffer
- */
-static int ldm_relative(const u8 *buffer, int buflen, int base, int offset)
-{
-        base += offset;
-        if (!buffer || offset < 0 || base > buflen) {
-                if (!buffer)
-                        ldm_error("!buffer");
-                if (offset < 0)
-                        ldm_error("offset (%d) < 0", offset);
-                if (base > buflen)
-                        ldm_error("base (%d) > buflen (%d)", base, buflen);
-                return -1;
-        }
-        if (base + buffer[base] >= buflen) {
-                ldm_error("base (%d) + buffer[base] (%d) >= buflen (%d)", base,
-                                buffer[base], buflen);
-                return -1;
-        }
-        return buffer[base] + offset + 1;
-}
-/**
- * ldm_get_vnum - Convert a variable-width, big endian number, into cpu order
- * @block:  Pointer to the variable-width number to convert
- *
- * Large numbers in the LDM Database are often stored in a packed format.  Each
- * number is prefixed by a one byte width marker.  All numbers in the database
- * are stored in big-endian byte order.  This function reads one of these
- * numbers and returns the result
- *
- * N.B.  This function DOES NOT perform any range checking, though the most
- *       it will read is eight bytes.
- *
- * Return:  n A number
- *          0 Zero, or an error occurred
- */
-static u64 ldm_get_vnum (const u8 *block)
-{
-        u64 tmp = 0;
-        u8 length;
-        BUG_ON (!block);
-        length = *block++;
-        if (length && length <= 8)
-                while (length--)
-                        tmp = (tmp << 8) | *block++;
-        else
-                ldm_error ("Illegal length %d.", length);
-        return tmp;
-}
-/**
- * ldm_get_vstr - Read a length-prefixed string into a buffer
- * @block:   Pointer to the length marker
- * @buffer:  Location to copy string to
- * @buflen:  Size of the output buffer
- *
- * Many of the strings in the LDM Database are not NULL terminated.  Instead
- * they are prefixed by a one byte length marker.  This function copies one of
- * these strings into a buffer.
- *
- * N.B.  This function DOES NOT perform any range checking on the input.
- *       If the buffer is too small, the output will be truncated.
- *
- * Return:  0, Error and @buffer contents are undefined
- *          n, String length in characters (excluding NULL)
- *          buflen-1, String was truncated.
- */
-static int ldm_get_vstr (const u8 *block, u8 *buffer, int buflen)
-{
-        int length;
-        BUG_ON (!block || !buffer);
-        length = block[0];
-        if (length >= buflen) {
-                ldm_error ("Truncating string %d -> %d.", length, buflen);
-                length = buflen - 1;
-        }
-        memcpy (buffer, block + 1, length);
-        buffer[length] = 0;
-        return length;
-}
-/**
- * ldm_parse_cmp3 - Read a raw VBLK Component object into a vblk structure
- * @buffer:  Block of data being worked on
- * @buflen:  Size of the block of data
- * @vb:      In-memory vblk in which to return information
- *
- * Read a raw VBLK Component object (version 3) into a vblk structure.
- *
- * Return:  'true'   @vb contains a Component VBLK
- *          'false'  @vb contents are not defined
- */
-static bool ldm_parse_cmp3 (const u8 *buffer, int buflen, struct vblk *vb)
-{
-        int r_objid, r_name, r_vstate, r_child, r_parent, r_stripe, r_cols, len;
-        struct vblk_comp *comp;
-        BUG_ON (!buffer || !vb);
-        r_objid  = ldm_relative (buffer, buflen, 0x18, 0);
-        r_name   = ldm_relative (buffer, buflen, 0x18, r_objid);
-        r_vstate = ldm_relative (buffer, buflen, 0x18, r_name);
-        r_child  = ldm_relative (buffer, buflen, 0x1D, r_vstate);
-        r_parent = ldm_relative (buffer, buflen, 0x2D, r_child);
-        if (buffer[0x12] & VBLK_FLAG_COMP_STRIPE) {
-                r_stripe = ldm_relative (buffer, buflen, 0x2E, r_parent);
-                r_cols   = ldm_relative (buffer, buflen, 0x2E, r_stripe);
-                len = r_cols;
-        } else {
-                r_stripe = 0;
-                r_cols   = 0;
-                len = r_parent;
-        }
-        if (len < 0)
-                return false;
-        len += VBLK_SIZE_CMP3;
-        if (len != get_unaligned_be32(buffer + 0x14))
-                return false;
-        comp = &vb->vblk.comp;
-        ldm_get_vstr (buffer + 0x18 + r_name, comp->state,
-                sizeof (comp->state));
-        comp->type      = buffer[0x18 + r_vstate];
-        comp->children  = ldm_get_vnum (buffer + 0x1D + r_vstate);
-        comp->parent_id = ldm_get_vnum (buffer + 0x2D + r_child);
-        comp->chunksize = r_stripe ? ldm_get_vnum (buffer+r_parent+0x2E) : 0;
-        return true;
-}
-/**
- * ldm_parse_dgr3 - Read a raw VBLK Disk Group object into a vblk structure
- * @buffer:  Block of data being worked on
- * @buflen:  Size of the block of data
- * @vb:      In-memory vblk in which to return information
- *
- * Read a raw VBLK Disk Group object (version 3) into a vblk structure.
- *
- * Return:  'true'   @vb contains a Disk Group VBLK
- *          'false'  @vb contents are not defined
- */
-static int ldm_parse_dgr3 (const u8 *buffer, int buflen, struct vblk *vb)
-{
-        int r_objid, r_name, r_diskid, r_id1, r_id2, len;
-        struct vblk_dgrp *dgrp;
-        BUG_ON (!buffer || !vb);
-        r_objid  = ldm_relative (buffer, buflen, 0x18, 0);
-        r_name   = ldm_relative (buffer, buflen, 0x18, r_objid);
-        r_diskid = ldm_relative (buffer, buflen, 0x18, r_name);
-        if (buffer[0x12] & VBLK_FLAG_DGR3_IDS) {
-                r_id1 = ldm_relative (buffer, buflen, 0x24, r_diskid);
-                r_id2 = ldm_relative (buffer, buflen, 0x24, r_id1);
-                len = r_id2;
-        } else {
-                r_id1 = 0;
-                r_id2 = 0;
-                len = r_diskid;
-        }
-        if (len < 0)
-                return false;
-        len += VBLK_SIZE_DGR3;
-        if (len != get_unaligned_be32(buffer + 0x14))
-                return false;
-        dgrp = &vb->vblk.dgrp;
-        ldm_get_vstr (buffer + 0x18 + r_name, dgrp->disk_id,
-                sizeof (dgrp->disk_id));
-        return true;
-}
-/**
- * ldm_parse_dgr4 - Read a raw VBLK Disk Group object into a vblk structure
- * @buffer:  Block of data being worked on
- * @buflen:  Size of the block of data
- * @vb:      In-memory vblk in which to return information
- *
- * Read a raw VBLK Disk Group object (version 4) into a vblk structure.
- *
- * Return:  'true'   @vb contains a Disk Group VBLK
- *          'false'  @vb contents are not defined
- */
-static bool ldm_parse_dgr4 (const u8 *buffer, int buflen, struct vblk *vb)
-{
-        char buf[64];
-        int r_objid, r_name, r_id1, r_id2, len;
-        struct vblk_dgrp *dgrp;
-        BUG_ON (!buffer || !vb);
-        r_objid  = ldm_relative (buffer, buflen, 0x18, 0);
-        r_name   = ldm_relative (buffer, buflen, 0x18, r_objid);
-        if (buffer[0x12] & VBLK_FLAG_DGR4_IDS) {
-                r_id1 = ldm_relative (buffer, buflen, 0x44, r_name);
-                r_id2 = ldm_relative (buffer, buflen, 0x44, r_id1);
-                len = r_id2;
-        } else {
-                r_id1 = 0;
-                r_id2 = 0;
-                len = r_name;
-        }
-        if (len < 0)
-                return false;
-        len += VBLK_SIZE_DGR4;
-        if (len != get_unaligned_be32(buffer + 0x14))
-                return false;
-        dgrp = &vb->vblk.dgrp;
-        ldm_get_vstr (buffer + 0x18 + r_objid, buf, sizeof (buf));
-        return true;
-}
-/**
- * ldm_parse_dsk3 - Read a raw VBLK Disk object into a vblk structure
- * @buffer:  Block of data being worked on
- * @buflen:  Size of the block of data
- * @vb:      In-memory vblk in which to return information
- *
- * Read a raw VBLK Disk object (version 3) into a vblk structure.
- *
- * Return:  'true'   @vb contains a Disk VBLK
- *          'false'  @vb contents are not defined
- */
-static bool ldm_parse_dsk3 (const u8 *buffer, int buflen, struct vblk *vb)
-{
-        int r_objid, r_name, r_diskid, r_altname, len;
-        struct vblk_disk *disk;
-        BUG_ON (!buffer || !vb);
-        r_objid   = ldm_relative (buffer, buflen, 0x18, 0);
-        r_name    = ldm_relative (buffer, buflen, 0x18, r_objid);
-        r_diskid  = ldm_relative (buffer, buflen, 0x18, r_name);
-        r_altname = ldm_relative (buffer, buflen, 0x18, r_diskid);
-        len = r_altname;
-        if (len < 0)
-                return false;
-        len += VBLK_SIZE_DSK3;
-        if (len != get_unaligned_be32(buffer + 0x14))
-                return false;
-        disk = &vb->vblk.disk;
-        ldm_get_vstr (buffer + 0x18 + r_diskid, disk->alt_name,
-                sizeof (disk->alt_name));
-        if (!ldm_parse_guid (buffer + 0x19 + r_name, disk->disk_id))
-                return false;
-        return true;
-}
-/**
- * ldm_parse_dsk4 - Read a raw VBLK Disk object into a vblk structure
- * @buffer:  Block of data being worked on
- * @buflen:  Size of the block of data
- * @vb:      In-memory vblk in which to return information
- *
- * Read a raw VBLK Disk object (version 4) into a vblk structure.
- *
- * Return:  'true'   @vb contains a Disk VBLK
- *          'false'  @vb contents are not defined
- */
-static bool ldm_parse_dsk4 (const u8 *buffer, int buflen, struct vblk *vb)
-{
-        int r_objid, r_name, len;
-        struct vblk_disk *disk;
-        BUG_ON (!buffer || !vb);
-        r_objid = ldm_relative (buffer, buflen, 0x18, 0);
-        r_name  = ldm_relative (buffer, buflen, 0x18, r_objid);
-        len     = r_name;
-        if (len < 0)
-                return false;
-        len += VBLK_SIZE_DSK4;
-        if (len != get_unaligned_be32(buffer + 0x14))
-                return false;
-        disk = &vb->vblk.disk;
-        memcpy (disk->disk_id, buffer + 0x18 + r_name, GUID_SIZE);
-        return true;
-}
-/**
- * ldm_parse_prt3 - Read a raw VBLK Partition object into a vblk structure
- * @buffer:  Block of data being worked on
- * @buflen:  Size of the block of data
- * @vb:      In-memory vblk in which to return information
- *
- * Read a raw VBLK Partition object (version 3) into a vblk structure.
- *
- * Return:  'true'   @vb contains a Partition VBLK
- *          'false'  @vb contents are not defined
- */
-static bool ldm_parse_prt3(const u8 *buffer, int buflen, struct vblk *vb)
-{
-        int r_objid, r_name, r_size, r_parent, r_diskid, r_index, len;
-        struct vblk_part *part;
-        BUG_ON(!buffer || !vb);
-        r_objid = ldm_relative(buffer, buflen, 0x18, 0);
-        if (r_objid < 0) {
-                ldm_error("r_objid %d < 0", r_objid);
-                return false;
-        }
-        r_name = ldm_relative(buffer, buflen, 0x18, r_objid);
-        if (r_name < 0) {
-                ldm_error("r_name %d < 0", r_name);
-                return false;
-        }
-        r_size = ldm_relative(buffer, buflen, 0x34, r_name);
-        if (r_size < 0) {
-                ldm_error("r_size %d < 0", r_size);
-                return false;
-        }
-        r_parent = ldm_relative(buffer, buflen, 0x34, r_size);
-        if (r_parent < 0) {
-                ldm_error("r_parent %d < 0", r_parent);
-                return false;
-        }
-        r_diskid = ldm_relative(buffer, buflen, 0x34, r_parent);
-        if (r_diskid < 0) {
-                ldm_error("r_diskid %d < 0", r_diskid);
-                return false;
-        }
-        if (buffer[0x12] & VBLK_FLAG_PART_INDEX) {
-                r_index = ldm_relative(buffer, buflen, 0x34, r_diskid);
-                if (r_index < 0) {
-                        ldm_error("r_index %d < 0", r_index);
-                        return false;
-                }
-                len = r_index;
-        } else {
-                r_index = 0;
-                len = r_diskid;
-        }
-        if (len < 0) {
-                ldm_error("len %d < 0", len);
-                return false;
-        }
-        len += VBLK_SIZE_PRT3;
-        if (len > get_unaligned_be32(buffer + 0x14)) {
-                ldm_error("len %d > BE32(buffer + 0x14) %d", len,
-                                get_unaligned_be32(buffer + 0x14));
-                return false;
-        }
-        part = &vb->vblk.part;
-        part->start = get_unaligned_be64(buffer + 0x24 + r_name);
-        part->volume_offset = get_unaligned_be64(buffer + 0x2C + r_name);
-        part->size = ldm_get_vnum(buffer + 0x34 + r_name);
-        part->parent_id = ldm_get_vnum(buffer + 0x34 + r_size);
-        part->disk_id = ldm_get_vnum(buffer + 0x34 + r_parent);
-        if (vb->flags & VBLK_FLAG_PART_INDEX)
-                part->partnum = buffer[0x35 + r_diskid];
-        else
-                part->partnum = 0;
-        return true;
-}
-/**
- * ldm_parse_vol5 - Read a raw VBLK Volume object into a vblk structure
- * @buffer:  Block of data being worked on
- * @buflen:  Size of the block of data
- * @vb:      In-memory vblk in which to return information
- *
- * Read a raw VBLK Volume object (version 5) into a vblk structure.
- *
- * Return:  'true'   @vb contains a Volume VBLK
- *          'false'  @vb contents are not defined
- */
-static bool ldm_parse_vol5(const u8 *buffer, int buflen, struct vblk *vb)
-{
-        int r_objid, r_name, r_vtype, r_disable_drive_letter, r_child, r_size;
-        int r_id1, r_id2, r_size2, r_drive, len;
-        struct vblk_volu *volu;
-        BUG_ON(!buffer || !vb);
-        r_objid = ldm_relative(buffer, buflen, 0x18, 0);
-        if (r_objid < 0) {
-                ldm_error("r_objid %d < 0", r_objid);
-                return false;
-        }
-        r_name = ldm_relative(buffer, buflen, 0x18, r_objid);
-        if (r_name < 0) {
-                ldm_error("r_name %d < 0", r_name);
-                return false;
-        }
-        r_vtype = ldm_relative(buffer, buflen, 0x18, r_name);
-        if (r_vtype < 0) {
-                ldm_error("r_vtype %d < 0", r_vtype);
-                return false;
-        }
-        r_disable_drive_letter = ldm_relative(buffer, buflen, 0x18, r_vtype);
-        if (r_disable_drive_letter < 0) {
-                ldm_error("r_disable_drive_letter %d < 0",
-                                r_disable_drive_letter);
-                return false;
-        }
-        r_child = ldm_relative(buffer, buflen, 0x2D, r_disable_drive_letter);
-        if (r_child < 0) {
-                ldm_error("r_child %d < 0", r_child);
-                return false;
-        }
-        r_size = ldm_relative(buffer, buflen, 0x3D, r_child);
-        if (r_size < 0) {
-                ldm_error("r_size %d < 0", r_size);
-                return false;
-        }
-        if (buffer[0x12] & VBLK_FLAG_VOLU_ID1) {
-                r_id1 = ldm_relative(buffer, buflen, 0x52, r_size);
-                if (r_id1 < 0) {
-                        ldm_error("r_id1 %d < 0", r_id1);
-                        return false;
-                }
-        } else
-                r_id1 = r_size;
-        if (buffer[0x12] & VBLK_FLAG_VOLU_ID2) {
-                r_id2 = ldm_relative(buffer, buflen, 0x52, r_id1);
-                if (r_id2 < 0) {
-                        ldm_error("r_id2 %d < 0", r_id2);
-                        return false;
-                }
-        } else
-                r_id2 = r_id1;
-        if (buffer[0x12] & VBLK_FLAG_VOLU_SIZE) {
-                r_size2 = ldm_relative(buffer, buflen, 0x52, r_id2);
-                if (r_size2 < 0) {
-                        ldm_error("r_size2 %d < 0", r_size2);
-                        return false;
-                }
-        } else
-                r_size2 = r_id2;
-        if (buffer[0x12] & VBLK_FLAG_VOLU_DRIVE) {
-                r_drive = ldm_relative(buffer, buflen, 0x52, r_size2);
-                if (r_drive < 0) {
-                        ldm_error("r_drive %d < 0", r_drive);
-                        return false;
-                }
-        } else
-                r_drive = r_size2;
-        len = r_drive;
-        if (len < 0) {
-                ldm_error("len %d < 0", len);
-                return false;
-        }
-        len += VBLK_SIZE_VOL5;
-        if (len > get_unaligned_be32(buffer + 0x14)) {
-                ldm_error("len %d > BE32(buffer + 0x14) %d", len,
-                                get_unaligned_be32(buffer + 0x14));
-                return false;
-        }
-        volu = &vb->vblk.volu;
-        ldm_get_vstr(buffer + 0x18 + r_name, volu->volume_type,
-                        sizeof(volu->volume_type));
-        memcpy(volu->volume_state, buffer + 0x18 + r_disable_drive_letter,
-                        sizeof(volu->volume_state));
-        volu->size = ldm_get_vnum(buffer + 0x3D + r_child);
-        volu->partition_type = buffer[0x41 + r_size];
-        memcpy(volu->guid, buffer + 0x42 + r_size, sizeof(volu->guid));
-        if (buffer[0x12] & VBLK_FLAG_VOLU_DRIVE) {
-                ldm_get_vstr(buffer + 0x52 + r_size, volu->drive_hint,
-                                sizeof(volu->drive_hint));
-        }
-        return true;
-}
-/**
- * ldm_parse_vblk - Read a raw VBLK object into a vblk structure
- * @buf:  Block of data being worked on
- * @len:  Size of the block of data
- * @vb:   In-memory vblk in which to return information
- *
- * Read a raw VBLK object into a vblk structure.  This function just reads the
- * information common to all VBLK types, then delegates the rest of the work to
- * helper functions: ldm_parse_*.
- *
- * Return:  'true'   @vb contains a VBLK
- *          'false'  @vb contents are not defined
- */
-static bool ldm_parse_vblk (const u8 *buf, int len, struct vblk *vb)
-{
-        bool result = false;
-        int r_objid;
-        BUG_ON (!buf || !vb);
-        r_objid = ldm_relative (buf, len, 0x18, 0);
-        if (r_objid < 0) {
-                ldm_error ("VBLK header is corrupt.");
-                return false;
-        }
-        vb->flags  = buf[0x12];
-        vb->type   = buf[0x13];
-        vb->obj_id = ldm_get_vnum (buf + 0x18);
-        ldm_get_vstr (buf+0x18+r_objid, vb->name, sizeof (vb->name));
-        switch (vb->type) {
-                case VBLK_CMP3:  result = ldm_parse_cmp3 (buf, len, vb); break;
-                case VBLK_DSK3:  result = ldm_parse_dsk3 (buf, len, vb); break;
-                case VBLK_DSK4:  result = ldm_parse_dsk4 (buf, len, vb); break;
-                case VBLK_DGR3:  result = ldm_parse_dgr3 (buf, len, vb); break;
-                case VBLK_DGR4:  result = ldm_parse_dgr4 (buf, len, vb); break;
-                case VBLK_PRT3:  result = ldm_parse_prt3 (buf, len, vb); break;
-                case VBLK_VOL5:  result = ldm_parse_vol5 (buf, len, vb); break;
-        }
-        if (result)
-                ldm_debug ("Parsed VBLK 0x%llx (type: 0x%02x) ok.",
-                         (unsigned long long) vb->obj_id, vb->type);
-        else
-                ldm_error ("Failed to parse VBLK 0x%llx (type: 0x%02x).",
-                        (unsigned long long) vb->obj_id, vb->type);
-        return result;
-}
-/**
- * ldm_ldmdb_add - Adds a raw VBLK entry to the ldmdb database
- * @data:  Raw VBLK to add to the database
- * @len:   Size of the raw VBLK
- * @ldb:   Cache of the database structures
- *
- * The VBLKs are sorted into categories.  Partitions are also sorted by offset.
- *
- * N.B.  This function does not check the validity of the VBLKs.
- *
- * Return:  'true'   The VBLK was added
- *          'false'  An error occurred
- */
-static bool ldm_ldmdb_add (u8 *data, int len, struct ldmdb *ldb)
-{
-        struct vblk *vb;
-        struct list_head *item;
-        BUG_ON (!data || !ldb);
-        vb = kmalloc (sizeof (*vb), GFP_KERNEL);
-        if (!vb) {
-                ldm_crit ("Out of memory.");
-                return false;
-        }
-        if (!ldm_parse_vblk (data, len, vb)) {
-                kfree(vb);
-                return false;                   /* Already logged */
-        }
-        /* Put vblk into the correct list. */
-        switch (vb->type) {
-        case VBLK_DGR3:
-        case VBLK_DGR4:
-                list_add (&vb->list, &ldb->v_dgrp);
-                break;
-        case VBLK_DSK3:
-        case VBLK_DSK4:
-                list_add (&vb->list, &ldb->v_disk);
-                break;
-        case VBLK_VOL5:
-                list_add (&vb->list, &ldb->v_volu);
-                break;
-        case VBLK_CMP3:
-                list_add (&vb->list, &ldb->v_comp);
-                break;
-        case VBLK_PRT3:
-                /* Sort by the partition's start sector. */
-                list_for_each (item, &ldb->v_part) {
-                        struct vblk *v = list_entry (item, struct vblk, list);
-                        if ((v->vblk.part.disk_id == vb->vblk.part.disk_id) &&
-                            (v->vblk.part.start > vb->vblk.part.start)) {
-                                list_add_tail (&vb->list, &v->list);
-                                return true;
-                        }
-                }
-                list_add_tail (&vb->list, &ldb->v_part);
-                break;
-        }
-        return true;
-}
-/**
- * ldm_frag_add - Add a VBLK fragment to a list
- * @data:   Raw fragment to be added to the list
- * @size:   Size of the raw fragment
- * @frags:  Linked list of VBLK fragments
- *
- * Fragmented VBLKs may not be consecutive in the database, so they are placed
- * in a list so they can be pieced together later.
- *
- * Return:  'true'   Success, the VBLK was added to the list
- *          'false'  Error, a problem occurred
- */
-static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags)
-{
-        struct frag *f;
-        struct list_head *item;
-        int rec, num, group;
-        BUG_ON (!data || !frags);
-        if (size < 2 * VBLK_SIZE_HEAD) {
-                ldm_error("Value of size is to small.");
-                return false;
-        }
-        group = get_unaligned_be32(data + 0x08);
-        rec   = get_unaligned_be16(data + 0x0C);
-        num   = get_unaligned_be16(data + 0x0E);
-        if ((num < 1) || (num > 4)) {
-                ldm_error ("A VBLK claims to have %d parts.", num);
-                return false;
-        }
-        if (rec >= num) {
-                ldm_error("REC value (%d) exceeds NUM value (%d)", rec, num);
-                return false;
-        }
-        list_for_each (item, frags) {
-                f = list_entry (item, struct frag, list);
-                if (f->group == group)
-                        goto found;
-        }
-        f = kmalloc (sizeof (*f) + size*num, GFP_KERNEL);
-        if (!f) {
-                ldm_crit ("Out of memory.");
-                return false;
-        }
-        f->group = group;
-        f->num   = num;
-        f->rec   = rec;
-        f->map   = 0xFF << num;
-        list_add_tail (&f->list, frags);
-found:
-        if (rec >= f->num) {
-                ldm_error("REC value (%d) exceeds NUM value (%d)", rec, f->num);
-                return false;
-        }
-        if (f->map & (1 << rec)) {
-                ldm_error ("Duplicate VBLK, part %d.", rec);
-                f->map &= 0x7F;                 /* Mark the group as broken */
-                return false;
-        }
-        f->map |= (1 << rec);
-        data += VBLK_SIZE_HEAD;
-        size -= VBLK_SIZE_HEAD;
-        memcpy (f->data+rec*(size-VBLK_SIZE_HEAD)+VBLK_SIZE_HEAD, data, size);
-        return true;
-}
-/**
- * ldm_frag_free - Free a linked list of VBLK fragments
- * @list:  Linked list of fragments
- *
- * Free a linked list of VBLK fragments
- *
- * Return:  none
- */
-static void ldm_frag_free (struct list_head *list)
-{
-        struct list_head *item, *tmp;
-        BUG_ON (!list);
-        list_for_each_safe (item, tmp, list)
-                kfree (list_entry (item, struct frag, list));
-}
-/**
- * ldm_frag_commit - Validate fragmented VBLKs and add them to the database
- * @frags:  Linked list of VBLK fragments
- * @ldb:    Cache of the database structures
- *
- * Now that all the fragmented VBLKs have been collected, they must be added to
- * the database for later use.
- *
- * Return:  'true'   All the fragments we added successfully
- *          'false'  One or more of the fragments we invalid
- */
-static bool ldm_frag_commit (struct list_head *frags, struct ldmdb *ldb)
-{
-        struct frag *f;
-        struct list_head *item;
-        BUG_ON (!frags || !ldb);
-        list_for_each (item, frags) {
-                f = list_entry (item, struct frag, list);
-                if (f->map != 0xFF) {
-                        ldm_error ("VBLK group %d is incomplete (0x%02x).",
-                                f->group, f->map);
-                        return false;
-                }
-                if (!ldm_ldmdb_add (f->data, f->num*ldb->vm.vblk_size, ldb))
-                        return false;           /* Already logged */
-        }
-        return true;
-}
-/**
- * ldm_get_vblks - Read the on-disk database of VBLKs into memory
- * @state: Partition check state including device holding the LDM Database
- * @base:  Offset, into @state->bdev, of the database
- * @ldb:   Cache of the database structures
- *
- * To use the information from the VBLKs, they need to be read from the disk,
- * unpacked and validated.  We cache them in @ldb according to their type.
- *
- * Return:  'true'   All the VBLKs were read successfully
- *          'false'  An error occurred
- */
-static bool ldm_get_vblks(struct parsed_partitions *state, unsigned long base,
-                          struct ldmdb *ldb)
-{
-        int size, perbuf, skip, finish, s, v, recs;
-        u8 *data = NULL;
-        Sector sect;
-        bool result = false;
-        LIST_HEAD (frags);
-        BUG_ON(!state || !ldb);
-        size   = ldb->vm.vblk_size;
-        perbuf = 512 / size;
-        skip   = ldb->vm.vblk_offset >> 9;              /* Bytes to sectors */
-        finish = (size * ldb->vm.last_vblk_seq) >> 9;
-        for (s = skip; s < finish; s++) {               /* For each sector */
-                data = read_part_sector(state, base + OFF_VMDB + s, &sect);
-                if (!data) {
-                        ldm_crit ("Disk read failed.");
-                        goto out;
-                }
-                for (v = 0; v < perbuf; v++, data+=size) {  /* For each vblk */
-                        if (MAGIC_VBLK != get_unaligned_be32(data)) {
-                                ldm_error ("Expected to find a VBLK.");
-                                goto out;
-                        }
-                        recs = get_unaligned_be16(data + 0x0E); /* Number of records */
-                        if (recs == 1) {
-                                if (!ldm_ldmdb_add (data, size, ldb))
-                                        goto out;       /* Already logged */
-                        } else if (recs > 1) {
-                                if (!ldm_frag_add (data, size, &frags))
-                                        goto out;       /* Already logged */
-                        }
-                        /* else Record is not in use, ignore it. */
-                }
-                put_dev_sector (sect);
-                data = NULL;
-        }
-        result = ldm_frag_commit (&frags, ldb); /* Failures, already logged */
-out:
-        if (data)
-                put_dev_sector (sect);
-        ldm_frag_free (&frags);
-        return result;
-}
-/**
- * ldm_free_vblks - Free a linked list of vblk's
- * @lh:  Head of a linked list of struct vblk
- *
- * Free a list of vblk's and free the memory used to maintain the list.
- *
- * Return:  none
- */
-static void ldm_free_vblks (struct list_head *lh)
-{
-        struct list_head *item, *tmp;
-        BUG_ON (!lh);
-        list_for_each_safe (item, tmp, lh)
-                kfree (list_entry (item, struct vblk, list));
-}
-/**
- * ldm_partition - Find out whether a device is a dynamic disk and handle it
- * @state: Partition check state including device holding the LDM Database
- *
- * This determines whether the device @bdev is a dynamic disk and if so creates
- * the partitions necessary in the gendisk structure pointed to by @hd.
- *
- * We create a dummy device 1, which contains the LDM database, and then create
- * each partition described by the LDM database in sequence as devices 2+. For
- * example, if the device is hda, we would have: hda1: LDM database, hda2, hda3,
- * and so on: the actual data containing partitions.
- *
- * Return:  1 Success, @state->bdev is a dynamic disk and we handled it
- *          0 Success, @state->bdev is not a dynamic disk
- *         -1 An error occurred before enough information had been read
- *            Or @state->bdev is a dynamic disk, but it may be corrupted
- */
-int ldm_partition(struct parsed_partitions *state)
-{
-        struct ldmdb  *ldb;
-        unsigned long base;
-        int result = -1;
-        BUG_ON(!state);
-        /* Look for signs of a Dynamic Disk */
-        if (!ldm_validate_partition_table(state))
-                return 0;
-        ldb = kmalloc (sizeof (*ldb), GFP_KERNEL);
-        if (!ldb) {
-                ldm_crit ("Out of memory.");
-                goto out;
-        }
-        /* Parse and check privheads. */
-        if (!ldm_validate_privheads(state, &ldb->ph))
-                goto out;               /* Already logged */
-        /* All further references are relative to base (database start). */
-        base = ldb->ph.config_start;
-        /* Parse and check tocs and vmdb. */
-        if (!ldm_validate_tocblocks(state, base, ldb) ||
-            !ldm_validate_vmdb(state, base, ldb))
-                goto out;               /* Already logged */
-        /* Initialize vblk lists in ldmdb struct */
-        INIT_LIST_HEAD (&ldb->v_dgrp);
-        INIT_LIST_HEAD (&ldb->v_disk);
-        INIT_LIST_HEAD (&ldb->v_volu);
-        INIT_LIST_HEAD (&ldb->v_comp);
-        INIT_LIST_HEAD (&ldb->v_part);
-        if (!ldm_get_vblks(state, base, ldb)) {
-                ldm_crit ("Failed to read the VBLKs from the database.");
-                goto cleanup;
-        }
-        /* Finally, create the data partition devices. */
-        if (ldm_create_data_partitions(state, ldb)) {
-                ldm_debug ("Parsed LDM database successfully.");
-                result = 1;
-        }
-        /* else Already logged */
-cleanup:
-        ldm_free_vblks (&ldb->v_dgrp);
-        ldm_free_vblks (&ldb->v_disk);
-        ldm_free_vblks (&ldb->v_volu);
-        ldm_free_vblks (&ldb->v_comp);
-        ldm_free_vblks (&ldb->v_part);
-out:
-        kfree (ldb);
-        return result;
-}
diff --git a/fs/partitions/ldm.h b/fs/partitions/ldm.h
deleted file mode 100644
index 374242c0971a..000000000000
--- a/fs/partitions/ldm.h
+++ /dev/null
@@ -1,215 +0,0 @@
-/**
- * ldm - Part of the Linux-NTFS project.
- *
- * Copyright (C) 2001,2002 Richard Russon <ldm@flatcap.org>
- * Copyright (c) 2001-2007 Anton Altaparmakov
- * Copyright (C) 2001,2002 Jakob Kemi <jakob.kemi@telia.com>
- *
- * Documentation is available at http://www.linux-ntfs.org/doku.php?id=downloads 
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program (in the main directory of the Linux-NTFS source
- * in the file COPYING); if not, write to the Free Software Foundation,
- * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-#ifndef _FS_PT_LDM_H_
-#define _FS_PT_LDM_H_
-#include <linux/types.h>
-#include <linux/list.h>
-#include <linux/genhd.h>
-#include <linux/fs.h>
-#include <asm/unaligned.h>
-#include <asm/byteorder.h>
-struct parsed_partitions;
-/* Magic numbers in CPU format. */
-#define MAGIC_VMDB      0x564D4442              /* VMDB */
-#define MAGIC_VBLK      0x56424C4B              /* VBLK */
-#define MAGIC_PRIVHEAD  0x5052495648454144ULL   /* PRIVHEAD */
-#define MAGIC_TOCBLOCK  0x544F43424C4F434BULL   /* TOCBLOCK */
-/* The defined vblk types. */
-#define VBLK_VOL5               0x51            /* Volume,     version 5 */
-#define VBLK_CMP3               0x32            /* Component,  version 3 */
-#define VBLK_PRT3               0x33            /* Partition,  version 3 */
-#define VBLK_DSK3               0x34            /* Disk,       version 3 */
-#define VBLK_DSK4               0x44            /* Disk,       version 4 */
-#define VBLK_DGR3               0x35            /* Disk Group, version 3 */
-#define VBLK_DGR4               0x45            /* Disk Group, version 4 */
-/* vblk flags indicating extra information will be present */
-#define VBLK_FLAG_COMP_STRIPE   0x10
-#define VBLK_FLAG_PART_INDEX    0x08
-#define VBLK_FLAG_DGR3_IDS      0x08
-#define VBLK_FLAG_DGR4_IDS      0x08
-#define VBLK_FLAG_VOLU_ID1      0x08
-#define VBLK_FLAG_VOLU_ID2      0x20
-#define VBLK_FLAG_VOLU_SIZE     0x80
-#define VBLK_FLAG_VOLU_DRIVE    0x02
-/* size of a vblk's static parts */
-#define VBLK_SIZE_HEAD          16
-#define VBLK_SIZE_CMP3          22              /* Name and version */
-#define VBLK_SIZE_DGR3          12
-#define VBLK_SIZE_DGR4          44
-#define VBLK_SIZE_DSK3          12
-#define VBLK_SIZE_DSK4          45
-#define VBLK_SIZE_PRT3          28
-#define VBLK_SIZE_VOL5          58
-/* component types */
-#define COMP_STRIPE             0x01            /* Stripe-set */
-#define COMP_BASIC              0x02            /* Basic disk */
-#define COMP_RAID               0x03            /* Raid-set */
-/* Other constants. */
-#define LDM_DB_SIZE             2048            /* Size in sectors (= 1MiB). */
-#define OFF_PRIV1               6               /* Offset of the first privhead
-                                                   relative to the start of the
-                                                   device in sectors */
-/* Offsets to structures within the LDM Database in sectors. */
-#define OFF_PRIV2               1856            /* Backup private headers. */
-#define OFF_PRIV3               2047
-#define OFF_TOCB1               1               /* Tables of contents. */
-#define OFF_TOCB2               2
-#define OFF_TOCB3               2045
-#define OFF_TOCB4               2046
-#define OFF_VMDB                17              /* List of partitions. */
-#define LDM_PARTITION           0x42            /* Formerly SFS (Landis). */
-#define TOC_BITMAP1             "config"        /* Names of the two defined */
-#define TOC_BITMAP2             "log"           /* bitmaps in the TOCBLOCK. */
-/* Borrowed from msdos.c */
-#define SYS_IND(p)              (get_unaligned(&(p)->sys_ind))
-struct frag {                           /* VBLK Fragment handling */
-        struct list_head list;
-        u32             group;
-        u8              num;            /* Total number of records */
-        u8              rec;            /* This is record number n */
-        u8              map;            /* Which portions are in use */
-        u8              data[0];
-};
-/* In memory LDM database structures. */
-#define GUID_SIZE               16
-struct privhead {                       /* Offsets and sizes are in sectors. */
-        u16     ver_major;
-        u16     ver_minor;
-        u64     logical_disk_start;
-        u64     logical_disk_size;
-        u64     config_start;
-        u64     config_size;
-        u8      disk_id[GUID_SIZE];
-};
-struct tocblock {                       /* We have exactly two bitmaps. */
-        u8      bitmap1_name[16];
-        u64     bitmap1_start;
-        u64     bitmap1_size;
-        u8      bitmap2_name[16];
-        u64     bitmap2_start;
-        u64     bitmap2_size;
-};
-struct vmdb {                           /* VMDB: The database header */
-        u16     ver_major;
-        u16     ver_minor;
-        u32     vblk_size;
-        u32     vblk_offset;
-        u32     last_vblk_seq;
-};
-struct vblk_comp {                      /* VBLK Component */
-        u8      state[16];
-        u64     parent_id;
-        u8      type;
-        u8      children;
-        u16     chunksize;
-};
-struct vblk_dgrp {                      /* VBLK Disk Group */
-        u8      disk_id[64];
-};
-struct vblk_disk {                      /* VBLK Disk */
-        u8      disk_id[GUID_SIZE];
-        u8      alt_name[128];
-};
-struct vblk_part {                      /* VBLK Partition */
-        u64     start;
-        u64     size;                   /* start, size and vol_off in sectors */
-        u64     volume_offset;
-        u64     parent_id;
-        u64     disk_id;
-        u8      partnum;
-};
-struct vblk_volu {                      /* VBLK Volume */
-        u8      volume_type[16];
-        u8      volume_state[16];
-        u8      guid[16];
-        u8      drive_hint[4];
-        u64     size;
-        u8      partition_type;
-};
-struct vblk_head {                      /* VBLK standard header */
-        u32 group;
-        u16 rec;
-        u16 nrec;
-};
-struct vblk {                           /* Generalised VBLK */
-        u8      name[64];
-        u64     obj_id;
-        u32     sequence;
-        u8      flags;
-        u8      type;
-        union {
-                struct vblk_comp comp;
-                struct vblk_dgrp dgrp;
-                struct vblk_disk disk;
-                struct vblk_part part;
-                struct vblk_volu volu;
-        } vblk;
-        struct list_head list;
-};
-struct ldmdb {                          /* Cache of the database */
-        struct privhead ph;
-        struct tocblock toc;
-        struct vmdb     vm;
-        struct list_head v_dgrp;
-        struct list_head v_disk;
-        struct list_head v_volu;
-        struct list_head v_comp;
-        struct list_head v_part;
-};
-int ldm_partition(struct parsed_partitions *state);
-#endif /* _FS_PT_LDM_H_ */
diff --git a/fs/partitions/mac.c b/fs/partitions/mac.c
deleted file mode 100644
index 11f688bd76c5..000000000000
--- a/fs/partitions/mac.c
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- *  fs/partitions/mac.c
- *
- *  Code extracted from drivers/block/genhd.c
- *  Copyright (C) 1991-1998  Linus Torvalds
- *  Re-organised Feb 1998 Russell King
- */
-#include <linux/ctype.h>
-#include "check.h"
-#include "mac.h"
-#ifdef CONFIG_PPC_PMAC
-#include <asm/machdep.h>
-extern void note_bootable_part(dev_t dev, int part, int goodness);
-#endif
-/*
- * Code to understand MacOS partition tables.
- */
-static inline void mac_fix_string(char *stg, int len)
-{
-        int i;
-        for (i = len - 1; i >= 0 && stg[i] == ' '; i--)
-                stg[i] = 0;
-}
-int mac_partition(struct parsed_partitions *state)
-{
-        Sector sect;
-        unsigned char *data;
-        int slot, blocks_in_map;
-        unsigned secsize;
-#ifdef CONFIG_PPC_PMAC
-        int found_root = 0;
-        int found_root_goodness = 0;
-#endif
-        struct mac_partition *part;
-        struct mac_driver_desc *md;
-        /* Get 0th block and look at the first partition map entry. */
-        md = read_part_sector(state, 0, &sect);
-        if (!md)
-                return -1;
-        if (be16_to_cpu(md->signature) != MAC_DRIVER_MAGIC) {
-                put_dev_sector(sect);
-                return 0;
-        }
-        secsize = be16_to_cpu(md->block_size);
-        put_dev_sector(sect);
-        data = read_part_sector(state, secsize/512, &sect);
-        if (!data)
-                return -1;
-        part = (struct mac_partition *) (data + secsize%512);
-        if (be16_to_cpu(part->signature) != MAC_PARTITION_MAGIC) {
-                put_dev_sector(sect);
-                return 0;               /* not a MacOS disk */
-        }
-        blocks_in_map = be32_to_cpu(part->map_count);
-        if (blocks_in_map < 0 || blocks_in_map >= DISK_MAX_PARTS) {
-                put_dev_sector(sect);
-                return 0;
-        }
-        strlcat(state->pp_buf, " [mac]", PAGE_SIZE);
-        for (slot = 1; slot <= blocks_in_map; ++slot) {
-                int pos = slot * secsize;
-                put_dev_sector(sect);
-                data = read_part_sector(state, pos/512, &sect);
-                if (!data)
-                        return -1;
-                part = (struct mac_partition *) (data + pos%512);
-                if (be16_to_cpu(part->signature) != MAC_PARTITION_MAGIC)
-                        break;
-                put_partition(state, slot,
-                        be32_to_cpu(part->start_block) * (secsize/512),
-                        be32_to_cpu(part->block_count) * (secsize/512));
-                if (!strnicmp(part->type, "Linux_RAID", 10))
-                        state->parts[slot].flags = ADDPART_FLAG_RAID;
-#ifdef CONFIG_PPC_PMAC
-                /*
-                 * If this is the first bootable partition, tell the
-                 * setup code, in case it wants to make this the root.
-                 */
-                if (machine_is(powermac)) {
-                        int goodness = 0;
-                        mac_fix_string(part->processor, 16);
-                        mac_fix_string(part->name, 32);
-                        mac_fix_string(part->type, 32);                                 
-                    
-                        if ((be32_to_cpu(part->status) & MAC_STATUS_BOOTABLE)
-                            && strcasecmp(part->processor, "powerpc") == 0)
-                                goodness++;
-                        if (strcasecmp(part->type, "Apple_UNIX_SVR2") == 0
-                            || (strnicmp(part->type, "Linux", 5) == 0
-                                && strcasecmp(part->type, "Linux_swap") != 0)) {
-                                int i, l;
-                                goodness++;
-                                l = strlen(part->name);
-                                if (strcmp(part->name, "/") == 0)
-                                        goodness++;
-                                for (i = 0; i <= l - 4; ++i) {
-                                        if (strnicmp(part->name + i, "root",
-                                                     4) == 0) {
-                                                goodness += 2;
-                                                break;
-                                        }
-                                }
-                                if (strnicmp(part->name, "swap", 4) == 0)
-                                        goodness--;
-                        }
-                        if (goodness > found_root_goodness) {
-                                found_root = slot;
-                                found_root_goodness = goodness;
-                        }
-                }
-#endif /* CONFIG_PPC_PMAC */
-        }
-#ifdef CONFIG_PPC_PMAC
-        if (found_root_goodness)
-                note_bootable_part(state->bdev->bd_dev, found_root,
-                                   found_root_goodness);
-#endif
-        put_dev_sector(sect);
-        strlcat(state->pp_buf, "\n", PAGE_SIZE);
-        return 1;
-}
diff --git a/fs/partitions/mac.h b/fs/partitions/mac.h
deleted file mode 100644
index 3c7d98436380..000000000000
--- a/fs/partitions/mac.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- *  fs/partitions/mac.h
- */
-#define MAC_PARTITION_MAGIC     0x504d
-/* type field value for A/UX or other Unix partitions */
-#define APPLE_AUX_TYPE  "Apple_UNIX_SVR2"
-struct mac_partition {
-        __be16  signature;      /* expected to be MAC_PARTITION_MAGIC */
-        __be16  res1;
-        __be32  map_count;      /* # blocks in partition map */
-        __be32  start_block;    /* absolute starting block # of partition */
-        __be32  block_count;    /* number of blocks in partition */
-        char    name[32];       /* partition name */
-        char    type[32];       /* string type description */
-        __be32  data_start;     /* rel block # of first data block */
-        __be32  data_count;     /* number of data blocks */
-        __be32  status;         /* partition status bits */
-        __be32  boot_start;
-        __be32  boot_size;
-        __be32  boot_load;
-        __be32  boot_load2;
-        __be32  boot_entry;
-        __be32  boot_entry2;
-        __be32  boot_cksum;
-        char    processor[16];  /* identifies ISA of boot */
-        /* there is more stuff after this that we don't need */
-};
-#define MAC_STATUS_BOOTABLE     8       /* partition is bootable */
-#define MAC_DRIVER_MAGIC        0x4552
-/* Driver descriptor structure, in block 0 */
-struct mac_driver_desc {
-        __be16  signature;      /* expected to be MAC_DRIVER_MAGIC */
-        __be16  block_size;
-        __be32  block_count;
-    /* ... more stuff */
-};
-int mac_partition(struct parsed_partitions *state);
diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c
deleted file mode 100644
index 5f79a6677c69..000000000000
--- a/fs/partitions/msdos.c
+++ /dev/null
@@ -1,552 +0,0 @@
-/*
- *  fs/partitions/msdos.c
- *
- *  Code extracted from drivers/block/genhd.c
- *  Copyright (C) 1991-1998  Linus Torvalds
- *
- *  Thanks to Branko Lankester, lankeste@fwi.uva.nl, who found a bug
- *  in the early extended-partition checks and added DM partitions
- *
- *  Support for DiskManager v6.0x added by Mark Lord,
- *  with information provided by OnTrack.  This now works for linux fdisk
- *  and LILO, as well as loadlin and bootln.  Note that disks other than
- *  /dev/hda *must* have a "DOS" type 0x51 partition in the first slot (hda1).
- *
- *  More flexible handling of extended partitions - aeb, 950831
- *
- *  Check partition table on IDE disks for common CHS translations
- *
- *  Re-organised Feb 1998 Russell King
- */
-#include <linux/msdos_fs.h>
-#include "check.h"
-#include "msdos.h"
-#include "efi.h"
-/*
- * Many architectures don't like unaligned accesses, while
- * the nr_sects and start_sect partition table entries are
- * at a 2 (mod 4) address.
- */
-#include <asm/unaligned.h>
-#define SYS_IND(p)      get_unaligned(&p->sys_ind)
-static inline sector_t nr_sects(struct partition *p)
-{
-        return (sector_t)get_unaligned_le32(&p->nr_sects);
-}
-static inline sector_t start_sect(struct partition *p)
-{
-        return (sector_t)get_unaligned_le32(&p->start_sect);
-}
-static inline int is_extended_partition(struct partition *p)
-{
-        return (SYS_IND(p) == DOS_EXTENDED_PARTITION ||
-                SYS_IND(p) == WIN98_EXTENDED_PARTITION ||
-                SYS_IND(p) == LINUX_EXTENDED_PARTITION);
-}
-#define MSDOS_LABEL_MAGIC1      0x55
-#define MSDOS_LABEL_MAGIC2      0xAA
-static inline int
-msdos_magic_present(unsigned char *p)
-{
-        return (p[0] == MSDOS_LABEL_MAGIC1 && p[1] == MSDOS_LABEL_MAGIC2);
-}
-/* Value is EBCDIC 'IBMA' */
-#define AIX_LABEL_MAGIC1        0xC9
-#define AIX_LABEL_MAGIC2        0xC2
-#define AIX_LABEL_MAGIC3        0xD4
-#define AIX_LABEL_MAGIC4        0xC1
-static int aix_magic_present(struct parsed_partitions *state, unsigned char *p)
-{
-        struct partition *pt = (struct partition *) (p + 0x1be);
-        Sector sect;
-        unsigned char *d;
-        int slot, ret = 0;
-        if (!(p[0] == AIX_LABEL_MAGIC1 &&
-                p[1] == AIX_LABEL_MAGIC2 &&
-                p[2] == AIX_LABEL_MAGIC3 &&
-                p[3] == AIX_LABEL_MAGIC4))
-                return 0;
-        /* Assume the partition table is valid if Linux partitions exists */
-        for (slot = 1; slot <= 4; slot++, pt++) {
-                if (pt->sys_ind == LINUX_SWAP_PARTITION ||
-                        pt->sys_ind == LINUX_RAID_PARTITION ||
-                        pt->sys_ind == LINUX_DATA_PARTITION ||
-                        pt->sys_ind == LINUX_LVM_PARTITION ||
-                        is_extended_partition(pt))
-                        return 0;
-        }
-        d = read_part_sector(state, 7, &sect);
-        if (d) {
-                if (d[0] == '_' && d[1] == 'L' && d[2] == 'V' && d[3] == 'M')
-                        ret = 1;
-                put_dev_sector(sect);
-        };
-        return ret;
-}
-/*
- * Create devices for each logical partition in an extended partition.
- * The logical partitions form a linked list, with each entry being
- * a partition table with two entries.  The first entry
- * is the real data partition (with a start relative to the partition
- * table start).  The second is a pointer to the next logical partition
- * (with a start relative to the entire extended partition).
- * We do not create a Linux partition for the partition tables, but
- * only for the actual data partitions.
- */
-static void parse_extended(struct parsed_partitions *state,
-                           sector_t first_sector, sector_t first_size)
-{
-        struct partition *p;
-        Sector sect;
-        unsigned char *data;
-        sector_t this_sector, this_size;
-        sector_t sector_size = bdev_logical_block_size(state->bdev) / 512;
-        int loopct = 0;         /* number of links followed
-                                   without finding a data partition */
-        int i;
-        this_sector = first_sector;
-        this_size = first_size;
-        while (1) {
-                if (++loopct > 100)
-                        return;
-                if (state->next == state->limit)
-                        return;
-                data = read_part_sector(state, this_sector, &sect);
-                if (!data)
-                        return;
-                if (!msdos_magic_present(data + 510))
-                        goto done; 
-                p = (struct partition *) (data + 0x1be);
-                /*
-                 * Usually, the first entry is the real data partition,
-                 * the 2nd entry is the next extended partition, or empty,
-                 * and the 3rd and 4th entries are unused.
-                 * However, DRDOS sometimes has the extended partition as
-                 * the first entry (when the data partition is empty),
-                 * and OS/2 seems to use all four entries.
-                 */
-                /* 
-                 * First process the data partition(s)
-                 */
-                for (i=0; i<4; i++, p++) {
-                        sector_t offs, size, next;
-                        if (!nr_sects(p) || is_extended_partition(p))
-                                continue;
-                        /* Check the 3rd and 4th entries -
-                           these sometimes contain random garbage */
-                        offs = start_sect(p)*sector_size;
-                        size = nr_sects(p)*sector_size;
-                        next = this_sector + offs;
-                        if (i >= 2) {
-                                if (offs + size > this_size)
-                                        continue;
-                                if (next < first_sector)
-                                        continue;
-                                if (next + size > first_sector + first_size)
-                                        continue;
-                        }
-                        put_partition(state, state->next, next, size);
-                        if (SYS_IND(p) == LINUX_RAID_PARTITION)
-                                state->parts[state->next].flags = ADDPART_FLAG_RAID;
-                        loopct = 0;
-                        if (++state->next == state->limit)
-                                goto done;
-                }
-                /*
-                 * Next, process the (first) extended partition, if present.
-                 * (So far, there seems to be no reason to make
-                 *  parse_extended()  recursive and allow a tree
-                 *  of extended partitions.)
-                 * It should be a link to the next logical partition.
-                 */
-                p -= 4;
-                for (i=0; i<4; i++, p++)
-                        if (nr_sects(p) && is_extended_partition(p))
-                                break;
-                if (i == 4)
-                        goto done;       /* nothing left to do */
-                this_sector = first_sector + start_sect(p) * sector_size;
-                this_size = nr_sects(p) * sector_size;
-                put_dev_sector(sect);
-        }
-done:
-        put_dev_sector(sect);
-}
-/* james@bpgc.com: Solaris has a nasty indicator: 0x82 which also
-   indicates linux swap.  Be careful before believing this is Solaris. */
-static void parse_solaris_x86(struct parsed_partitions *state,
-                              sector_t offset, sector_t size, int origin)
-{
-#ifdef CONFIG_SOLARIS_X86_PARTITION
-        Sector sect;
-        struct solaris_x86_vtoc *v;
-        int i;
-        short max_nparts;
-        v = read_part_sector(state, offset + 1, &sect);
-        if (!v)
-                return;
-        if (le32_to_cpu(v->v_sanity) != SOLARIS_X86_VTOC_SANE) {
-                put_dev_sector(sect);
-                return;
-        }
-        {
-                char tmp[1 + BDEVNAME_SIZE + 10 + 11 + 1];
-                snprintf(tmp, sizeof(tmp), " %s%d: <solaris:", state->name, origin);
-                strlcat(state->pp_buf, tmp, PAGE_SIZE);
-        }
-        if (le32_to_cpu(v->v_version) != 1) {
-                char tmp[64];
-                snprintf(tmp, sizeof(tmp), "  cannot handle version %d vtoc>\n",
-                         le32_to_cpu(v->v_version));
-                strlcat(state->pp_buf, tmp, PAGE_SIZE);
-                put_dev_sector(sect);
-                return;
-        }
-        /* Ensure we can handle previous case of VTOC with 8 entries gracefully */
-        max_nparts = le16_to_cpu (v->v_nparts) > 8 ? SOLARIS_X86_NUMSLICE : 8;
-        for (i=0; i<max_nparts && state->next<state->limit; i++) {
-                struct solaris_x86_slice *s = &v->v_slice[i];
-                char tmp[3 + 10 + 1 + 1];
-                if (s->s_size == 0)
-                        continue;
-                snprintf(tmp, sizeof(tmp), " [s%d]", i);
-                strlcat(state->pp_buf, tmp, PAGE_SIZE);
-                /* solaris partitions are relative to current MS-DOS
-                 * one; must add the offset of the current partition */
-                put_partition(state, state->next++,
-                                 le32_to_cpu(s->s_start)+offset,
-                                 le32_to_cpu(s->s_size));
-        }
-        put_dev_sector(sect);
-        strlcat(state->pp_buf, " >\n", PAGE_SIZE);
-#endif
-}
-#if defined(CONFIG_BSD_DISKLABEL)
-/* 
- * Create devices for BSD partitions listed in a disklabel, under a
- * dos-like partition. See parse_extended() for more information.
- */
-static void parse_bsd(struct parsed_partitions *state,
-                      sector_t offset, sector_t size, int origin, char *flavour,
-                      int max_partitions)
-{
-        Sector sect;
-        struct bsd_disklabel *l;
-        struct bsd_partition *p;
-        char tmp[64];
-        l = read_part_sector(state, offset + 1, &sect);
-        if (!l)
-                return;
-        if (le32_to_cpu(l->d_magic) != BSD_DISKMAGIC) {
-                put_dev_sector(sect);
-                return;
-        }
-        snprintf(tmp, sizeof(tmp), " %s%d: <%s:", state->name, origin, flavour);
-        strlcat(state->pp_buf, tmp, PAGE_SIZE);
-        if (le16_to_cpu(l->d_npartitions) < max_partitions)
-                max_partitions = le16_to_cpu(l->d_npartitions);
-        for (p = l->d_partitions; p - l->d_partitions < max_partitions; p++) {
-                sector_t bsd_start, bsd_size;
-                if (state->next == state->limit)
-                        break;
-                if (p->p_fstype == BSD_FS_UNUSED) 
-                        continue;
-                bsd_start = le32_to_cpu(p->p_offset);
-                bsd_size = le32_to_cpu(p->p_size);
-                if (offset == bsd_start && size == bsd_size)
-                        /* full parent partition, we have it already */
-                        continue;
-                if (offset > bsd_start || offset+size < bsd_start+bsd_size) {
-                        strlcat(state->pp_buf, "bad subpartition - ignored\n", PAGE_SIZE);
-                        continue;
-                }
-                put_partition(state, state->next++, bsd_start, bsd_size);
-        }
-        put_dev_sector(sect);
-        if (le16_to_cpu(l->d_npartitions) > max_partitions) {
-                snprintf(tmp, sizeof(tmp), " (ignored %d more)",
-                         le16_to_cpu(l->d_npartitions) - max_partitions);
-                strlcat(state->pp_buf, tmp, PAGE_SIZE);
-        }
-        strlcat(state->pp_buf, " >\n", PAGE_SIZE);
-}
-#endif
-static void parse_freebsd(struct parsed_partitions *state,
-                          sector_t offset, sector_t size, int origin)
-{
-#ifdef CONFIG_BSD_DISKLABEL
-        parse_bsd(state, offset, size, origin, "bsd", BSD_MAXPARTITIONS);
-#endif
-}
-static void parse_netbsd(struct parsed_partitions *state,
-                         sector_t offset, sector_t size, int origin)
-{
-#ifdef CONFIG_BSD_DISKLABEL
-        parse_bsd(state, offset, size, origin, "netbsd", BSD_MAXPARTITIONS);
-#endif
-}
-static void parse_openbsd(struct parsed_partitions *state,
-                          sector_t offset, sector_t size, int origin)
-{
-#ifdef CONFIG_BSD_DISKLABEL
-        parse_bsd(state, offset, size, origin, "openbsd",
-                  OPENBSD_MAXPARTITIONS);
-#endif
-}
-/*
- * Create devices for Unixware partitions listed in a disklabel, under a
- * dos-like partition. See parse_extended() for more information.
- */
-static void parse_unixware(struct parsed_partitions *state,
-                           sector_t offset, sector_t size, int origin)
-{
-#ifdef CONFIG_UNIXWARE_DISKLABEL
-        Sector sect;
-        struct unixware_disklabel *l;
-        struct unixware_slice *p;
-        l = read_part_sector(state, offset + 29, &sect);
-        if (!l)
-                return;
-        if (le32_to_cpu(l->d_magic) != UNIXWARE_DISKMAGIC ||
-            le32_to_cpu(l->vtoc.v_magic) != UNIXWARE_DISKMAGIC2) {
-                put_dev_sector(sect);
-                return;
-        }
-        {
-                char tmp[1 + BDEVNAME_SIZE + 10 + 12 + 1];
-                snprintf(tmp, sizeof(tmp), " %s%d: <unixware:", state->name, origin);
-                strlcat(state->pp_buf, tmp, PAGE_SIZE);
-        }
-        p = &l->vtoc.v_slice[1];
-        /* I omit the 0th slice as it is the same as whole disk. */
-        while (p - &l->vtoc.v_slice[0] < UNIXWARE_NUMSLICE) {
-                if (state->next == state->limit)
-                        break;
-                if (p->s_label != UNIXWARE_FS_UNUSED)
-                        put_partition(state, state->next++,
-                                      le32_to_cpu(p->start_sect),
-                                      le32_to_cpu(p->nr_sects));
-                p++;
-        }
-        put_dev_sector(sect);
-        strlcat(state->pp_buf, " >\n", PAGE_SIZE);
-#endif
-}
-/*
- * Minix 2.0.0/2.0.2 subpartition support.
- * Anand Krishnamurthy <anandk@wiproge.med.ge.com>
- * Rajeev V. Pillai    <rajeevvp@yahoo.com>
- */
-static void parse_minix(struct parsed_partitions *state,
-                        sector_t offset, sector_t size, int origin)
-{
-#ifdef CONFIG_MINIX_SUBPARTITION
-        Sector sect;
-        unsigned char *data;
-        struct partition *p;
-        int i;
-        data = read_part_sector(state, offset, &sect);
-        if (!data)
-                return;
-        p = (struct partition *)(data + 0x1be);
-        /* The first sector of a Minix partition can have either
-         * a secondary MBR describing its subpartitions, or
-         * the normal boot sector. */
-        if (msdos_magic_present (data + 510) &&
-            SYS_IND(p) == MINIX_PARTITION) { /* subpartition table present */
-                char tmp[1 + BDEVNAME_SIZE + 10 + 9 + 1];
-                snprintf(tmp, sizeof(tmp), " %s%d: <minix:", state->name, origin);
-                strlcat(state->pp_buf, tmp, PAGE_SIZE);
-                for (i = 0; i < MINIX_NR_SUBPARTITIONS; i++, p++) {
-                        if (state->next == state->limit)
-                                break;
-                        /* add each partition in use */
-                        if (SYS_IND(p) == MINIX_PARTITION)
-                                put_partition(state, state->next++,
-                                              start_sect(p), nr_sects(p));
-                }
-                strlcat(state->pp_buf, " >\n", PAGE_SIZE);
-        }
-        put_dev_sector(sect);
-#endif /* CONFIG_MINIX_SUBPARTITION */
-}
-static struct {
-        unsigned char id;
-        void (*parse)(struct parsed_partitions *, sector_t, sector_t, int);
-} subtypes[] = {
-        {FREEBSD_PARTITION, parse_freebsd},
-        {NETBSD_PARTITION, parse_netbsd},
-        {OPENBSD_PARTITION, parse_openbsd},
-        {MINIX_PARTITION, parse_minix},
-        {UNIXWARE_PARTITION, parse_unixware},
-        {SOLARIS_X86_PARTITION, parse_solaris_x86},
-        {NEW_SOLARIS_X86_PARTITION, parse_solaris_x86},
-        {0, NULL},
-};
- 
-int msdos_partition(struct parsed_partitions *state)
-{
-        sector_t sector_size = bdev_logical_block_size(state->bdev) / 512;
-        Sector sect;
-        unsigned char *data;
-        struct partition *p;
-        struct fat_boot_sector *fb;
-        int slot;
-        data = read_part_sector(state, 0, &sect);
-        if (!data)
-                return -1;
-        if (!msdos_magic_present(data + 510)) {
-                put_dev_sector(sect);
-                return 0;
-        }
-        if (aix_magic_present(state, data)) {
-                put_dev_sector(sect);
-                strlcat(state->pp_buf, " [AIX]", PAGE_SIZE);
-                return 0;
-        }
-        /*
-         * Now that the 55aa signature is present, this is probably
-         * either the boot sector of a FAT filesystem or a DOS-type
-         * partition table. Reject this in case the boot indicator
-         * is not 0 or 0x80.
-         */
-        p = (struct partition *) (data + 0x1be);
-        for (slot = 1; slot <= 4; slot++, p++) {
-                if (p->boot_ind != 0 && p->boot_ind != 0x80) {
-                        /*
-                         * Even without a valid boot inidicator value
-                         * its still possible this is valid FAT filesystem
-                         * without a partition table.
-                         */
-                        fb = (struct fat_boot_sector *) data;
-                        if (slot == 1 && fb->reserved && fb->fats
-                                && fat_valid_media(fb->media)) {
-                                strlcat(state->pp_buf, "\n", PAGE_SIZE);
-                                put_dev_sector(sect);
-                                return 1;
-                        } else {
-                                put_dev_sector(sect);
-                                return 0;
-                        }
-                }
-        }
-#ifdef CONFIG_EFI_PARTITION
-        p = (struct partition *) (data + 0x1be);
-        for (slot = 1 ; slot <= 4 ; slot++, p++) {
-                /* If this is an EFI GPT disk, msdos should ignore it. */
-                if (SYS_IND(p) == EFI_PMBR_OSTYPE_EFI_GPT) {
-                        put_dev_sector(sect);
-                        return 0;
-                }
-        }
-#endif
-        p = (struct partition *) (data + 0x1be);
-        /*
-         * Look for partitions in two passes:
-         * First find the primary and DOS-type extended partitions.
-         * On the second pass look inside *BSD, Unixware and Solaris partitions.
-         */
-        state->next = 5;
-        for (slot = 1 ; slot <= 4 ; slot++, p++) {
-                sector_t start = start_sect(p)*sector_size;
-                sector_t size = nr_sects(p)*sector_size;
-                if (!size)
-                        continue;
-                if (is_extended_partition(p)) {
-                        /*
-                         * prevent someone doing mkfs or mkswap on an
-                         * extended partition, but leave room for LILO
-                         * FIXME: this uses one logical sector for > 512b
-                         * sector, although it may not be enough/proper.
-                         */
-                        sector_t n = 2;
-                        n = min(size, max(sector_size, n));
-                        put_partition(state, slot, start, n);
-                        strlcat(state->pp_buf, " <", PAGE_SIZE);
-                        parse_extended(state, start, size);
-                        strlcat(state->pp_buf, " >", PAGE_SIZE);
-                        continue;
-                }
-                put_partition(state, slot, start, size);
-                if (SYS_IND(p) == LINUX_RAID_PARTITION)
-                        state->parts[slot].flags = ADDPART_FLAG_RAID;
-                if (SYS_IND(p) == DM6_PARTITION)
-                        strlcat(state->pp_buf, "[DM]", PAGE_SIZE);
-                if (SYS_IND(p) == EZD_PARTITION)
-                        strlcat(state->pp_buf, "[EZD]", PAGE_SIZE);
-        }
-        strlcat(state->pp_buf, "\n", PAGE_SIZE);
-        /* second pass - output for each on a separate line */
-        p = (struct partition *) (0x1be + data);
-        for (slot = 1 ; slot <= 4 ; slot++, p++) {
-                unsigned char id = SYS_IND(p);
-                int n;
-                if (!nr_sects(p))
-                        continue;
-                for (n = 0; subtypes[n].parse && id != subtypes[n].id; n++)
-                        ;
-                if (!subtypes[n].parse)
-                        continue;
-                subtypes[n].parse(state, start_sect(p) * sector_size,
-                                  nr_sects(p) * sector_size, slot);
-        }
-        put_dev_sector(sect);
-        return 1;
-}
diff --git a/fs/partitions/msdos.h b/fs/partitions/msdos.h
deleted file mode 100644
index 38c781c490b3..000000000000
--- a/fs/partitions/msdos.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/*
- *  fs/partitions/msdos.h
- */
-#define MSDOS_LABEL_MAGIC               0xAA55
-int msdos_partition(struct parsed_partitions *state);
diff --git a/fs/partitions/osf.c b/fs/partitions/osf.c
deleted file mode 100644
index 764b86a01965..000000000000
--- a/fs/partitions/osf.c
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- *  fs/partitions/osf.c
- *
- *  Code extracted from drivers/block/genhd.c
- *
- *  Copyright (C) 1991-1998  Linus Torvalds
- *  Re-organised Feb 1998 Russell King
- */
-#include "check.h"
-#include "osf.h"
-#define MAX_OSF_PARTITIONS 18
-int osf_partition(struct parsed_partitions *state)
-{
-        int i;
-        int slot = 1;
-        unsigned int npartitions;
-        Sector sect;
-        unsigned char *data;
-        struct disklabel {
-                __le32 d_magic;
-                __le16 d_type,d_subtype;
-                u8 d_typename[16];
-                u8 d_packname[16];
-                __le32 d_secsize;
-                __le32 d_nsectors;
-                __le32 d_ntracks;
-                __le32 d_ncylinders;
-                __le32 d_secpercyl;
-                __le32 d_secprtunit;
-                __le16 d_sparespertrack;
-                __le16 d_sparespercyl;
-                __le32 d_acylinders;
-                __le16 d_rpm, d_interleave, d_trackskew, d_cylskew;
-                __le32 d_headswitch, d_trkseek, d_flags;
-                __le32 d_drivedata[5];
-                __le32 d_spare[5];
-                __le32 d_magic2;
-                __le16 d_checksum;
-                __le16 d_npartitions;
-                __le32 d_bbsize, d_sbsize;
-                struct d_partition {
-                        __le32 p_size;
-                        __le32 p_offset;
-                        __le32 p_fsize;
-                        u8  p_fstype;
-                        u8  p_frag;
-                        __le16 p_cpg;
-                } d_partitions[MAX_OSF_PARTITIONS];
-        } * label;
-        struct d_partition * partition;
-        data = read_part_sector(state, 0, &sect);
-        if (!data)
-                return -1;
-        label = (struct disklabel *) (data+64);
-        partition = label->d_partitions;
-        if (le32_to_cpu(label->d_magic) != DISKLABELMAGIC) {
-                put_dev_sector(sect);
-                return 0;
-        }
-        if (le32_to_cpu(label->d_magic2) != DISKLABELMAGIC) {
-                put_dev_sector(sect);
-                return 0;
-        }
-        npartitions = le16_to_cpu(label->d_npartitions);
-        if (npartitions > MAX_OSF_PARTITIONS) {
-                put_dev_sector(sect);
-                return 0;
-        }
-        for (i = 0 ; i < npartitions; i++, partition++) {
-                if (slot == state->limit)
-                        break;
-                if (le32_to_cpu(partition->p_size))
-                        put_partition(state, slot,
-                                le32_to_cpu(partition->p_offset),
-                                le32_to_cpu(partition->p_size));
-                slot++;
-        }
-        strlcat(state->pp_buf, "\n", PAGE_SIZE);
-        put_dev_sector(sect);
-        return 1;
-}
diff --git a/fs/partitions/osf.h b/fs/partitions/osf.h
deleted file mode 100644
index 20ed2315ec16..000000000000
--- a/fs/partitions/osf.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/*
- *  fs/partitions/osf.h
- */
-#define DISKLABELMAGIC (0x82564557UL)
-int osf_partition(struct parsed_partitions *state);
diff --git a/fs/partitions/sgi.c b/fs/partitions/sgi.c
deleted file mode 100644
index ea8a86dceaf4..000000000000
--- a/fs/partitions/sgi.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- *  fs/partitions/sgi.c
- *
- *  Code extracted from drivers/block/genhd.c
- */
-#include "check.h"
-#include "sgi.h"
-struct sgi_disklabel {
-        __be32 magic_mushroom;          /* Big fat spliff... */
-        __be16 root_part_num;           /* Root partition number */
-        __be16 swap_part_num;           /* Swap partition number */
-        s8 boot_file[16];               /* Name of boot file for ARCS */
-        u8 _unused0[48];                /* Device parameter useless crapola.. */
-        struct sgi_volume {
-                s8 name[8];             /* Name of volume */
-                __be32 block_num;               /* Logical block number */
-                __be32 num_bytes;               /* How big, in bytes */
-        } volume[15];
-        struct sgi_partition {
-                __be32 num_blocks;              /* Size in logical blocks */
-                __be32 first_block;     /* First logical block */
-                __be32 type;            /* Type of this partition */
-        } partitions[16];
-        __be32 csum;                    /* Disk label checksum */
-        __be32 _unused1;                        /* Padding */
-};
-int sgi_partition(struct parsed_partitions *state)
-{
-        int i, csum;
-        __be32 magic;
-        int slot = 1;
-        unsigned int start, blocks;
-        __be32 *ui, cs;
-        Sector sect;
-        struct sgi_disklabel *label;
-        struct sgi_partition *p;
-        char b[BDEVNAME_SIZE];
-        label = read_part_sector(state, 0, &sect);
-        if (!label)
-                return -1;
-        p = &label->partitions[0];
-        magic = label->magic_mushroom;
-        if(be32_to_cpu(magic) != SGI_LABEL_MAGIC) {
-                /*printk("Dev %s SGI disklabel: bad magic %08x\n",
-                       bdevname(bdev, b), be32_to_cpu(magic));*/
-                put_dev_sector(sect);
-                return 0;
-        }
-        ui = ((__be32 *) (label + 1)) - 1;
-        for(csum = 0; ui >= ((__be32 *) label);) {
-                cs = *ui--;
-                csum += be32_to_cpu(cs);
-        }
-        if(csum) {
-                printk(KERN_WARNING "Dev %s SGI disklabel: csum bad, label corrupted\n",
-                       bdevname(state->bdev, b));
-                put_dev_sector(sect);
-                return 0;
-        }
-        /* All SGI disk labels have 16 partitions, disks under Linux only
-         * have 15 minor's.  Luckily there are always a few zero length
-         * partitions which we don't care about so we never overflow the
-         * current_minor.
-         */
-        for(i = 0; i < 16; i++, p++) {
-                blocks = be32_to_cpu(p->num_blocks);
-                start  = be32_to_cpu(p->first_block);
-                if (blocks) {
-                        put_partition(state, slot, start, blocks);
-                        if (be32_to_cpu(p->type) == LINUX_RAID_PARTITION)
-                                state->parts[slot].flags = ADDPART_FLAG_RAID;
-                }
-                slot++;
-        }
-        strlcat(state->pp_buf, "\n", PAGE_SIZE);
-        put_dev_sector(sect);
-        return 1;
-}
diff --git a/fs/partitions/sgi.h b/fs/partitions/sgi.h
deleted file mode 100644
index b9553ebdd5a9..000000000000
--- a/fs/partitions/sgi.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/*
- *  fs/partitions/sgi.h
- */
-extern int sgi_partition(struct parsed_partitions *state);
-#define SGI_LABEL_MAGIC 0x0be5a941
diff --git a/fs/partitions/sun.c b/fs/partitions/sun.c
deleted file mode 100644
index b5b6fcfb3d36..000000000000
--- a/fs/partitions/sun.c
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- *  fs/partitions/sun.c
- *
- *  Code extracted from drivers/block/genhd.c
- *
- *  Copyright (C) 1991-1998  Linus Torvalds
- *  Re-organised Feb 1998 Russell King
- */
-#include "check.h"
-#include "sun.h"
-int sun_partition(struct parsed_partitions *state)
-{
-        int i;
-        __be16 csum;
-        int slot = 1;
-        __be16 *ush;
-        Sector sect;
-        struct sun_disklabel {
-                unsigned char info[128];   /* Informative text string */
-                struct sun_vtoc {
-                    __be32 version;     /* Layout version */
-                    char   volume[8];   /* Volume name */
-                    __be16 nparts;      /* Number of partitions */
-                    struct sun_info {           /* Partition hdrs, sec 2 */
-                        __be16 id;
-                        __be16 flags;
-                    } infos[8];
-                    __be16 padding;     /* Alignment padding */
-                    __be32 bootinfo[3];  /* Info needed by mboot */
-                    __be32 sanity;       /* To verify vtoc sanity */
-                    __be32 reserved[10]; /* Free space */
-                    __be32 timestamp[8]; /* Partition timestamp */
-                } vtoc;
-                __be32 write_reinstruct; /* sectors to skip, writes */
-                __be32 read_reinstruct;  /* sectors to skip, reads */
-                unsigned char spare[148]; /* Padding */
-                __be16 rspeed;     /* Disk rotational speed */
-                __be16 pcylcount;  /* Physical cylinder count */
-                __be16 sparecyl;   /* extra sects per cylinder */
-                __be16 obs1;       /* gap1 */
-                __be16 obs2;       /* gap2 */
-                __be16 ilfact;     /* Interleave factor */
-                __be16 ncyl;       /* Data cylinder count */
-                __be16 nacyl;      /* Alt. cylinder count */
-                __be16 ntrks;      /* Tracks per cylinder */
-                __be16 nsect;      /* Sectors per track */
-                __be16 obs3;       /* bhead - Label head offset */
-                __be16 obs4;       /* ppart - Physical Partition */
-                struct sun_partition {
-                        __be32 start_cylinder;
-                        __be32 num_sectors;
-                } partitions[8];
-                __be16 magic;      /* Magic number */
-                __be16 csum;       /* Label xor'd checksum */
-        } * label;
-        struct sun_partition *p;
-        unsigned long spc;
-        char b[BDEVNAME_SIZE];
-        int use_vtoc;
-        int nparts;
-        label = read_part_sector(state, 0, &sect);
-        if (!label)
-                return -1;
-        p = label->partitions;
-        if (be16_to_cpu(label->magic) != SUN_LABEL_MAGIC) {
-/*              printk(KERN_INFO "Dev %s Sun disklabel: bad magic %04x\n",
-                       bdevname(bdev, b), be16_to_cpu(label->magic)); */
-                put_dev_sector(sect);
-                return 0;
-        }
-        /* Look at the checksum */
-        ush = ((__be16 *) (label+1)) - 1;
-        for (csum = 0; ush >= ((__be16 *) label);)
-                csum ^= *ush--;
-        if (csum) {
-                printk("Dev %s Sun disklabel: Csum bad, label corrupted\n",
-                       bdevname(state->bdev, b));
-                put_dev_sector(sect);
-                return 0;
-        }
-        /* Check to see if we can use the VTOC table */
-        use_vtoc = ((be32_to_cpu(label->vtoc.sanity) == SUN_VTOC_SANITY) &&
-                    (be32_to_cpu(label->vtoc.version) == 1) &&
-                    (be16_to_cpu(label->vtoc.nparts) <= 8));
-        /* Use 8 partition entries if not specified in validated VTOC */
-        nparts = (use_vtoc) ? be16_to_cpu(label->vtoc.nparts) : 8;
-        /*
-         * So that old Linux-Sun partitions continue to work,
-         * alow the VTOC to be used under the additional condition ...
-         */
-        use_vtoc = use_vtoc || !(label->vtoc.sanity ||
-                                 label->vtoc.version || label->vtoc.nparts);
-        spc = be16_to_cpu(label->ntrks) * be16_to_cpu(label->nsect);
-        for (i = 0; i < nparts; i++, p++) {
-                unsigned long st_sector;
-                unsigned int num_sectors;
-                st_sector = be32_to_cpu(p->start_cylinder) * spc;
-                num_sectors = be32_to_cpu(p->num_sectors);
-                if (num_sectors) {
-                        put_partition(state, slot, st_sector, num_sectors);
-                        state->parts[slot].flags = 0;
-                        if (use_vtoc) {
-                                if (be16_to_cpu(label->vtoc.infos[i].id) == LINUX_RAID_PARTITION)
-                                        state->parts[slot].flags |= ADDPART_FLAG_RAID;
-                                else if (be16_to_cpu(label->vtoc.infos[i].id) == SUN_WHOLE_DISK)
-                                        state->parts[slot].flags |= ADDPART_FLAG_WHOLEDISK;
-                        }
-                }
-                slot++;
-        }
-        strlcat(state->pp_buf, "\n", PAGE_SIZE);
-        put_dev_sector(sect);
-        return 1;
-}
diff --git a/fs/partitions/sun.h b/fs/partitions/sun.h
deleted file mode 100644
index 2424baa8319f..000000000000
--- a/fs/partitions/sun.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/*
- *  fs/partitions/sun.h
- */
-#define SUN_LABEL_MAGIC          0xDABE
-#define SUN_VTOC_SANITY          0x600DDEEE
-int sun_partition(struct parsed_partitions *state);
diff --git a/fs/partitions/sysv68.c b/fs/partitions/sysv68.c
deleted file mode 100644
index 9627ccffc1c4..000000000000
--- a/fs/partitions/sysv68.c
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- *  fs/partitions/sysv68.c
- *
- *  Copyright (C) 2007 Philippe De Muyter <phdm@macqel.be>
- */
-#include "check.h"
-#include "sysv68.h"
-/*
- *      Volume ID structure: on first 256-bytes sector of disk
- */
-struct volumeid {
-        u8      vid_unused[248];
-        u8      vid_mac[8];     /* ASCII string "MOTOROLA" */
-};
-/*
- *      config block: second 256-bytes sector on disk
- */
-struct dkconfig {
-        u8      ios_unused0[128];
-        __be32  ios_slcblk;     /* Slice table block number */
-        __be16  ios_slccnt;     /* Number of entries in slice table */
-        u8      ios_unused1[122];
-};
-/*
- *      combined volumeid and dkconfig block
- */
-struct dkblk0 {
-        struct volumeid dk_vid;
-        struct dkconfig dk_ios;
-};
-/*
- *      Slice Table Structure
- */
-struct slice {
-        __be32  nblocks;                /* slice size (in blocks) */
-        __be32  blkoff;                 /* block offset of slice */
-};
-int sysv68_partition(struct parsed_partitions *state)
-{
-        int i, slices;
-        int slot = 1;
-        Sector sect;
-        unsigned char *data;
-        struct dkblk0 *b;
-        struct slice *slice;
-        char tmp[64];
-        data = read_part_sector(state, 0, &sect);
-        if (!data)
-                return -1;
-        b = (struct dkblk0 *)data;
-        if (memcmp(b->dk_vid.vid_mac, "MOTOROLA", sizeof(b->dk_vid.vid_mac))) {
-                put_dev_sector(sect);
-                return 0;
-        }
-        slices = be16_to_cpu(b->dk_ios.ios_slccnt);
-        i = be32_to_cpu(b->dk_ios.ios_slcblk);
-        put_dev_sector(sect);
-        data = read_part_sector(state, i, &sect);
-        if (!data)
-                return -1;
-        slices -= 1; /* last slice is the whole disk */
-        snprintf(tmp, sizeof(tmp), "sysV68: %s(s%u)", state->name, slices);
-        strlcat(state->pp_buf, tmp, PAGE_SIZE);
-        slice = (struct slice *)data;
-        for (i = 0; i < slices; i++, slice++) {
-                if (slot == state->limit)
-                        break;
-                if (be32_to_cpu(slice->nblocks)) {
-                        put_partition(state, slot,
-                                be32_to_cpu(slice->blkoff),
-                                be32_to_cpu(slice->nblocks));
-                        snprintf(tmp, sizeof(tmp), "(s%u)", i);
-                        strlcat(state->pp_buf, tmp, PAGE_SIZE);
-                }
-                slot++;
-        }
-        strlcat(state->pp_buf, "\n", PAGE_SIZE);
-        put_dev_sector(sect);
-        return 1;
-}
diff --git a/fs/partitions/sysv68.h b/fs/partitions/sysv68.h
deleted file mode 100644
index bf2f5ffa97ac..000000000000
--- a/fs/partitions/sysv68.h
+++ /dev/null
@@ -1 +0,0 @@
-extern int sysv68_partition(struct parsed_partitions *state);
diff --git a/fs/partitions/ultrix.c b/fs/partitions/ultrix.c
deleted file mode 100644
index 8dbaf9f77a99..000000000000
--- a/fs/partitions/ultrix.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- *  fs/partitions/ultrix.c
- *
- *  Code extracted from drivers/block/genhd.c
- *
- *  Re-organised Jul 1999 Russell King
- */
-#include "check.h"
-#include "ultrix.h"
-int ultrix_partition(struct parsed_partitions *state)
-{
-        int i;
-        Sector sect;
-        unsigned char *data;
-        struct ultrix_disklabel {
-                s32     pt_magic;       /* magic no. indicating part. info exits */
-                s32     pt_valid;       /* set by driver if pt is current */
-                struct  pt_info {
-                        s32             pi_nblocks; /* no. of sectors */
-                        u32             pi_blkoff;  /* block offset for start */
-                } pt_part[8];
-        } *label;
-#define PT_MAGIC        0x032957        /* Partition magic number */
-#define PT_VALID        1               /* Indicates if struct is valid */
-        data = read_part_sector(state, (16384 - sizeof(*label))/512, &sect);
-        if (!data)
-                return -1;
-        
-        label = (struct ultrix_disklabel *)(data + 512 - sizeof(*label));
-        if (label->pt_magic == PT_MAGIC && label->pt_valid == PT_VALID) {
-                for (i=0; i<8; i++)
-                        if (label->pt_part[i].pi_nblocks)
-                                put_partition(state, i+1, 
-                                              label->pt_part[i].pi_blkoff,
-                                              label->pt_part[i].pi_nblocks);
-                put_dev_sector(sect);
-                strlcat(state->pp_buf, "\n", PAGE_SIZE);
-                return 1;
-        } else {
-                put_dev_sector(sect);
-                return 0;
-        }
-}
diff --git a/fs/partitions/ultrix.h b/fs/partitions/ultrix.h
deleted file mode 100644
index a3cc00b2bded..000000000000
--- a/fs/partitions/ultrix.h
+++ /dev/null
@@ -1,5 +0,0 @@
-/*
- *  fs/partitions/ultrix.h
- */
-int ultrix_partition(struct parsed_partitions *state);
diff --git a/fs/pipe.c b/fs/pipe.c
index 4065f07366b3..f0e485d54e64 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1290,11 +1290,4 @@ static int __init init_pipe_fs(void)
        return err;
 }
-static void __exit exit_pipe_fs(void)
-{
-        kern_unmount(pipe_mnt);
-        unregister_filesystem(&pipe_fs_type);
-}
 fs_initcall(init_pipe_fs);
-module_exit(exit_pipe_fs);
diff --git a/fs/pnode.c b/fs/pnode.c
index d42514e32380..ab5fa9e1a79a 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -13,45 +13,30 @@
 #include "pnode.h"
 /* return the next shared peer mount of @p */
-static inline struct vfsmount *next_peer(struct vfsmount *p)
+static inline struct mount *next_peer(struct mount *p)
 {
-        return list_entry(p->mnt_share.next, struct vfsmount, mnt_share);
+        return list_entry(p->mnt_share.next, struct mount, mnt_share);
 }
-static inline struct vfsmount *first_slave(struct vfsmount *p)
+static inline struct mount *first_slave(struct mount *p)
 {
-        return list_entry(p->mnt_slave_list.next, struct vfsmount, mnt_slave);
+        return list_entry(p->mnt_slave_list.next, struct mount, mnt_slave);
 }
-static inline struct vfsmount *next_slave(struct vfsmount *p)
+static inline struct mount *next_slave(struct mount *p)
 {
-        return list_entry(p->mnt_slave.next, struct vfsmount, mnt_slave);
+        return list_entry(p->mnt_slave.next, struct mount, mnt_slave);
 }
-/*
+static struct mount *get_peer_under_root(struct mount *mnt,
- * Return true if path is reachable from root
+                                         struct mnt_namespace *ns,
- *
+                                         const struct path *root)
- * namespace_sem is held, and mnt is attached
- */
-static bool is_path_reachable(struct vfsmount *mnt, struct dentry *dentry,
-                         const struct path *root)
-{
-        while (mnt != root->mnt && mnt->mnt_parent != mnt) {
-                dentry = mnt->mnt_mountpoint;
-                mnt = mnt->mnt_parent;
-        }
-        return mnt == root->mnt && is_subdir(dentry, root->dentry);
-}
-static struct vfsmount *get_peer_under_root(struct vfsmount *mnt,
-                                            struct mnt_namespace *ns,
-                                            const struct path *root)
 {
-        struct vfsmount *m = mnt;
+        struct mount *m = mnt;
        do {
                /* Check the namespace first for optimization */
-                if (m->mnt_ns == ns && is_path_reachable(m, m->mnt_root, root))
+                if (m->mnt_ns == ns && is_path_reachable(m, m->mnt.mnt_root, root))
                        return m;
                m = next_peer(m);
@@ -66,12 +51,12 @@ static struct vfsmount *get_peer_under_root(struct vfsmount *mnt,
 *
 * Caller must hold namespace_sem
 */
-int get_dominating_id(struct vfsmount *mnt, const struct path *root)
+int get_dominating_id(struct mount *mnt, const struct path *root)
 {
-        struct vfsmount *m;
+        struct mount *m;
        for (m = mnt->mnt_master; m != NULL; m = m->mnt_master) {
-                struct vfsmount *d = get_peer_under_root(m, mnt->mnt_ns, root);
+                struct mount *d = get_peer_under_root(m, mnt->mnt_ns, root);
                if (d)
                        return d->mnt_group_id;
        }
@@ -79,10 +64,10 @@ int get_dominating_id(struct vfsmount *mnt, const struct path *root)
        return 0;
 }
-static int do_make_slave(struct vfsmount *mnt)
+static int do_make_slave(struct mount *mnt)
 {
-        struct vfsmount *peer_mnt = mnt, *master = mnt->mnt_master;
+        struct mount *peer_mnt = mnt, *master = mnt->mnt_master;
-        struct vfsmount *slave_mnt;
+        struct mount *slave_mnt;
        /*
         * slave 'mnt' to a peer mount that has the
@@ -90,7 +75,7 @@ static int do_make_slave(struct vfsmount *mnt)
         * slave it to anything that is available.
         */
        while ((peer_mnt = next_peer(peer_mnt)) != mnt &&
-               peer_mnt->mnt_root != mnt->mnt_root) ;
+               peer_mnt->mnt.mnt_root != mnt->mnt.mnt_root) ;
        if (peer_mnt == mnt) {
                peer_mnt = next_peer(mnt);
@@ -116,7 +101,7 @@ static int do_make_slave(struct vfsmount *mnt)
                struct list_head *p = &mnt->mnt_slave_list;
                while (!list_empty(p)) {
                        slave_mnt = list_first_entry(p,
-                                        struct vfsmount, mnt_slave);
+                                        struct mount, mnt_slave);
                        list_del_init(&slave_mnt->mnt_slave);
                        slave_mnt->mnt_master = NULL;
                }
@@ -129,7 +114,7 @@ static int do_make_slave(struct vfsmount *mnt)
 /*
 * vfsmount lock must be held for write
 */
-void change_mnt_propagation(struct vfsmount *mnt, int type)
+void change_mnt_propagation(struct mount *mnt, int type)
 {
        if (type == MS_SHARED) {
                set_mnt_shared(mnt);
@@ -140,9 +125,9 @@ void change_mnt_propagation(struct vfsmount *mnt, int type)
                list_del_init(&mnt->mnt_slave);
                mnt->mnt_master = NULL;
                if (type == MS_UNBINDABLE)
-                        mnt->mnt_flags |= MNT_UNBINDABLE;
+                        mnt->mnt.mnt_flags |= MNT_UNBINDABLE;
                else
-                        mnt->mnt_flags &= ~MNT_UNBINDABLE;
+                        mnt->mnt.mnt_flags &= ~MNT_UNBINDABLE;
        }
 }
@@ -156,20 +141,19 @@ void change_mnt_propagation(struct vfsmount *mnt, int type)
 * vfsmount found while iterating with propagation_next() is
 * a peer of one we'd found earlier.
 */
-static struct vfsmount *propagation_next(struct vfsmount *m,
+static struct mount *propagation_next(struct mount *m,
-                                         struct vfsmount *origin)
+                                         struct mount *origin)
 {
        /* are there any slaves of this mount? */
        if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
                return first_slave(m);
        while (1) {
-                struct vfsmount *next;
+                struct mount *master = m->mnt_master;
-                struct vfsmount *master = m->mnt_master;
                if (master == origin->mnt_master) {
-                        next = next_peer(m);
+                        struct mount *next = next_peer(m);
-                        return ((next == origin) ? NULL : next);
+                        return (next == origin) ? NULL : next;
                } else if (m->mnt_slave.next != &master->mnt_slave_list)
                        return next_slave(m);
@@ -187,13 +171,13 @@ static struct vfsmount *propagation_next(struct vfsmount *m,
 * @type        return CL_SLAVE if the new mount has to be
 *              cloned as a slave.
 */
-static struct vfsmount *get_source(struct vfsmount *dest,
+static struct mount *get_source(struct mount *dest,
-                                        struct vfsmount *last_dest,
+                                struct mount *last_dest,
-                                        struct vfsmount *last_src,
+                                struct mount *last_src,
-                                        int *type)
+                                int *type)
 {
-        struct vfsmount *p_last_src = NULL;
+        struct mount *p_last_src = NULL;
-        struct vfsmount *p_last_dest = NULL;
+        struct mount *p_last_dest = NULL;
        while (last_dest != dest->mnt_master) {
                p_last_dest = last_dest;
@@ -233,33 +217,33 @@ static struct vfsmount *get_source(struct vfsmount *dest,
 * @source_mnt: source mount.
 * @tree_list : list of heads of trees to be attached.
 */
-int propagate_mnt(struct vfsmount *dest_mnt, struct dentry *dest_dentry,
+int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry,
-                    struct vfsmount *source_mnt, struct list_head *tree_list)
+                    struct mount *source_mnt, struct list_head *tree_list)
 {
-        struct vfsmount *m, *child;
+        struct mount *m, *child;
        int ret = 0;
-        struct vfsmount *prev_dest_mnt = dest_mnt;
+        struct mount *prev_dest_mnt = dest_mnt;
-        struct vfsmount *prev_src_mnt  = source_mnt;
+        struct mount *prev_src_mnt  = source_mnt;
        LIST_HEAD(tmp_list);
        LIST_HEAD(umount_list);
        for (m = propagation_next(dest_mnt, dest_mnt); m;
                        m = propagation_next(m, dest_mnt)) {
                int type;
-                struct vfsmount *source;
+                struct mount *source;
                if (IS_MNT_NEW(m))
                        continue;
                source =  get_source(m, prev_dest_mnt, prev_src_mnt, &type);
-                if (!(child = copy_tree(source, source->mnt_root, type))) {
+                if (!(child = copy_tree(source, source->mnt.mnt_root, type))) {
                        ret = -ENOMEM;
                        list_splice(tree_list, tmp_list.prev);
                        goto out;
                }
-                if (is_subdir(dest_dentry, m->mnt_root)) {
+                if (is_subdir(dest_dentry, m->mnt.mnt_root)) {
                        mnt_set_mountpoint(m, dest_dentry, child);
                        list_add_tail(&child->mnt_hash, tree_list);
                } else {
@@ -275,7 +259,7 @@ int propagate_mnt(struct vfsmount *dest_mnt, struct dentry *dest_dentry,
 out:
        br_write_lock(vfsmount_lock);
        while (!list_empty(&tmp_list)) {
-                child = list_first_entry(&tmp_list, struct vfsmount, mnt_hash);
+                child = list_first_entry(&tmp_list, struct mount, mnt_hash);
                umount_tree(child, 0, &umount_list);
        }
        br_write_unlock(vfsmount_lock);
@@ -286,7 +270,7 @@ out:
 /*
 * return true if the refcount is greater than count
 */
-static inline int do_refcount_check(struct vfsmount *mnt, int count)
+static inline int do_refcount_check(struct mount *mnt, int count)
 {
        int mycount = mnt_get_count(mnt) - mnt->mnt_ghosts;
        return (mycount > count);
@@ -302,10 +286,10 @@ static inline int do_refcount_check(struct vfsmount *mnt, int count)
 *
 * vfsmount lock must be held for write
 */
-int propagate_mount_busy(struct vfsmount *mnt, int refcnt)
+int propagate_mount_busy(struct mount *mnt, int refcnt)
 {
-        struct vfsmount *m, *child;
+        struct mount *m, *child;
-        struct vfsmount *parent = mnt->mnt_parent;
+        struct mount *parent = mnt->mnt_parent;
        int ret = 0;
        if (mnt == parent)
@@ -321,7 +305,7 @@ int propagate_mount_busy(struct vfsmount *mnt, int refcnt)
        for (m = propagation_next(parent, parent); m;
                        m = propagation_next(m, parent)) {
-                child = __lookup_mnt(m, mnt->mnt_mountpoint, 0);
+                child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint, 0);
                if (child && list_empty(&child->mnt_mounts) &&
                    (ret = do_refcount_check(child, 1)))
                        break;
@@ -333,17 +317,17 @@ int propagate_mount_busy(struct vfsmount *mnt, int refcnt)
 * NOTE: unmounting 'mnt' naturally propagates to all other mounts its
 * parent propagates to.
 */
-static void __propagate_umount(struct vfsmount *mnt)
+static void __propagate_umount(struct mount *mnt)
 {
-        struct vfsmount *parent = mnt->mnt_parent;
+        struct mount *parent = mnt->mnt_parent;
-        struct vfsmount *m;
+        struct mount *m;
        BUG_ON(parent == mnt);
        for (m = propagation_next(parent, parent); m;
                        m = propagation_next(m, parent)) {
-                struct vfsmount *child = __lookup_mnt(m,
+                struct mount *child = __lookup_mnt(&m->mnt,
                                        mnt->mnt_mountpoint, 0);
                /*
                 * umount the child only if the child has no
@@ -363,7 +347,7 @@ static void __propagate_umount(struct vfsmount *mnt)
 */
 int propagate_umount(struct list_head *list)
 {
-        struct vfsmount *mnt;
+        struct mount *mnt;
        list_for_each_entry(mnt, list, mnt_hash)
                __propagate_umount(mnt);
diff --git a/fs/pnode.h b/fs/pnode.h
index 1ea4ae1efcd3..65c60979d541 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -9,13 +9,13 @@
 #define _LINUX_PNODE_H
 #include <linux/list.h>
-#include <linux/mount.h>
+#include "mount.h"
-#define IS_MNT_SHARED(mnt) (mnt->mnt_flags & MNT_SHARED)
+#define IS_MNT_SHARED(m) ((m)->mnt.mnt_flags & MNT_SHARED)
-#define IS_MNT_SLAVE(mnt) (mnt->mnt_master)
+#define IS_MNT_SLAVE(m) ((m)->mnt_master)
-#define IS_MNT_NEW(mnt)  (!mnt->mnt_ns)
+#define IS_MNT_NEW(m)  (!(m)->mnt_ns)
-#define CLEAR_MNT_SHARED(mnt) (mnt->mnt_flags &= ~MNT_SHARED)
+#define CLEAR_MNT_SHARED(m) ((m)->mnt.mnt_flags &= ~MNT_SHARED)
-#define IS_MNT_UNBINDABLE(mnt) (mnt->mnt_flags & MNT_UNBINDABLE)
+#define IS_MNT_UNBINDABLE(m) ((m)->mnt.mnt_flags & MNT_UNBINDABLE)
 #define CL_EXPIRE               0x01
 #define CL_SLAVE                0x02
@@ -23,17 +23,25 @@
 #define CL_MAKE_SHARED          0x08
 #define CL_PRIVATE              0x10
-static inline void set_mnt_shared(struct vfsmount *mnt)
+static inline void set_mnt_shared(struct mount *mnt)
 {
-        mnt->mnt_flags &= ~MNT_SHARED_MASK;
+        mnt->mnt.mnt_flags &= ~MNT_SHARED_MASK;
-        mnt->mnt_flags |= MNT_SHARED;
+        mnt->mnt.mnt_flags |= MNT_SHARED;
 }
-void change_mnt_propagation(struct vfsmount *, int);
+void change_mnt_propagation(struct mount *, int);
-int propagate_mnt(struct vfsmount *, struct dentry *, struct vfsmount *,
+int propagate_mnt(struct mount *, struct dentry *, struct mount *,
                struct list_head *);
 int propagate_umount(struct list_head *);
-int propagate_mount_busy(struct vfsmount *, int);
+int propagate_mount_busy(struct mount *, int);
-void mnt_release_group_id(struct vfsmount *);
+void mnt_release_group_id(struct mount *);
-int get_dominating_id(struct vfsmount *mnt, const struct path *root);
+int get_dominating_id(struct mount *mnt, const struct path *root);
+unsigned int mnt_get_count(struct mount *mnt);
+void mnt_set_mountpoint(struct mount *, struct dentry *,
+                        struct mount *);
+void release_mounts(struct list_head *);
+void umount_tree(struct mount *, int, struct list_head *);
+struct mount *copy_tree(struct mount *, struct dentry *, int);
+bool is_path_reachable(struct mount *, struct dentry *,
+                         const struct path *root);
 #endif /* _LINUX_PNODE_H */
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 3a1dafd228d1..8c344f037bd0 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -394,8 +394,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
        sigemptyset(&sigign);
        sigemptyset(&sigcatch);
-        cutime = cstime = utime = stime = cputime_zero;
+        cutime = cstime = utime = stime = 0;
-        cgtime = gtime = cputime_zero;
+        cgtime = gtime = 0;
        if (lock_task_sighand(task, &flags)) {
                struct signal_struct *sig = task->signal;
@@ -423,14 +423,14 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
                        do {
                                min_flt += t->min_flt;
                                maj_flt += t->maj_flt;
-                                gtime = cputime_add(gtime, t->gtime);
+                                gtime += t->gtime;
                                t = next_thread(t);
                        } while (t != task);
                        min_flt += sig->min_flt;
                        maj_flt += sig->maj_flt;
                        thread_group_times(task, &utime, &stime);
-                        gtime = cputime_add(gtime, sig->gtime);
+                        gtime += sig->gtime;
                }
                sid = task_session_nr_ns(task, ns);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 851ba3dcdc29..a1dddda999f2 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -101,7 +101,7 @@
 struct pid_entry {
        char *name;
        int len;
-        mode_t mode;
+        umode_t mode;
        const struct inode_operations *iop;
        const struct file_operations *fop;
        union proc_op op;
@@ -631,120 +631,6 @@ static const struct inode_operations proc_def_inode_operations = {
        .setattr        = proc_setattr,
 };
-static int mounts_open_common(struct inode *inode, struct file *file,
-                              const struct seq_operations *op)
-{
-        struct task_struct *task = get_proc_task(inode);
-        struct nsproxy *nsp;
-        struct mnt_namespace *ns = NULL;
-        struct path root;
-        struct proc_mounts *p;
-        int ret = -EINVAL;
-        if (task) {
-                rcu_read_lock();
-                nsp = task_nsproxy(task);
-                if (nsp) {
-                        ns = nsp->mnt_ns;
-                        if (ns)
-                                get_mnt_ns(ns);
-                }
-                rcu_read_unlock();
-                if (ns && get_task_root(task, &root) == 0)
-                        ret = 0;
-                put_task_struct(task);
-        }
-        if (!ns)
-                goto err;
-        if (ret)
-                goto err_put_ns;
-        ret = -ENOMEM;
-        p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
-        if (!p)
-                goto err_put_path;
-        file->private_data = &p->m;
-        ret = seq_open(file, op);
-        if (ret)
-                goto err_free;
-        p->m.private = p;
-        p->ns = ns;
-        p->root = root;
-        p->m.poll_event = ns->event;
-        return 0;
- err_free:
-        kfree(p);
- err_put_path:
-        path_put(&root);
- err_put_ns:
-        put_mnt_ns(ns);
- err:
-        return ret;
-}
-static int mounts_release(struct inode *inode, struct file *file)
-{
-        struct proc_mounts *p = file->private_data;
-        path_put(&p->root);
-        put_mnt_ns(p->ns);
-        return seq_release(inode, file);
-}
-static unsigned mounts_poll(struct file *file, poll_table *wait)
-{
-        struct proc_mounts *p = file->private_data;
-        unsigned res = POLLIN | POLLRDNORM;
-        poll_wait(file, &p->ns->poll, wait);
-        if (mnt_had_events(p))
-                res |= POLLERR | POLLPRI;
-        return res;
-}
-static int mounts_open(struct inode *inode, struct file *file)
-{
-        return mounts_open_common(inode, file, &mounts_op);
-}
-static const struct file_operations proc_mounts_operations = {
-        .open           = mounts_open,
-        .read           = seq_read,
-        .llseek         = seq_lseek,
-        .release        = mounts_release,
-        .poll           = mounts_poll,
-};
-static int mountinfo_open(struct inode *inode, struct file *file)
-{
-        return mounts_open_common(inode, file, &mountinfo_op);
-}
-static const struct file_operations proc_mountinfo_operations = {
-        .open           = mountinfo_open,
-        .read           = seq_read,
-        .llseek         = seq_lseek,
-        .release        = mounts_release,
-        .poll           = mounts_poll,
-};
-static int mountstats_open(struct inode *inode, struct file *file)
-{
-        return mounts_open_common(inode, file, &mountstats_op);
-}
-static const struct file_operations proc_mountstats_operations = {
-        .open           = mountstats_open,
-        .read           = seq_read,
-        .llseek         = seq_lseek,
-        .release        = mounts_release,
-};
 #define PROC_BLOCK_SIZE (3*1024)                /* 4K page size but our output routines use some slack for overruns */
 static ssize_t proc_info_read(struct file * file, char __user * buf,
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 10090d9c7ad5..2edf34f2eb61 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -597,7 +597,7 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
 static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
                                          const char *name,
-                                          mode_t mode,
+                                          umode_t mode,
                                          nlink_t nlink)
 {
        struct proc_dir_entry *ent = NULL;
@@ -659,7 +659,7 @@ struct proc_dir_entry *proc_symlink(const char *name,
 }
 EXPORT_SYMBOL(proc_symlink);
-struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode,
+struct proc_dir_entry *proc_mkdir_mode(const char *name, umode_t mode,
                struct proc_dir_entry *parent)
 {
        struct proc_dir_entry *ent;
@@ -699,7 +699,7 @@ struct proc_dir_entry *proc_mkdir(const char *name,
 }
 EXPORT_SYMBOL(proc_mkdir);
-struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
+struct proc_dir_entry *create_proc_entry(const char *name, umode_t mode,
                                         struct proc_dir_entry *parent)
 {
        struct proc_dir_entry *ent;
@@ -728,7 +728,7 @@ struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
 }
 EXPORT_SYMBOL(create_proc_entry);
-struct proc_dir_entry *proc_create_data(const char *name, mode_t mode,
+struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
                                        struct proc_dir_entry *parent,
                                        const struct file_operations *proc_fops,
                                        void *data)
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 7737c5468a40..51a176622b8f 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -77,7 +77,6 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
 static void proc_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(proc_inode_cachep, PROC_I(inode));
 }
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index be177f702acb..27da860115c6 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -9,7 +9,6 @@
 #include <linux/file.h>
 #include <linux/utsname.h>
 #include <net/net_namespace.h>
-#include <linux/mnt_namespace.h>
 #include <linux/ipc_namespace.h>
 #include <linux/pid_namespace.h>
 #include "internal.h"
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index f738024ccc8e..06e1cc17caf6 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -179,7 +179,7 @@ const struct file_operations proc_net_operations = {
 struct proc_dir_entry *proc_net_fops_create(struct net *net,
-        const char *name, mode_t mode, const struct file_operations *fops)
+        const char *name, umode_t mode, const struct file_operations *fops)
 {
        return proc_create(name, mode, net->proc_net, fops);
 }
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 9a8a2b77b874..03102d978180 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -91,20 +91,18 @@ static struct file_system_type proc_fs_type = {
 void __init proc_root_init(void)
 {
-        struct vfsmount *mnt;
        int err;
        proc_init_inodecache();
        err = register_filesystem(&proc_fs_type);
        if (err)
                return;
-        mnt = kern_mount_data(&proc_fs_type, &init_pid_ns);
+        err = pid_ns_prepare_proc(&init_pid_ns);
-        if (IS_ERR(mnt)) {
+        if (err) {
                unregister_filesystem(&proc_fs_type);
                return;
        }
-        init_pid_ns.proc_mnt = mnt;
        proc_symlink("mounts", NULL, "self/mounts");
        proc_net_init();
@@ -209,5 +207,5 @@ int pid_ns_prepare_proc(struct pid_namespace *ns)
 void pid_ns_release_proc(struct pid_namespace *ns)
 {
-        mntput(ns->proc_mnt);
+        kern_unmount(ns->proc_mnt);
 }
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 2a30d67dd6b8..d76ca6ae2b1b 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -22,31 +22,29 @@
 #define arch_idle_time(cpu) 0
 #endif
-static cputime64_t get_idle_time(int cpu)
+static u64 get_idle_time(int cpu)
 {
-        u64 idle_time = get_cpu_idle_time_us(cpu, NULL);
+        u64 idle, idle_time = get_cpu_idle_time_us(cpu, NULL);
-        cputime64_t idle;
        if (idle_time == -1ULL) {
                /* !NO_HZ so we can rely on cpustat.idle */
-                idle = kstat_cpu(cpu).cpustat.idle;
+                idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
-                idle = cputime64_add(idle, arch_idle_time(cpu));
+                idle += arch_idle_time(cpu);
        } else
-                idle = nsecs_to_jiffies64(1000 * idle_time);
+                idle = usecs_to_cputime64(idle_time);
        return idle;
 }
-static cputime64_t get_iowait_time(int cpu)
+static u64 get_iowait_time(int cpu)
 {
-        u64 iowait_time = get_cpu_iowait_time_us(cpu, NULL);
+        u64 iowait, iowait_time = get_cpu_iowait_time_us(cpu, NULL);
-        cputime64_t iowait;
        if (iowait_time == -1ULL)
                /* !NO_HZ so we can rely on cpustat.iowait */
-                iowait = kstat_cpu(cpu).cpustat.iowait;
+                iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
        else
-                iowait = nsecs_to_jiffies64(1000 * iowait_time);
+                iowait = usecs_to_cputime64(iowait_time);
        return iowait;
 }
@@ -55,33 +53,30 @@ static int show_stat(struct seq_file *p, void *v)
 {
        int i, j;
        unsigned long jif;
-        cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
+        u64 user, nice, system, idle, iowait, irq, softirq, steal;
-        cputime64_t guest, guest_nice;
+        u64 guest, guest_nice;
        u64 sum = 0;
        u64 sum_softirq = 0;
        unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
        struct timespec boottime;
        user = nice = system = idle = iowait =
-                irq = softirq = steal = cputime64_zero;
+                irq = softirq = steal = 0;
-        guest = guest_nice = cputime64_zero;
+        guest = guest_nice = 0;
        getboottime(&boottime);
        jif = boottime.tv_sec;
        for_each_possible_cpu(i) {
-                user = cputime64_add(user, kstat_cpu(i).cpustat.user);
+                user += kcpustat_cpu(i).cpustat[CPUTIME_USER];
-                nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice);
+                nice += kcpustat_cpu(i).cpustat[CPUTIME_NICE];
-                system = cputime64_add(system, kstat_cpu(i).cpustat.system);
+                system += kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
-                idle = cputime64_add(idle, get_idle_time(i));
+                idle += get_idle_time(i);
-                iowait = cputime64_add(iowait, get_iowait_time(i));
+                iowait += get_iowait_time(i);
-                irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq);
+                irq += kcpustat_cpu(i).cpustat[CPUTIME_IRQ];
-                softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
+                softirq += kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ];
-                steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
+                steal += kcpustat_cpu(i).cpustat[CPUTIME_STEAL];
-                guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
+                guest += kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
-                guest_nice = cputime64_add(guest_nice,
+                guest_nice += kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
-                        kstat_cpu(i).cpustat.guest_nice);
-                sum += kstat_cpu_irqs_sum(i);
-                sum += arch_irq_stat_cpu(i);
                for (j = 0; j < NR_SOFTIRQS; j++) {
                        unsigned int softirq_stat = kstat_softirqs_cpu(j, i);
@@ -106,16 +101,16 @@ static int show_stat(struct seq_file *p, void *v)
                (unsigned long long)cputime64_to_clock_t(guest_nice));
        for_each_online_cpu(i) {
                /* Copy values here to work around gcc-2.95.3, gcc-2.96 */
-                user = kstat_cpu(i).cpustat.user;
+                user = kcpustat_cpu(i).cpustat[CPUTIME_USER];
-                nice = kstat_cpu(i).cpustat.nice;
+                nice = kcpustat_cpu(i).cpustat[CPUTIME_NICE];
-                system = kstat_cpu(i).cpustat.system;
+                system = kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
                idle = get_idle_time(i);
                iowait = get_iowait_time(i);
-                irq = kstat_cpu(i).cpustat.irq;
+                irq = kcpustat_cpu(i).cpustat[CPUTIME_IRQ];
-                softirq = kstat_cpu(i).cpustat.softirq;
+                softirq = kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ];
-                steal = kstat_cpu(i).cpustat.steal;
+                steal = kcpustat_cpu(i).cpustat[CPUTIME_STEAL];
-                guest = kstat_cpu(i).cpustat.guest;
+                guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
-                guest_nice = kstat_cpu(i).cpustat.guest_nice;
+                guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
                seq_printf(p,
                        "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu "
                        "%llu\n",
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index 766b1d456050..9610ac772d7e 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -11,15 +11,20 @@ static int uptime_proc_show(struct seq_file *m, void *v)
 {
        struct timespec uptime;
        struct timespec idle;
+        u64 idletime;
+        u64 nsec;
+        u32 rem;
        int i;
-        cputime_t idletime = cputime_zero;
+        idletime = 0;
        for_each_possible_cpu(i)
-                idletime = cputime64_add(idletime, kstat_cpu(i).cpustat.idle);
+                idletime += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE];
        do_posix_clock_monotonic_gettime(&uptime);
        monotonic_to_bootbased(&uptime);
-        cputime_to_timespec(idletime, &idle);
+        nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC;
+        idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem);
+        idle.tv_nsec = rem;
        seq_printf(m, "%lu.%02lu %lu.%02lu\n",
                        (unsigned long) uptime.tv_sec,
                        (uptime.tv_nsec / (NSEC_PER_SEC / 100)),
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
new file mode 100644
index 000000000000..12412852d88a
--- /dev/null
+++ b/fs/proc_namespace.c
@@ -0,0 +1,333 @@
+/*
+ * fs/proc_namespace.c - handling of /proc/<pid>/{mounts,mountinfo,mountstats}
+ *
+ * In fact, that's a piece of procfs; it's *almost* isolated from
+ * the rest of fs/proc, but has rather close relationships with
+ * fs/namespace.c, thus here instead of fs/proc
+ *
+ */
+#include <linux/mnt_namespace.h>
+#include <linux/nsproxy.h>
+#include <linux/security.h>
+#include <linux/fs_struct.h>
+#include "proc/internal.h" /* only for get_proc_task() in ->open() */
+#include "pnode.h"
+#include "internal.h"
+static unsigned mounts_poll(struct file *file, poll_table *wait)
+{       /* always readable; adds POLLERR|POLLPRI when ns->event has moved on */
+        struct proc_mounts *p = file->private_data;     /* NOTE(review): open stores &p->m here; assumes m is the first member of struct proc_mounts - confirm */
+        struct mnt_namespace *ns = p->ns;
+        unsigned res = POLLIN | POLLRDNORM;
+        poll_wait(file, &p->ns->poll, wait);
+        br_read_lock(vfsmount_lock);    /* compare and update poll_event under vfsmount_lock */
+        if (p->m.poll_event != ns->event) {
+                p->m.poll_event = ns->event;    /* remember it: each change is reported once */
+                res |= POLLERR | POLLPRI;
+        }
+        br_read_unlock(vfsmount_lock);
+        return res;
+}
+struct proc_fs_info {   /* one row of a flag -> option-string table */
+        int flag;               /* bit to test; 0 terminates a table */
+        const char *str;        /* text passed to seq_puts() when the bit is set */
+};
+static int show_sb_opts(struct seq_file *m, struct super_block *sb)
+{       /* print options for bits set in sb->s_flags; returns security_sb_show_options() result */
+        static const struct proc_fs_info fs_info[] = {
+                { MS_SYNCHRONOUS, ",sync" },
+                { MS_DIRSYNC, ",dirsync" },
+                { MS_MANDLOCK, ",mand" },
+                { 0, NULL }     /* sentinel: the loop below stops at flag == 0 */
+        };
+        const struct proc_fs_info *fs_infop;
+        for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
+                if (sb->s_flags & fs_infop->flag)
+                        seq_puts(m, fs_infop->str);     /* strings carry their own leading ',' */
+        }
+        return security_sb_show_options(m, sb);
+}
+static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
+{       /* print ",opt" for every table bit that is set in mnt->mnt_flags */
+        static const struct proc_fs_info mnt_info[] = {
+                { MNT_NOSUID, ",nosuid" },
+                { MNT_NODEV, ",nodev" },
+                { MNT_NOEXEC, ",noexec" },
+                { MNT_NOATIME, ",noatime" },
+                { MNT_NODIRATIME, ",nodiratime" },
+                { MNT_RELATIME, ",relatime" },
+                { 0, NULL }     /* sentinel: the loop below stops at flag == 0 */
+        };
+        const struct proc_fs_info *fs_infop;
+        for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) {
+                if (mnt->mnt_flags & fs_infop->flag)
+                        seq_puts(m, fs_infop->str);
+        }
+}
+static inline void mangle(struct seq_file *m, const char *s)
+{       /* write s through seq_escape() */
+        seq_escape(m, s, " \t\n\\");    /* escape set: space, tab, newline, backslash */
+}
+static void show_type(struct seq_file *m, struct super_block *sb)
+{       /* print "type", or "type.subtype" when a non-empty s_subtype exists */
+        mangle(m, sb->s_type->name);
+        if (sb->s_subtype && sb->s_subtype[0]) {        /* skip NULL and "" subtypes */
+                seq_putc(m, '.');
+                mangle(m, sb->s_subtype);
+        }
+}
+static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt)
+{       /* one line: device, mount point, fstype, ro/rw plus options, then " 0 0" */
+        struct mount *r = real_mount(mnt);
+        int err = 0;
+        struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
+        struct super_block *sb = mnt_path.dentry->d_sb;
+        if (sb->s_op->show_devname) {   /* filesystem prints its own device name */
+                err = sb->s_op->show_devname(m, mnt_path.dentry);
+                if (err)
+                        goto out;
+        } else {
+                mangle(m, r->mnt_devname ? r->mnt_devname : "none");    /* "none" when no devname is recorded */
+        }
+        seq_putc(m, ' ');
+        seq_path(m, &mnt_path, " \t\n\\");      /* same escape set as mangle() */
+        seq_putc(m, ' ');
+        show_type(m, sb);
+        seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
+        err = show_sb_opts(m, sb);
+        if (err)
+                goto out;
+        show_mnt_opts(m, mnt);
+        if (sb->s_op->show_options)
+                err = sb->s_op->show_options(m, mnt_path.dentry);
+        seq_puts(m, " 0 0\n");  /* written even if ->show_options() failed; err is still returned */
+out:
+        return err;
+}
+static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt)
+{       /* one line: ids, dev, root, mount point, mount opts, tags, " - ", fstype, device, sb opts */
+        struct proc_mounts *p = m->private;
+        struct mount *r = real_mount(mnt);
+        struct super_block *sb = mnt->mnt_sb;
+        struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
+        struct path root = p->root;     /* local copy, handed by address to seq_path_root() */
+        int err = 0;
+        seq_printf(m, "%i %i %u:%u ", r->mnt_id, r->mnt_parent->mnt_id,
+                   MAJOR(sb->s_dev), MINOR(sb->s_dev));
+        if (sb->s_op->show_path)
+                err = sb->s_op->show_path(m, mnt->mnt_root);
+        else
+                seq_dentry(m, mnt->mnt_root, " \t\n\\");
+        if (err)        /* only ->show_path() can have set err here */
+                goto out;
+        seq_putc(m, ' ');
+        /* mountpoints outside of chroot jail will give SEQ_SKIP on this */
+        err = seq_path_root(m, &mnt_path, &root, " \t\n\\");
+        if (err)
+                goto out;
+        seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw");     /* per-mount read-only flag */
+        show_mnt_opts(m, mnt);
+        /* Tagged fields ("foo:X" or "bar") */
+        if (IS_MNT_SHARED(r))
+                seq_printf(m, " shared:%i", r->mnt_group_id);
+        if (IS_MNT_SLAVE(r)) {
+                int master = r->mnt_master->mnt_group_id;
+                int dom = get_dominating_id(r, &p->root);
+                seq_printf(m, " master:%i", master);
+                if (dom && dom != master)       /* printed only when non-zero and different from master */
+                        seq_printf(m, " propagate_from:%i", dom);
+        }
+        if (IS_MNT_UNBINDABLE(r))
+                seq_puts(m, " unbindable");
+        /* Filesystem specific data */
+        seq_puts(m, " - ");     /* separator between the tagged fields and the per-filesystem part */
+        show_type(m, sb);
+        seq_putc(m, ' ');
+        if (sb->s_op->show_devname)
+                err = sb->s_op->show_devname(m, mnt->mnt_root);
+        else
+                mangle(m, r->mnt_devname ? r->mnt_devname : "none");
+        if (err)
+                goto out;
+        seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw");   /* superblock read-only flag */
+        err = show_sb_opts(m, sb);
+        if (err)
+                goto out;
+        if (sb->s_op->show_options)
+                err = sb->s_op->show_options(m, mnt->mnt_root);
+        seq_putc(m, '\n');      /* written even if ->show_options() failed; err is still returned */
+out:
+        return err;
+}
+static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt)
+{       /* one line: device, mount point, fstype and optional ->show_stats() output */
+        struct mount *r = real_mount(mnt);
+        struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
+        struct super_block *sb = mnt_path.dentry->d_sb;
+        int err = 0;
+        /* device */
+        if (sb->s_op->show_devname) {
+                seq_puts(m, "device ");
+                err = sb->s_op->show_devname(m, mnt_path.dentry);       /* a failure does not stop the rest of the line */
+        } else {
+                if (r->mnt_devname) {
+                        seq_puts(m, "device ");
+                        mangle(m, r->mnt_devname);
+                } else
+                        seq_puts(m, "no device");
+        }
+        /* mount point */
+        seq_puts(m, " mounted on ");
+        seq_path(m, &mnt_path, " \t\n\\");
+        seq_putc(m, ' ');
+        /* file system type */
+        seq_puts(m, "with fstype ");
+        show_type(m, sb);
+        /* optional statistics */
+        if (sb->s_op->show_stats) {
+                seq_putc(m, ' ');
+                if (!err)       /* skipped when ->show_devname() already failed */
+                        err = sb->s_op->show_stats(m, mnt_path.dentry);
+        }
+        seq_putc(m, '\n');
+        return err;     /* first error from ->show_devname() or ->show_stats(), else 0 */
+}
+static int mounts_open_common(struct inode *inode, struct file *file,
+                              int (*show)(struct seq_file *, struct vfsmount *))
+{       /* shared ->open: take refs on the task's mnt namespace and fs root, record 'show' */
+        struct task_struct *task = get_proc_task(inode);
+        struct nsproxy *nsp;
+        struct mnt_namespace *ns = NULL;
+        struct path root;
+        struct proc_mounts *p;
+        int ret = -EINVAL;      /* returned when task, nsproxy or mnt_ns is missing */
+        if (!task)
+                goto err;
+        rcu_read_lock();        /* held across task_nsproxy() until get_mnt_ns() has run */
+        nsp = task_nsproxy(task);
+        if (!nsp) {
+                rcu_read_unlock();
+                put_task_struct(task);
+                goto err;
+        }
+        ns = nsp->mnt_ns;
+        if (!ns) {
+                rcu_read_unlock();
+                put_task_struct(task);
+                goto err;
+        }
+        get_mnt_ns(ns);         /* dropped by mounts_release() or at err_put_ns */
+        rcu_read_unlock();
+        task_lock(task);        /* held while task->fs is tested and its root is grabbed */
+        if (!task->fs) {
+                task_unlock(task);
+                put_task_struct(task);
+                ret = -ENOENT;
+                goto err_put_ns;
+        }
+        get_fs_root(task->fs, &root);   /* dropped by mounts_release() or at err_put_path */
+        task_unlock(task);
+        put_task_struct(task);  /* task is not used past this point */
+        ret = -ENOMEM;
+        p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
+        if (!p)
+                goto err_put_path;
+        file->private_data = &p->m;     /* NOTE(review): relies on seq_open() using a caller-provided seq_file - confirm */
+        ret = seq_open(file, &mounts_op);
+        if (ret)
+                goto err_free;
+        p->m.private = p;
+        p->ns = ns;
+        p->root = root;
+        p->m.poll_event = ns->event;    /* baseline that mounts_poll() compares against */
+        p->show = show;
+        return 0;
+ err_free:      /* labels fall through, undoing the steps above in reverse order */
+        kfree(p);
+ err_put_path:
+        path_put(&root);
+ err_put_ns:
+        put_mnt_ns(ns);
+ err:
+        return ret;
+}
+static int mounts_release(struct inode *inode, struct file *file)
+{       /* ->release: drop the root path and namespace refs taken at open */
+        struct proc_mounts *p = file->private_data;     /* NOTE(review): open stores &p->m; assumes m is the first member - confirm */
+        path_put(&p->root);
+        put_mnt_ns(p->ns);
+        return seq_release(inode, file);        /* NOTE(review): presumably also frees p via private_data - confirm */
+}
+static int mounts_open(struct inode *inode, struct file *file)
+{       /* ->open of proc_mounts_operations: common open with show_vfsmnt() */
+        return mounts_open_common(inode, file, show_vfsmnt);
+}
+static int mountinfo_open(struct inode *inode, struct file *file)
+{       /* ->open of proc_mountinfo_operations: common open with show_mountinfo() */
+        return mounts_open_common(inode, file, show_mountinfo);
+}
+static int mountstats_open(struct inode *inode, struct file *file)
+{       /* ->open of proc_mountstats_operations: common open with show_vfsstat() */
+        return mounts_open_common(inode, file, show_vfsstat);
+}
+const struct file_operations proc_mounts_operations = { /* seq_file-backed; lines come from show_vfsmnt() */
+        .open           = mounts_open,
+        .read           = seq_read,
+        .llseek         = seq_lseek,
+        .release        = mounts_release,
+        .poll           = mounts_poll,  /* signals changes in the namespace's event counter */
+};
+const struct file_operations proc_mountinfo_operations = {      /* seq_file-backed; lines come from show_mountinfo() */
+        .open           = mountinfo_open,
+        .read           = seq_read,
+        .llseek         = seq_lseek,
+        .release        = mounts_release,
+        .poll           = mounts_poll,  /* same poll handler as proc_mounts_operations */
+};
+const struct file_operations proc_mountstats_operations = {     /* seq_file-backed; lines come from show_vfsstat() */
+        .open           = mountstats_open,
+        .read           = seq_read,
+        .llseek         = seq_lseek,
+        .release        = mounts_release,       /* no .poll here, unlike the mounts and mountinfo tables */
+};
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 379a02dc1217..b3b426edb2fd 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -80,7 +80,8 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry)
 {
        struct pstore_private *p = dentry->d_inode->i_private;
-        p->psi->erase(p->type, p->id, p->psi);
+        if (p->psi->erase)
+                p->psi->erase(p->type, p->id, p->psi);
        return simple_unlink(dir, dentry);
 }
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 57bbf9078ac8..9ec22d3b4293 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -122,7 +122,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
                memcpy(dst, s1 + s1_start, l1_cpy);
                memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy);
-                ret = psinfo->write(PSTORE_TYPE_DMESG, &id, part,
+                ret = psinfo->write(PSTORE_TYPE_DMESG, reason, &id, part,
                                   hsize + l1_cpy + l2_cpy, psinfo);
                if (ret == 0 && reason == KMSG_DUMP_OOPS && pstore_is_mounted())
                        pstore_new_entry = 1;
@@ -207,8 +207,7 @@ void pstore_get_records(int quiet)
                return;
        mutex_lock(&psi->read_mutex);
-        rc = psi->open(psi);
+        if (psi->open && psi->open(psi))
-        if (rc)
                goto out;
        while ((size = psi->read(&id, &type, &time, &buf, psi)) > 0) {
@@ -219,7 +218,8 @@ void pstore_get_records(int quiet)
                if (rc && (rc != -EEXIST || !quiet))
                        failed++;
        }
-        psi->close(psi);
+        if (psi->close)
+                psi->close(psi);
 out:
        mutex_unlock(&psi->read_mutex);
@@ -243,33 +243,5 @@ static void pstore_timefunc(unsigned long dummy)
        mod_timer(&pstore_timer, jiffies + PSTORE_INTERVAL);
 }
-/*
- * Call platform driver to write a record to the
- * persistent store.
- */
-int pstore_write(enum pstore_type_id type, char *buf, size_t size)
-{
-        u64             id;
-        int             ret;
-        unsigned long   flags;
-        if (!psinfo)
-                return -ENODEV;
-        if (size > psinfo->bufsize)
-                return -EFBIG;
-        spin_lock_irqsave(&psinfo->buf_lock, flags);
-        memcpy(psinfo->buf, buf, size);
-        ret = psinfo->write(type, &id, 0, size, psinfo);
-        if (ret == 0 && pstore_is_mounted())
-                pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, psinfo->buf,
-                              size, CURRENT_TIME, psinfo);
-        spin_unlock_irqrestore(&psinfo->buf_lock, flags);
-        return 0;
-}
-EXPORT_SYMBOL_GPL(pstore_write);
 module_param(backend, charp, 0444);
 MODULE_PARM_DESC(backend, "Pstore backend to use");
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 3bdd21418432..2bfd987f4853 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -199,12 +199,13 @@ static const char *qnx4_checkroot(struct super_block *sb)
                                        if (!strcmp(rootdir->di_fname,
                                                    QNX4_BMNAME)) {
                                                found = 1;
-                                                qnx4_sb(sb)->BitMap = kmalloc( sizeof( struct qnx4_inode_entry ), GFP_KERNEL );
+                                                qnx4_sb(sb)->BitMap = kmemdup(rootdir,
+                                                                              sizeof(struct qnx4_inode_entry),
+                                                                              GFP_KERNEL);
                                                if (!qnx4_sb(sb)->BitMap) {
                                                        brelse (bh);
                                                        return "not enough memory for bitmap inode";
-                                                }
+                                                }/* keep bitmap inode known */
-                                                memcpy( qnx4_sb(sb)->BitMap, rootdir, sizeof( struct qnx4_inode_entry ) );      /* keep bitmap inode known */
                                                break;
                                        }
                                }
@@ -427,7 +428,6 @@ static struct inode *qnx4_alloc_inode(struct super_block *sb)
 static void qnx4_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode));
 }
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 5b572c89e6c4..5ec59b20cf76 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -73,7 +73,6 @@
 #include <linux/security.h>
 #include <linux/kmod.h>
 #include <linux/namei.h>
-#include <linux/buffer_head.h>
 #include <linux/capability.h>
 #include <linux/quotaops.h>
 #include "../internal.h" /* ugh */
@@ -2199,7 +2198,7 @@ int dquot_quota_on(struct super_block *sb, int type, int format_id,
        if (error)
                return error;
        /* Quota file not on the same filesystem? */
-        if (path->mnt->mnt_sb != sb)
+        if (path->dentry->d_sb != sb)
                error = -EXDEV;
        else
                error = vfs_load_quota_inode(path->dentry->d_inode, type,
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 35f4b0ecdeb3..7898cd688a00 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -13,7 +13,6 @@
 #include <linux/kernel.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
-#include <linux/buffer_head.h>
 #include <linux/capability.h>
 #include <linux/quotaops.h>
 #include <linux/types.h>
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 462ceb38fec6..aec766abe3af 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -52,7 +52,7 @@ static struct backing_dev_info ramfs_backing_dev_info = {
 };
 struct inode *ramfs_get_inode(struct super_block *sb,
-                                const struct inode *dir, int mode, dev_t dev)
+                                const struct inode *dir, umode_t mode, dev_t dev)
 {
        struct inode * inode = new_inode(sb);
@@ -92,7 +92,7 @@ struct inode *ramfs_get_inode(struct super_block *sb,
 */
 /* SMP-safe */
 static int
-ramfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
+ramfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
 {
        struct inode * inode = ramfs_get_inode(dir->i_sb, dir, mode, dev);
        int error = -ENOSPC;
@@ -106,7 +106,7 @@ ramfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
        return error;
 }
-static int ramfs_mkdir(struct inode * dir, struct dentry * dentry, int mode)
+static int ramfs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
 {
        int retval = ramfs_mknod(dir, dentry, mode | S_IFDIR, 0);
        if (!retval)
@@ -114,7 +114,7 @@ static int ramfs_mkdir(struct inode * dir, struct dentry * dentry, int mode)
        return retval;
 }
-static int ramfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd)
+static int ramfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd)
 {
        return ramfs_mknod(dir, dentry, mode | S_IFREG, 0);
 }
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index d1aca1df4f92..a945cd265228 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -13,6 +13,7 @@
 #include <linux/reiserfs_fs_sb.h>
 #include <linux/reiserfs_fs_i.h>
 #include <linux/quotaops.h>
+#include <linux/seq_file.h>
 #define PREALLOCATION_SIZE 9
@@ -634,6 +635,96 @@ int reiserfs_parse_alloc_options(struct super_block *s, char *options)
        return 0;
 }
+static void print_sep(struct seq_file *seq, int *first)
+{       /* Put ":" between items: called before each item is printed. */
+        if (!*first)    /* not the first item: emit the separator */
+                seq_puts(seq, ":");
+        else            /* first item: no separator, just clear the flag */
+                *first = 0;
+}
+void show_alloc_options(struct seq_file *seq, struct super_block *s)
+{       /* Write the ",alloc=a:b:..." mount-option text for s into seq. */
+        int first = 1;  /* nothing printed yet; cleared by print_sep() */
+        if (SB_ALLOC_OPTS(s) == ((1 << _ALLOC_skip_busy) |
+                (1 << _ALLOC_dirid_groups) | (1 << _ALLOC_packing_groups)))
+                return; /* NOTE(review): also skips prealloc* below */
+        seq_puts(seq, ",alloc=");
+        if (TEST_OPTION(concentrating_formatted_nodes, s)) {
+                print_sep(seq, &first);
+                if (REISERFS_SB(s)->s_alloc_options.border != 10) {
+                        seq_printf(seq, "concentrating_formatted_nodes=%d",
+                                100 / REISERFS_SB(s)->s_alloc_options.border);
+                } else  /* border == 10: bare name, no "=value" */
+                        seq_puts(seq, "concentrating_formatted_nodes");
+        }
+        if (TEST_OPTION(displacing_large_files, s)) {
+                print_sep(seq, &first);
+                if (REISERFS_SB(s)->s_alloc_options.large_file_size != 16) {
+                        seq_printf(seq, "displacing_large_files=%lu",
+                            REISERFS_SB(s)->s_alloc_options.large_file_size);
+                } else  /* large_file_size == 16: bare name, no "=value" */
+                        seq_puts(seq, "displacing_large_files");
+        }
+        if (TEST_OPTION(displacing_new_packing_localities, s)) {
+                print_sep(seq, &first);
+                seq_puts(seq, "displacing_new_packing_localities");
+        }
+        if (TEST_OPTION(old_hashed_relocation, s)) {
+                print_sep(seq, &first);
+                seq_puts(seq, "old_hashed_relocation");
+        }
+        if (TEST_OPTION(new_hashed_relocation, s)) {
+                print_sep(seq, &first);
+                seq_puts(seq, "new_hashed_relocation");
+        }
+        if (TEST_OPTION(dirid_groups, s)) {
+                print_sep(seq, &first);
+                seq_puts(seq, "dirid_groups");
+        }
+        if (TEST_OPTION(oid_groups, s)) {
+                print_sep(seq, &first);
+                seq_puts(seq, "oid_groups");
+        }
+        if (TEST_OPTION(packing_groups, s)) {
+                print_sep(seq, &first);
+                seq_puts(seq, "packing_groups");
+        }
+        if (TEST_OPTION(hashed_formatted_nodes, s)) {
+                print_sep(seq, &first);
+                seq_puts(seq, "hashed_formatted_nodes");
+        }
+        if (TEST_OPTION(skip_busy, s)) {
+                print_sep(seq, &first);
+                seq_puts(seq, "skip_busy");
+        }
+        if (TEST_OPTION(hundredth_slices, s)) {
+                print_sep(seq, &first);
+                seq_puts(seq, "hundredth_slices");
+        }
+        if (TEST_OPTION(old_way, s)) {
+                print_sep(seq, &first);
+                seq_puts(seq, "old_way");
+        }
+        if (TEST_OPTION(displace_based_on_dirid, s)) {
+                print_sep(seq, &first);
+                seq_puts(seq, "displace_based_on_dirid");
+        }
+        if (REISERFS_SB(s)->s_alloc_options.preallocmin != 0) {
+                print_sep(seq, &first);
+                seq_printf(seq, "preallocmin=%d",
+                                REISERFS_SB(s)->s_alloc_options.preallocmin);
+        }       /* preallocmin is printed only when non-zero */
+        if (REISERFS_SB(s)->s_alloc_options.preallocsize != 17) {
+                print_sep(seq, &first);
+                seq_printf(seq, "preallocsize=%d",
+                                REISERFS_SB(s)->s_alloc_options.preallocsize);
+        }       /* preallocsize is printed only when it is not 17 */
+}
 static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint)
 {
        char *hash_in;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 950f13af0951..9e8cd5acd79c 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1766,7 +1766,7 @@ static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct i
   for the fresh inode.  This can only be done outside a transaction, so
   if we return non-zero, we also end the transaction.  */
 int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
-                       struct inode *dir, int mode, const char *symname,
+                       struct inode *dir, umode_t mode, const char *symname,
                       /* 0 for regular, EMTRY_DIR_SIZE for dirs,
                          strlen (symname) for symlinks) */
                       loff_t i_size, struct dentry *dentry,
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 4e153051bc75..950e3d1b5c9e 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -55,7 +55,7 @@ long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                                break;
                        }
-                        err = mnt_want_write(filp->f_path.mnt);
+                        err = mnt_want_write_file(filp);
                        if (err)
                                break;
@@ -96,7 +96,7 @@ long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                        inode->i_ctime = CURRENT_TIME_SEC;
                        mark_inode_dirty(inode);
 setflags_out:
-                        mnt_drop_write(filp->f_path.mnt);
+                        mnt_drop_write_file(filp);
                        break;
                }
        case REISERFS_IOC_GETVERSION:
@@ -107,7 +107,7 @@ setflags_out:
                        err = -EPERM;
                        break;
                }
-                err = mnt_want_write(filp->f_path.mnt);
+                err = mnt_want_write_file(filp);
                if (err)
                        break;
                if (get_user(inode->i_generation, (int __user *)arg)) {
@@ -117,7 +117,7 @@ setflags_out:
                inode->i_ctime = CURRENT_TIME_SEC;
                mark_inode_dirty(inode);
 setversion_out:
-                mnt_drop_write(filp->f_path.mnt);
+                mnt_drop_write_file(filp);
                break;
        default:
                err = -ENOTTY;
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 80058e8ce361..146378865239 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -559,7 +559,7 @@ static int drop_new_inode(struct inode *inode)
 ** outside of a transaction, so we had to pull some bits of
 ** reiserfs_new_inode out into this func.
 */
-static int new_inode_init(struct inode *inode, struct inode *dir, int mode)
+static int new_inode_init(struct inode *inode, struct inode *dir, umode_t mode)
 {
        /* Make inode invalid - just in case we are going to drop it before
         * the initialization happens */
@@ -572,7 +572,7 @@ static int new_inode_init(struct inode *inode, struct inode *dir, int mode)
        return 0;
 }
-static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode,
+static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
                           struct nameidata *nd)
 {
        int retval;
@@ -643,7 +643,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode,
        return retval;
 }
-static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
+static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
                          dev_t rdev)
 {
        int retval;
@@ -721,7 +721,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
        return retval;
 }
-static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
        int retval;
        struct inode *inode;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 14363b96b6af..19c454e61b79 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -28,6 +28,7 @@
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/crc32.h>
+#include <linux/seq_file.h>
 struct file_system_type reiserfs_fs_type;
@@ -61,6 +62,7 @@ static int is_any_reiserfs_magic_string(struct reiserfs_super_block *rs)
 static int reiserfs_remount(struct super_block *s, int *flags, char *data);
 static int reiserfs_statfs(struct dentry *dentry, struct kstatfs *buf);
+void show_alloc_options(struct seq_file *seq, struct super_block *s);
 static int reiserfs_sync_fs(struct super_block *s, int wait)
 {
@@ -532,7 +534,6 @@ static struct inode *reiserfs_alloc_inode(struct super_block *sb)
 static void reiserfs_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode));
 }
@@ -597,6 +598,82 @@ out:
        reiserfs_write_unlock_once(inode->i_sb, lock_depth);
 }
+static int reiserfs_show_options(struct seq_file *seq, struct dentry *root)
+{       /* Emit the mount options of root's superblock as ",opt" text. */
+        struct super_block *s = root->d_sb;
+        struct reiserfs_journal *journal = SB_JOURNAL(s);
+        long opts = REISERFS_SB(s)->s_mount_opt;        /* option bit mask */
+        if (opts & (1 << REISERFS_LARGETAIL))
+                seq_puts(seq, ",tails=on");
+        else if (!(opts & (1 << REISERFS_SMALLTAIL)))
+                seq_puts(seq, ",notail");
+        /* tails=small is default so we don't show it */
+        if (!(opts & (1 << REISERFS_BARRIER_FLUSH)))
+                seq_puts(seq, ",barrier=none");
+        /* barrier=flush is default so we don't show it */
+        if (opts & (1 << REISERFS_ERROR_CONTINUE))
+                seq_puts(seq, ",errors=continue");
+        else if (opts & (1 << REISERFS_ERROR_PANIC))
+                seq_puts(seq, ",errors=panic");
+        /* errors=ro is default so we don't show it */
+        if (opts & (1 << REISERFS_DATA_LOG))
+                seq_puts(seq, ",data=journal");
+        else if (opts & (1 << REISERFS_DATA_WRITEBACK))
+                seq_puts(seq, ",data=writeback");
+        /* data=ordered is default so we don't show it */
+        if (opts & (1 << REISERFS_ATTRS))
+                seq_puts(seq, ",attrs");
+        if (opts & (1 << REISERFS_XATTRS_USER))
+                seq_puts(seq, ",user_xattr");
+        if (opts & (1 << REISERFS_EXPOSE_PRIVROOT))
+                seq_puts(seq, ",expose_privroot");
+        if (opts & (1 << REISERFS_POSIXACL))
+                seq_puts(seq, ",acl");
+        if (REISERFS_SB(s)->s_jdev)     /* skipped when no name is stored */
+                seq_printf(seq, ",jdev=%s", REISERFS_SB(s)->s_jdev);
+        if (journal->j_max_commit_age != journal->j_default_max_commit_age)
+                seq_printf(seq, ",commit=%d", journal->j_max_commit_age);
+#ifdef CONFIG_QUOTA
+        if (REISERFS_SB(s)->s_qf_names[USRQUOTA])
+                seq_printf(seq, ",usrjquota=%s", REISERFS_SB(s)->s_qf_names[USRQUOTA]);
+        else if (opts & (1 << REISERFS_USRQUOTA))
+                seq_puts(seq, ",usrquota");
+        if (REISERFS_SB(s)->s_qf_names[GRPQUOTA])
+                seq_printf(seq, ",grpjquota=%s", REISERFS_SB(s)->s_qf_names[GRPQUOTA]);
+        else if (opts & (1 << REISERFS_GRPQUOTA))
+                seq_puts(seq, ",grpquota");
+        if (REISERFS_SB(s)->s_jquota_fmt) {     /* 0: no jqfmt to report */
+                if (REISERFS_SB(s)->s_jquota_fmt == QFMT_VFS_OLD)
+                        seq_puts(seq, ",jqfmt=vfsold");
+                else if (REISERFS_SB(s)->s_jquota_fmt == QFMT_VFS_V0)
+                        seq_puts(seq, ",jqfmt=vfsv0");
+        }
+#endif
+        /* Block allocator options */
+        if (opts & (1 << REISERFS_NO_BORDER))
+                seq_puts(seq, ",block-allocator=noborder");
+        if (opts & (1 << REISERFS_NO_UNHASHED_RELOCATION))
+                seq_puts(seq, ",block-allocator=no_unhashed_relocation");
+        if (opts & (1 << REISERFS_HASHED_RELOCATION))
+                seq_puts(seq, ",block-allocator=hashed_relocation");
+        if (opts & (1 << REISERFS_TEST4))
+                seq_puts(seq, ",block-allocator=test4");
+        show_alloc_options(seq, s);     /* the "alloc=" option is handled there */
+        return 0;       /* no failure path: always returns 0 */
+}
 #ifdef CONFIG_QUOTA
 static ssize_t reiserfs_quota_write(struct super_block *, int, const char *,
                                    size_t, loff_t);
@@ -617,7 +694,7 @@ static const struct super_operations reiserfs_sops = {
        .unfreeze_fs = reiserfs_unfreeze,
        .statfs = reiserfs_statfs,
        .remount_fs = reiserfs_remount,
-        .show_options = generic_show_options,
+        .show_options = reiserfs_show_options,
 #ifdef CONFIG_QUOTA
        .quota_read = reiserfs_quota_read,
        .quota_write = reiserfs_quota_write,
@@ -915,9 +992,9 @@ static int reiserfs_parse_options(struct super_block *s, char *options,	/* strin
                {"jdev",.arg_required = 'j',.values = NULL},
                {"nolargeio",.arg_required = 'w',.values = NULL},
                {"commit",.arg_required = 'c',.values = NULL},
-                {"usrquota",.setmask = 1 << REISERFS_QUOTA},
+                {"usrquota",.setmask = 1 << REISERFS_USRQUOTA},
-                {"grpquota",.setmask = 1 << REISERFS_QUOTA},
+                {"grpquota",.setmask = 1 << REISERFS_GRPQUOTA},
-                {"noquota",.clrmask = 1 << REISERFS_QUOTA},
+                {"noquota",.clrmask = 1 << REISERFS_USRQUOTA | 1 << REISERFS_GRPQUOTA},
                {"errors",.arg_required = 'e',.values = error_actions},
                {"usrjquota",.arg_required =
                 'u' | (1 << REISERFS_OPT_ALLOWEMPTY),.values = NULL},
@@ -1031,12 +1108,19 @@ static int reiserfs_parse_options(struct super_block *s, char *options,	/* strin
                                        return 0;
                                }
                                strcpy(qf_names[qtype], arg);
-                                *mount_options |= 1 << REISERFS_QUOTA;
+                                if (qtype == USRQUOTA)
+                                        *mount_options |= 1 << REISERFS_USRQUOTA;
+                                else
+                                        *mount_options |= 1 << REISERFS_GRPQUOTA;
                        } else {
                                if (qf_names[qtype] !=
                                    REISERFS_SB(s)->s_qf_names[qtype])
                                        kfree(qf_names[qtype]);
                                qf_names[qtype] = NULL;
+                                if (qtype == USRQUOTA)
+                                        *mount_options &= ~(1 << REISERFS_USRQUOTA);
+                                else
+                                        *mount_options &= ~(1 << REISERFS_GRPQUOTA);
                        }
                }
                if (c == 'f') {
@@ -1075,9 +1159,10 @@ static int reiserfs_parse_options(struct super_block *s, char *options,	/* strin
                                 "journaled quota format not specified.");
                return 0;
        }
-        /* This checking is not precise wrt the quota type but for our purposes it is sufficient */
+        if ((!(*mount_options & (1 << REISERFS_USRQUOTA)) &&
-        if (!(*mount_options & (1 << REISERFS_QUOTA))
+               sb_has_quota_loaded(s, USRQUOTA)) ||
-            && sb_any_quota_loaded(s)) {
+            (!(*mount_options & (1 << REISERFS_GRPQUOTA)) &&
+               sb_has_quota_loaded(s, GRPQUOTA))) {
                reiserfs_warning(s, "super-6516", "quota options must "
                                 "be present when quota is turned on.");
                return 0;
@@ -1225,7 +1310,8 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
        safe_mask |= 1 << REISERFS_ERROR_RO;
        safe_mask |= 1 << REISERFS_ERROR_CONTINUE;
        safe_mask |= 1 << REISERFS_ERROR_PANIC;
-        safe_mask |= 1 << REISERFS_QUOTA;
+        safe_mask |= 1 << REISERFS_USRQUOTA;
+        safe_mask |= 1 << REISERFS_GRPQUOTA;
        /* Update the bitmask, taking care to keep
         * the bits we're not allowed to change here */
@@ -1672,6 +1758,14 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
             &commit_max_age, qf_names, &qfmt) == 0) {
                goto error;
        }
+        if (jdev_name && jdev_name[0]) {
+                REISERFS_SB(s)->s_jdev = kstrdup(jdev_name, GFP_KERNEL);
+                if (!REISERFS_SB(s)->s_jdev) {
+                        SWARN(silent, s, "", "Cannot allocate memory for "
+                                "journal device name");
+                        goto error;
+                }
+        }
 #ifdef CONFIG_QUOTA
        handle_quota_files(s, qf_names, &qfmt);
 #endif
@@ -2054,12 +2148,13 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
        int err;
        struct inode *inode;
        struct reiserfs_transaction_handle th;
+        int opt = type == USRQUOTA ? REISERFS_USRQUOTA : REISERFS_GRPQUOTA;
-        if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA)))
+        if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt)))
                return -EINVAL;
        /* Quotafile not on the same filesystem? */
-        if (path->mnt->mnt_sb != sb) {
+        if (path->dentry->d_sb != sb) {
                err = -EXDEV;
                goto out;
        }
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 6bc346c160e7..c24deda8a8bc 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -66,7 +66,7 @@ static int xattr_create(struct inode *dir, struct dentry *dentry, int mode)
 }
 #endif
-static int xattr_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int xattr_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
        BUG_ON(!mutex_is_locked(&dir->i_mutex));
        return dir->i_op->mkdir(dir, dentry, mode);
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 8b4089f30408..bb36ab74eb45 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -403,7 +403,6 @@ static struct inode *romfs_alloc_inode(struct super_block *sb)
 static void romfs_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode));
 }
diff --git a/fs/seq_file.c b/fs/seq_file.c
index dba43c3ea3af..4023d6be939b 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -397,7 +397,7 @@ EXPORT_SYMBOL(seq_printf);
 *      Returns pointer past last written character in @s, or NULL in case of
 *      failure.
 */
-char *mangle_path(char *s, char *p, char *esc)
+char *mangle_path(char *s, const char *p, const char *esc)
 {
        while (s <= p) {
                char c = *p++;
@@ -427,7 +427,7 @@ EXPORT_SYMBOL(mangle_path);
 * return the absolute path of 'path', as represented by the
 * dentry / mnt pair in the path parameter.
 */
-int seq_path(struct seq_file *m, struct path *path, char *esc)
+int seq_path(struct seq_file *m, const struct path *path, const char *esc)
 {
        char *buf;
        size_t size = seq_get_buf(m, &buf);
@@ -450,8 +450,8 @@ EXPORT_SYMBOL(seq_path);
 /*
 * Same as seq_path, but relative to supplied root.
 */
-int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
+int seq_path_root(struct seq_file *m, const struct path *path,
-                  char *esc)
+                  const struct path *root, const char *esc)
 {
        char *buf;
        size_t size = seq_get_buf(m, &buf);
@@ -480,7 +480,7 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
 /*
 * returns the path of the 'dentry' from the root of its filesystem.
 */
-int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc)
+int seq_dentry(struct seq_file *m, struct dentry *dentry, const char *esc)
 {
        char *buf;
        size_t size = seq_get_buf(m, &buf);
diff --git a/fs/splice.c b/fs/splice.c
index fa2defa8afcf..1ec0493266b3 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -25,7 +25,6 @@
 #include <linux/mm_inline.h>
 #include <linux/swap.h>
 #include <linux/writeback.h>
-#include <linux/buffer_head.h>
 #include <linux/module.h>
 #include <linux/syscalls.h>
 #include <linux/uio.h>
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 2da1715452ac..d0858c2d9a47 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -464,7 +464,6 @@ static struct inode *squashfs_alloc_inode(struct super_block *sb)
 static void squashfs_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(squashfs_inode_cachep, squashfs_i(inode));
 }
diff --git a/fs/statfs.c b/fs/statfs.c
index 9cf04a118965..2aa6a22e0be2 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -7,6 +7,7 @@
 #include <linux/statfs.h>
 #include <linux/security.h>
 #include <linux/uaccess.h>
+#include "internal.h"
 static int flags_by_mnt(int mnt_flags)
 {
@@ -45,7 +46,7 @@ static int calculate_f_flags(struct vfsmount *mnt)
                flags_by_sb(mnt->mnt_sb->s_flags);
 }
-int statfs_by_dentry(struct dentry *dentry, struct kstatfs *buf)
+static int statfs_by_dentry(struct dentry *dentry, struct kstatfs *buf)
 {
        int retval;
@@ -205,19 +206,23 @@ SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user
        return error;
 }
-SYSCALL_DEFINE2(ustat, unsigned, dev, struct ustat __user *, ubuf)
+int vfs_ustat(dev_t dev, struct kstatfs *sbuf)
 {
-        struct super_block *s;
+        struct super_block *s = user_get_super(dev);
-        struct ustat tmp;
-        struct kstatfs sbuf;
        int err;
-        s = user_get_super(new_decode_dev(dev));
        if (!s)
                return -EINVAL;
-        err = statfs_by_dentry(s->s_root, &sbuf);
+        err = statfs_by_dentry(s->s_root, sbuf);
        drop_super(s);
+        return err;
+}
+SYSCALL_DEFINE2(ustat, unsigned, dev, struct ustat __user *, ubuf)
+{
+        struct ustat tmp;
+        struct kstatfs sbuf;
+        int err = vfs_ustat(new_decode_dev(dev), &sbuf);
        if (err)
                return err;
diff --git a/fs/super.c b/fs/super.c
index afd0f1ad45e0..de41e1e46f09 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -136,12 +136,13 @@ static struct super_block *alloc_super(struct file_system_type *type)
                INIT_LIST_HEAD(&s->s_files);
 #endif
                s->s_bdi = &default_backing_dev_info;
-                INIT_LIST_HEAD(&s->s_instances);
+                INIT_HLIST_NODE(&s->s_instances);
                INIT_HLIST_BL_HEAD(&s->s_anon);
                INIT_LIST_HEAD(&s->s_inodes);
                INIT_LIST_HEAD(&s->s_dentry_lru);
                INIT_LIST_HEAD(&s->s_inode_lru);
                spin_lock_init(&s->s_inode_lru_lock);
+                INIT_LIST_HEAD(&s->s_mounts);
                init_rwsem(&s->s_umount);
                mutex_init(&s->s_lock);
                lockdep_set_class(&s->s_umount, &type->s_umount_key);
@@ -200,6 +201,7 @@ static inline void destroy_super(struct super_block *s)
        free_percpu(s->s_files);
 #endif
        security_sb_free(s);
+        WARN_ON(!list_empty(&s->s_mounts));
        kfree(s->s_subtype);
        kfree(s->s_options);
        kfree(s);
@@ -210,7 +212,7 @@ static inline void destroy_super(struct super_block *s)
 /*
 * Drop a superblock's refcount.  The caller must hold sb_lock.
 */
-void __put_super(struct super_block *sb)
+static void __put_super(struct super_block *sb)
 {
        if (!--sb->s_count) {
                list_del_init(&sb->s_list);
@@ -225,7 +227,7 @@ void __put_super(struct super_block *sb)
 *      Drops a temporary reference, frees superblock if there's no
 *      references left.
 */
-void put_super(struct super_block *sb)
+static void put_super(struct super_block *sb)
 {
        spin_lock(&sb_lock);
        __put_super(sb);
@@ -328,7 +330,7 @@ static int grab_super(struct super_block *s) __releases(sb_lock)
 bool grab_super_passive(struct super_block *sb)
 {
        spin_lock(&sb_lock);
-        if (list_empty(&sb->s_instances)) {
+        if (hlist_unhashed(&sb->s_instances)) {
                spin_unlock(&sb_lock);
                return false;
        }
@@ -337,7 +339,7 @@ bool grab_super_passive(struct super_block *sb)
        spin_unlock(&sb_lock);
        if (down_read_trylock(&sb->s_umount)) {
-                if (sb->s_root)
+                if (sb->s_root && (sb->s_flags & MS_BORN))
                        return true;
                up_read(&sb->s_umount);
        }
@@ -400,7 +402,7 @@ void generic_shutdown_super(struct super_block *sb)
        }
        spin_lock(&sb_lock);
        /* should be initialized for __put_super_and_need_restart() */
-        list_del_init(&sb->s_instances);
+        hlist_del_init(&sb->s_instances);
        spin_unlock(&sb_lock);
        up_write(&sb->s_umount);
 }
@@ -420,13 +422,14 @@ struct super_block *sget(struct file_system_type *type,
                        void *data)
 {
        struct super_block *s = NULL;
+        struct hlist_node *node;
        struct super_block *old;
        int err;
 retry:
        spin_lock(&sb_lock);
        if (test) {
-                list_for_each_entry(old, &type->fs_supers, s_instances) {
+                hlist_for_each_entry(old, node, &type->fs_supers, s_instances) {
                        if (!test(old, data))
                                continue;
                        if (!grab_super(old))
@@ -462,7 +465,7 @@ retry:
        s->s_type = type;
        strlcpy(s->s_id, type->name, sizeof(s->s_id));
        list_add_tail(&s->s_list, &super_blocks);
-        list_add(&s->s_instances, &type->fs_supers);
+        hlist_add_head(&s->s_instances, &type->fs_supers);
        spin_unlock(&sb_lock);
        get_filesystem(type);
        register_shrinker(&s->s_shrink);
@@ -497,14 +500,14 @@ void sync_supers(void)
        spin_lock(&sb_lock);
        list_for_each_entry(sb, &super_blocks, s_list) {
-                if (list_empty(&sb->s_instances))
+                if (hlist_unhashed(&sb->s_instances))
                        continue;
                if (sb->s_op->write_super && sb->s_dirt) {
                        sb->s_count++;
                        spin_unlock(&sb_lock);
                        down_read(&sb->s_umount);
-                        if (sb->s_root && sb->s_dirt)
+                        if (sb->s_root && sb->s_dirt && (sb->s_flags & MS_BORN))
                                sb->s_op->write_super(sb);
                        up_read(&sb->s_umount);
@@ -533,13 +536,13 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
        spin_lock(&sb_lock);
        list_for_each_entry(sb, &super_blocks, s_list) {
-                if (list_empty(&sb->s_instances))
+                if (hlist_unhashed(&sb->s_instances))
                        continue;
                sb->s_count++;
                spin_unlock(&sb_lock);
                down_read(&sb->s_umount);
-                if (sb->s_root)
+                if (sb->s_root && (sb->s_flags & MS_BORN))
                        f(sb, arg);
                up_read(&sb->s_umount);
@@ -566,14 +569,15 @@ void iterate_supers_type(struct file_system_type *type,
        void (*f)(struct super_block *, void *), void *arg)
 {
        struct super_block *sb, *p = NULL;
+        struct hlist_node *node;
        spin_lock(&sb_lock);
-        list_for_each_entry(sb, &type->fs_supers, s_instances) {
+        hlist_for_each_entry(sb, node, &type->fs_supers, s_instances) {
                sb->s_count++;
                spin_unlock(&sb_lock);
                down_read(&sb->s_umount);
-                if (sb->s_root)
+                if (sb->s_root && (sb->s_flags & MS_BORN))
                        f(sb, arg);
                up_read(&sb->s_umount);
@@ -607,14 +611,14 @@ struct super_block *get_super(struct block_device *bdev)
        spin_lock(&sb_lock);
 rescan:
        list_for_each_entry(sb, &super_blocks, s_list) {
-                if (list_empty(&sb->s_instances))
+                if (hlist_unhashed(&sb->s_instances))
                        continue;
                if (sb->s_bdev == bdev) {
                        sb->s_count++;
                        spin_unlock(&sb_lock);
                        down_read(&sb->s_umount);
                        /* still alive? */
-                        if (sb->s_root)
+                        if (sb->s_root && (sb->s_flags & MS_BORN))
                                return sb;
                        up_read(&sb->s_umount);
                        /* nope, got unmounted */
@@ -647,7 +651,7 @@ struct super_block *get_active_super(struct block_device *bdev)
 restart:
        spin_lock(&sb_lock);
        list_for_each_entry(sb, &super_blocks, s_list) {
-                if (list_empty(&sb->s_instances))
+                if (hlist_unhashed(&sb->s_instances))
                        continue;
                if (sb->s_bdev == bdev) {
                        if (grab_super(sb)) /* drops sb_lock */
@@ -667,14 +671,14 @@ struct super_block *user_get_super(dev_t dev)
        spin_lock(&sb_lock);
 rescan:
        list_for_each_entry(sb, &super_blocks, s_list) {
-                if (list_empty(&sb->s_instances))
+                if (hlist_unhashed(&sb->s_instances))
                        continue;
                if (sb->s_dev ==  dev) {
                        sb->s_count++;
                        spin_unlock(&sb_lock);
                        down_read(&sb->s_umount);
                        /* still alive? */
-                        if (sb->s_root)
+                        if (sb->s_root && (sb->s_flags & MS_BORN))
                                return sb;
                        up_read(&sb->s_umount);
                        /* nope, got unmounted */
@@ -719,23 +723,29 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
        /* If we are remounting RDONLY and current sb is read/write,
           make sure there are no rw files opened */
        if (remount_ro) {
-                if (force)
+                if (force) {
                        mark_files_ro(sb);
-                else if (!fs_may_remount_ro(sb))
+                } else {
-                        return -EBUSY;
+                        retval = sb_prepare_remount_readonly(sb);
+                        if (retval)
+                                return retval;
+                }
        }
        if (sb->s_op->remount_fs) {
                retval = sb->s_op->remount_fs(sb, &flags, data);
                if (retval) {
                        if (!force)
-                                return retval;
+                                goto cancel_readonly;
                        /* If forced remount, go ahead despite any errors */
                        WARN(1, "forced remount of a %s fs returned %i\n",
                             sb->s_type->name, retval);
                }
        }
        sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
+        /* Needs to be ordered wrt mnt_is_readonly() */
+        smp_wmb();
+        sb->s_readonly_remount = 0;
        /*
         * Some filesystems modify their metadata via some other path than the
@@ -748,6 +758,10 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
        if (remount_ro && sb->s_bdev)
                invalidate_bdev(sb->s_bdev);
        return 0;
+cancel_readonly:
+        sb->s_readonly_remount = 0;
+        return retval;
 }
 static void do_emergency_remount(struct work_struct *work)
@@ -756,12 +770,13 @@ static void do_emergency_remount(struct work_struct *work)
        spin_lock(&sb_lock);
        list_for_each_entry(sb, &super_blocks, s_list) {
-                if (list_empty(&sb->s_instances))
+                if (hlist_unhashed(&sb->s_instances))
                        continue;
                sb->s_count++;
                spin_unlock(&sb_lock);
                down_write(&sb->s_umount);
-                if (sb->s_root && sb->s_bdev && !(sb->s_flags & MS_RDONLY)) {
+                if (sb->s_root && sb->s_bdev && (sb->s_flags & MS_BORN) &&
+                    !(sb->s_flags & MS_RDONLY)) {
                        /*
                         * What lock protects sb->s_flags??
                         */
@@ -1144,6 +1159,11 @@ int freeze_super(struct super_block *sb)
                return -EBUSY;
        }
+        if (!(sb->s_flags & MS_BORN)) {
+                up_write(&sb->s_umount);
+                return 0;       /* sic - it's "nothing to do" */
+        }
        if (sb->s_flags & MS_RDONLY) {
                sb->s_frozen = SB_FREEZE_TRANS;
                smp_wmb();
diff --git a/fs/sync.c b/fs/sync.c
index 101b8ef901d7..f3501ef39235 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -14,7 +14,6 @@
 #include <linux/linkage.h>
 #include <linux/pagemap.h>
 #include <linux/quotaops.h>
-#include <linux/buffer_head.h>
 #include <linux/backing-dev.h>
 #include "internal.h"
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index d4e6080b4b20..62f4fb37789e 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -518,7 +518,7 @@ out:
 }
 int sysfs_add_file_mode(struct sysfs_dirent *dir_sd,
-                        const struct attribute *attr, int type, mode_t amode)
+                        const struct attribute *attr, int type, umode_t amode)
 {
        umode_t mode = (amode & S_IALLUGO) | S_IFREG;
        struct sysfs_addrm_cxt acxt;
@@ -618,7 +618,7 @@ EXPORT_SYMBOL_GPL(sysfs_add_file_to_group);
 *
 */
 int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr,
-                     mode_t mode)
+                     umode_t mode)
 {
        struct sysfs_dirent *sd;
        struct iattr newattrs;
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 194414f8298c..dd1701caecc9 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -33,7 +33,7 @@ static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
        int error = 0, i;
        for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) {
-                mode_t mode = 0;
+                umode_t mode = 0;
                /* in update mode, we're changing the permissions or
                 * visibility.  Do this by first removing then
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index c81b22f3ace1..4a802b4a9056 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -187,7 +187,7 @@ out:
        return error;
 }
-static inline void set_default_inode_attr(struct inode * inode, mode_t mode)
+static inline void set_default_inode_attr(struct inode * inode, umode_t mode)
 {
        inode->i_mode = mode;
        inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index ce29e28b766d..7484a36ee678 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -79,7 +79,7 @@ struct sysfs_dirent {
        };
        unsigned int            s_flags;
-        unsigned short          s_mode;
+        umode_t                 s_mode;
        ino_t                   s_ino;
        struct sysfs_inode_attrs *s_iattr;
 };
@@ -229,7 +229,7 @@ int sysfs_add_file(struct sysfs_dirent *dir_sd,
                   const struct attribute *attr, int type);
 int sysfs_add_file_mode(struct sysfs_dirent *dir_sd,
-                        const struct attribute *attr, int type, mode_t amode);
+                        const struct attribute *attr, int type, umode_t amode);
 /*
 * bin.c
 */
diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c
index 0c96c98bd1db..8233b02eccae 100644
--- a/fs/sysv/ialloc.c
+++ b/fs/sysv/ialloc.c
@@ -132,7 +132,7 @@ void sysv_free_inode(struct inode * inode)
        brelse(bh);
 }
-struct inode * sysv_new_inode(const struct inode * dir, mode_t mode)
+struct inode * sysv_new_inode(const struct inode * dir, umode_t mode)
 {
        struct super_block *sb = dir->i_sb;
        struct sysv_sb_info *sbi = SYSV_SB(sb);
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 25ffb3e9a3f8..3da5ce25faf0 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -336,7 +336,6 @@ static struct inode *sysv_alloc_inode(struct super_block *sb)
 static void sysv_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(sysv_inode_cachep, SYSV_I(inode));
 }
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index fa8d43c92bb8..90b54b438789 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -442,7 +442,7 @@ static unsigned sysv_nblocks(struct super_block *s, loff_t size)
 int sysv_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 {
-        struct super_block *s = mnt->mnt_sb;
+        struct super_block *s = dentry->d_sb;
        generic_fillattr(dentry->d_inode, stat);
        stat->blocks = (s->s_blocksize / 512) * sysv_nblocks(s, stat->size);
        stat->blksize = s->s_blocksize;
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index e474fbcf8bde..b217797e621b 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -61,7 +61,7 @@ static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, st
        return NULL;
 }
-static int sysv_mknod(struct inode * dir, struct dentry * dentry, int mode, dev_t rdev)
+static int sysv_mknod(struct inode * dir, struct dentry * dentry, umode_t mode, dev_t rdev)
 {
        struct inode * inode;
        int err;
@@ -80,7 +80,7 @@ static int sysv_mknod(struct inode * dir, struct dentry * dentry, int mode, dev_
        return err;
 }
-static int sysv_create(struct inode * dir, struct dentry * dentry, int mode, struct nameidata *nd)
+static int sysv_create(struct inode * dir, struct dentry * dentry, umode_t mode, struct nameidata *nd)
 {
        return sysv_mknod(dir, dentry, mode, 0);
 }
@@ -131,7 +131,7 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir,
        return add_nondir(dentry, inode);
 }
-static int sysv_mkdir(struct inode * dir, struct dentry *dentry, int mode)
+static int sysv_mkdir(struct inode * dir, struct dentry *dentry, umode_t mode)
 {
        struct inode * inode;
        int err = -EMLINK;
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index bb55cdb394bf..0e4b821c5691 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -125,7 +125,7 @@ static inline void dirty_sb(struct super_block *sb)
 /* ialloc.c */
 extern struct sysv_inode *sysv_raw_inode(struct super_block *, unsigned,
                        struct buffer_head **);
-extern struct inode * sysv_new_inode(const struct inode *, mode_t);
+extern struct inode * sysv_new_inode(const struct inode *, umode_t);
 extern void sysv_free_inode(struct inode *);
 extern unsigned long sysv_count_free_inodes(struct super_block *);
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 683492043317..d6fe1c79f18b 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -56,7 +56,7 @@
 *
 * This function returns the inherited flags.
 */
-static int inherit_flags(const struct inode *dir, int mode)
+static int inherit_flags(const struct inode *dir, umode_t mode)
 {
        int flags;
        const struct ubifs_inode *ui = ubifs_inode(dir);
@@ -86,7 +86,7 @@ static int inherit_flags(const struct inode *dir, int mode)
 * case of failure.
 */
 struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
-                              int mode)
+                              umode_t mode)
 {
        struct inode *inode;
        struct ubifs_inode *ui;
@@ -253,7 +253,7 @@ out:
        return ERR_PTR(err);
 }
-static int ubifs_create(struct inode *dir, struct dentry *dentry, int mode,
+static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
                        struct nameidata *nd)
 {
        struct inode *inode;
@@ -268,7 +268,7 @@ static int ubifs_create(struct inode *dir, struct dentry *dentry, int mode,
         * parent directory inode.
         */
-        dbg_gen("dent '%.*s', mode %#x in dir ino %lu",
+        dbg_gen("dent '%.*s', mode %#hx in dir ino %lu",
                dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino);
        err = ubifs_budget_space(c, &req);
@@ -712,7 +712,7 @@ out_cancel:
        return err;
 }
-static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
        struct inode *inode;
        struct ubifs_inode *dir_ui = ubifs_inode(dir);
@@ -725,7 +725,7 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
         * directory inode.
         */
-        dbg_gen("dent '%.*s', mode %#x in dir ino %lu",
+        dbg_gen("dent '%.*s', mode %#hx in dir ino %lu",
                dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino);
        err = ubifs_budget_space(c, &req);
@@ -769,7 +769,7 @@ out_budg:
 }
 static int ubifs_mknod(struct inode *dir, struct dentry *dentry,
-                       int mode, dev_t rdev)
+                       umode_t mode, dev_t rdev)
 {
        struct inode *inode;
        struct ubifs_inode *ui;
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index 548acf494afd..1a7e2d8bdbe9 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -173,12 +173,12 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                 * Make sure the file-system is read-write and make sure it
                 * will not become read-only while we are changing the flags.
                 */
-                err = mnt_want_write(file->f_path.mnt);
+                err = mnt_want_write_file(file);
                if (err)
                        return err;
                dbg_gen("set flags: %#x, i_flags %#x", flags, inode->i_flags);
                err = setflags(inode, flags);
-                mnt_drop_write(file->f_path.mnt);
+                mnt_drop_write_file(file);
                return err;
        }
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 20403dc5d437..63765d58445b 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -276,7 +276,6 @@ static void ubifs_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
        struct ubifs_inode *ui = ubifs_inode(inode);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(ubifs_inode_slab, ui);
 }
@@ -420,9 +419,9 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf)
        return 0;
 }
-static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt)
+static int ubifs_show_options(struct seq_file *s, struct dentry *root)
 {
-        struct ubifs_info *c = mnt->mnt_sb->s_fs_info;
+        struct ubifs_info *c = root->d_sb->s_fs_info;
        if (c->mount_opts.unmount_mode == 2)
                seq_printf(s, ",fast_unmount");
@@ -2264,19 +2263,12 @@ static int __init ubifs_init(void)
                return -EINVAL;
        }
-        err = register_filesystem(&ubifs_fs_type);
-        if (err) {
-                ubifs_err("cannot register file system, error %d", err);
-                return err;
-        }
-        err = -ENOMEM;
        ubifs_inode_slab = kmem_cache_create("ubifs_inode_slab",
                                sizeof(struct ubifs_inode), 0,
                                SLAB_MEM_SPREAD | SLAB_RECLAIM_ACCOUNT,
                                &inode_slab_ctor);
        if (!ubifs_inode_slab)
-                goto out_reg;
+                return -ENOMEM;
        register_shrinker(&ubifs_shrinker_info);
@@ -2288,15 +2280,20 @@ static int __init ubifs_init(void)
        if (err)
                goto out_compr;
+        err = register_filesystem(&ubifs_fs_type);
+        if (err) {
+                ubifs_err("cannot register file system, error %d", err);
+                goto out_dbg;
+        }
        return 0;
+out_dbg:
+        dbg_debugfs_exit();
 out_compr:
        ubifs_compressors_exit();
 out_shrinker:
        unregister_shrinker(&ubifs_shrinker_info);
        kmem_cache_destroy(ubifs_inode_slab);
-out_reg:
-        unregister_filesystem(&ubifs_fs_type);
        return err;
 }
 /* late_initcall to let compressors initialize first */
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 27f22551f805..12e94774aa88 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1734,7 +1734,7 @@ int ubifs_setattr(struct dentry *dentry, struct iattr *attr);
 /* dir.c */
 struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
-                              int mode);
+                              umode_t mode);
 int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
                  struct kstat *stat);
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index 6fb7e0adcda0..05ab48195be9 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -46,7 +46,7 @@ void udf_free_inode(struct inode *inode)
        udf_free_blocks(sb, NULL, &UDF_I(inode)->i_location, 0, 1);
 }
-struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
+struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err)
 {
        struct super_block *sb = dir->i_sb;
        struct udf_sb_info *sbi = UDF_SB(sb);
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 4fd1d809738c..4598904be1bb 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -48,7 +48,7 @@ MODULE_LICENSE("GPL");
 #define EXTENT_MERGE_SIZE 5
-static mode_t udf_convert_permissions(struct fileEntry *);
+static umode_t udf_convert_permissions(struct fileEntry *);
 static int udf_update_inode(struct inode *, int);
 static void udf_fill_inode(struct inode *, struct buffer_head *);
 static int udf_sync_inode(struct inode *inode);
@@ -1452,9 +1452,9 @@ static int udf_alloc_i_data(struct inode *inode, size_t size)
        return 0;
 }
-static mode_t udf_convert_permissions(struct fileEntry *fe)
+static umode_t udf_convert_permissions(struct fileEntry *fe)
 {
-        mode_t mode;
+        umode_t mode;
        uint32_t permissions;
        uint32_t flags;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 4639e137222f..08bf46edf9c4 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -552,7 +552,7 @@ static int udf_delete_entry(struct inode *inode, struct fileIdentDesc *fi,
        return udf_write_fi(inode, cfi, fi, fibh, NULL, NULL);
 }
-static int udf_create(struct inode *dir, struct dentry *dentry, int mode,
+static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode,
                      struct nameidata *nd)
 {
        struct udf_fileident_bh fibh;
@@ -596,7 +596,7 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode,
        return 0;
 }
-static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode,
+static int udf_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
                     dev_t rdev)
 {
        struct inode *inode;
@@ -640,7 +640,7 @@ out:
        return err;
 }
-static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
        struct inode *inode;
        struct udf_fileident_bh fibh;
diff --git a/fs/udf/super.c b/fs/udf/super.c
index e185253470df..0c33225647a0 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -89,7 +89,7 @@ static void udf_open_lvid(struct super_block *);
 static void udf_close_lvid(struct super_block *);
 static unsigned int udf_count_free(struct super_block *);
 static int udf_statfs(struct dentry *, struct kstatfs *);
-static int udf_show_options(struct seq_file *, struct vfsmount *);
+static int udf_show_options(struct seq_file *, struct dentry *);
 struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi)
 {
@@ -138,7 +138,6 @@ static struct inode *udf_alloc_inode(struct super_block *sb)
 static void udf_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(udf_inode_cachep, UDF_I(inode));
 }
@@ -196,11 +195,11 @@ struct udf_options {
        unsigned int fileset;
        unsigned int rootdir;
        unsigned int flags;
-        mode_t umask;
+        umode_t umask;
        gid_t gid;
        uid_t uid;
-        mode_t fmode;
+        umode_t fmode;
-        mode_t dmode;
+        umode_t dmode;
        struct nls_table *nls_map;
 };
@@ -250,9 +249,9 @@ static int udf_sb_alloc_partition_maps(struct super_block *sb, u32 count)
        return 0;
 }
-static int udf_show_options(struct seq_file *seq, struct vfsmount *mnt)
+static int udf_show_options(struct seq_file *seq, struct dentry *root)
 {
-        struct super_block *sb = mnt->mnt_sb;
+        struct super_block *sb = root->d_sb;
        struct udf_sb_info *sbi = UDF_SB(sb);
        if (!UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT))
@@ -280,11 +279,11 @@ static int udf_show_options(struct seq_file *seq, struct vfsmount *mnt)
        if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_SET))
                seq_printf(seq, ",gid=%u", sbi->s_gid);
        if (sbi->s_umask != 0)
-                seq_printf(seq, ",umask=%o", sbi->s_umask);
+                seq_printf(seq, ",umask=%ho", sbi->s_umask);
        if (sbi->s_fmode != UDF_INVALID_MODE)
-                seq_printf(seq, ",mode=%o", sbi->s_fmode);
+                seq_printf(seq, ",mode=%ho", sbi->s_fmode);
        if (sbi->s_dmode != UDF_INVALID_MODE)
-                seq_printf(seq, ",dmode=%o", sbi->s_dmode);
+                seq_printf(seq, ",dmode=%ho", sbi->s_dmode);
        if (UDF_QUERY_FLAG(sb, UDF_FLAG_SESSION_SET))
                seq_printf(seq, ",session=%u", sbi->s_session);
        if (UDF_QUERY_FLAG(sb, UDF_FLAG_LASTBLOCK_SET))
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index 5142a82e3276..42ad69ac9576 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -50,7 +50,7 @@
 #define UDF_SPARABLE_MAP15              0x1522U
 #define UDF_METADATA_MAP25              0x2511U
-#define UDF_INVALID_MODE                ((mode_t)-1)
+#define UDF_INVALID_MODE                ((umode_t)-1)
 #pragma pack(1) /* XXX(hch): Why?  This file just defines in-core structures */
@@ -127,11 +127,11 @@ struct udf_sb_info {
        struct buffer_head      *s_lvid_bh;
        /* Default permissions */
-        mode_t                  s_umask;
+        umode_t                 s_umask;
        gid_t                   s_gid;
        uid_t                   s_uid;
-        mode_t                  s_fmode;
+        umode_t                 s_fmode;
-        mode_t                  s_dmode;
+        umode_t                 s_dmode;
        /* Lock protecting consistency of above permission settings */
        rwlock_t                s_cred_lock;
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index f34e6fc0cdaa..ebe10314e512 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -215,7 +215,7 @@ extern int udf_CS0toUTF8(struct ustr *, const struct ustr *);
 /* ialloc.c */
 extern void udf_free_inode(struct inode *);
-extern struct inode *udf_new_inode(struct inode *, int, int *);
+extern struct inode *udf_new_inode(struct inode *, umode_t, int *);
 /* truncate.c */
 extern void udf_truncate_tail_extent(struct inode *);
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 78a4c70d46b5..4ec5c1085a87 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -170,7 +170,7 @@ static void ufs2_init_inodes_chunk(struct super_block *sb,
 * For other inodes, search forward from the parent directory's block
 * group to find a free inode.
 */
-struct inode * ufs_new_inode(struct inode * dir, int mode)
+struct inode *ufs_new_inode(struct inode *dir, umode_t mode)
 {
        struct super_block * sb;
        struct ufs_sb_info * sbi;
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 879b13436fa4..9094e1d917be 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -583,7 +583,7 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode)
 {
        struct ufs_inode_info *ufsi = UFS_I(inode);
        struct super_block *sb = inode->i_sb;
-        mode_t mode;
+        umode_t mode;
        /*
         * Copy data to the in-core inode.
@@ -630,7 +630,7 @@ static int ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode)
 {
        struct ufs_inode_info *ufsi = UFS_I(inode);
        struct super_block *sb = inode->i_sb;
-        mode_t mode;
+        umode_t mode;
        UFSD("Reading ufs2 inode, ino %lu\n", inode->i_ino);
        /*
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 639d49162241..38cac199edff 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -70,7 +70,7 @@ static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, stru
 * If the create succeeds, we fill in the inode information
 * with d_instantiate(). 
 */
-static int ufs_create (struct inode * dir, struct dentry * dentry, int mode,
+static int ufs_create (struct inode * dir, struct dentry * dentry, umode_t mode,
                struct nameidata *nd)
 {
        struct inode *inode;
@@ -94,7 +94,7 @@ static int ufs_create (struct inode * dir, struct dentry * dentry, int mode,
        return err;
 }
-static int ufs_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t rdev)
+static int ufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
 {
        struct inode *inode;
        int err;
@@ -180,7 +180,7 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
        return error;
 }
-static int ufs_mkdir(struct inode * dir, struct dentry * dentry, int mode)
+static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
 {
        struct inode * inode;
        int err = -EMLINK;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 3915ade6f9a8..5246ee3e5607 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1351,9 +1351,9 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
        return 0;
 }
-static int ufs_show_options(struct seq_file *seq, struct vfsmount *vfs)
+static int ufs_show_options(struct seq_file *seq, struct dentry *root)
 {
-        struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb);
+        struct ufs_sb_info *sbi = UFS_SB(root->d_sb);
        unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE;
        const struct match_token *tp = tokens;
@@ -1425,7 +1425,6 @@ static struct inode *ufs_alloc_inode(struct super_block *sb)
 static void ufs_i_callback(struct rcu_head *head)
 {
        struct inode *inode = container_of(head, struct inode, i_rcu);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(ufs_inode_cachep, UFS_I(inode));
 }
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index c26f2bcec264..528750b7e701 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -104,7 +104,7 @@ extern const struct address_space_operations ufs_aops;
 /* ialloc.c */
 extern void ufs_free_inode (struct inode *inode);
-extern struct inode * ufs_new_inode (struct inode *, int);
+extern struct inode * ufs_new_inode (struct inode *, umode_t);
 /* inode.c */
 extern struct inode *ufs_iget(struct super_block *, unsigned long);
diff --git a/fs/xattr.c b/fs/xattr.c
index 67583de8218c..82f43376c7cd 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -397,7 +397,7 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
        error = mnt_want_write_file(f);
        if (!error) {
                error = setxattr(dentry, name, value, size, flags);
-                mnt_drop_write(f->f_path.mnt);
+                mnt_drop_write_file(f);
        }
        fput(f);
        return error;
@@ -624,7 +624,7 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
        error = mnt_want_write_file(f);
        if (!error) {
                error = removexattr(dentry, name);
-                mnt_drop_write(f->f_path.mnt);
+                mnt_drop_write_file(f);
        }
        fput(f);
        return error;
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 76e4266d2e7e..ac702a6eab9b 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -39,7 +39,7 @@ xfs_acl_from_disk(struct xfs_acl *aclp)
        struct posix_acl_entry *acl_e;
        struct posix_acl *acl;
        struct xfs_acl_entry *ace;
-        int count, i;
+        unsigned int count, i;
        count = be32_to_cpu(aclp->acl_cnt);
        if (count > XFS_ACL_MAX_ENTRIES)
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index cf0ac056815f..4dff85c7d7eb 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1370,7 +1370,7 @@ restart:
                        goto restart;
                }
                /*
-                 * clear the LRU reference count so the bufer doesn't get
+                 * clear the LRU reference count so the buffer doesn't get
                 * ignored in xfs_buf_rele().
                 */
                atomic_set(&bp->b_lru_ref, 0);
@@ -1701,12 +1701,8 @@ xfsbufd(
                struct list_head tmp;
                struct blk_plug plug;
-                if (unlikely(freezing(current))) {
+                if (unlikely(freezing(current)))
-                        set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
+                        try_to_freeze();
-                        refrigerator();
-                } else {
-                        clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
-                }
                /* sleep for a long time if there is nothing to do. */
                if (list_empty(&target->bt_delwri_queue))
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 5bab046e859f..df7ffb0affe7 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -90,8 +90,7 @@ typedef unsigned int xfs_buf_flags_t;
        { _XBF_DELWRI_Q,        "DELWRI_Q" }
 typedef enum {
-        XBT_FORCE_SLEEP = 0,
+        XBT_FORCE_FLUSH = 0,
-        XBT_FORCE_FLUSH = 1,
 } xfs_buftarg_flags_t;
 typedef struct xfs_buftarg {
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 25d7280e9f6b..b4ff40b5f918 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -39,20 +39,19 @@
 #include "xfs_qm.h"
 #include "xfs_trace.h"
 /*
-   LOCK ORDER
+ * Lock order:
+ *
-   inode lock               (ilock)
+ * ip->i_lock
-   dquot hash-chain lock    (hashlock)
+ *   qh->qh_lock
-   xqm dquot freelist lock  (freelistlock
+ *     qi->qi_dqlist_lock
-   mount's dquot list lock  (mplistlock)
+ *       dquot->q_qlock (xfs_dqlock() and friends)
-   user dquot lock - lock ordering among dquots is based on the uid or gid
+ *         dquot->q_flush (xfs_dqflock() and friends)
-   group dquot lock - similar to udquots. Between the two dquots, the udquot
+ *         xfs_Gqm->qm_dqfrlist_lock
-                      has to be locked first.
+ *
-   pin lock - the dquot lock must be held to take this lock.
+ * If two dquots need to be locked the order is user before group/project,
-   flush lock - ditto.
+ * otherwise by the lowest id first, see xfs_dqlock2.
-*/
+ */
 #ifdef DEBUG
 xfs_buftarg_t *xfs_dqerror_target;
@@ -155,24 +154,6 @@ xfs_qm_dqdestroy(
 }
 /*
- * This is what a 'fresh' dquot inside a dquot chunk looks like on disk.
- */
-STATIC void
-xfs_qm_dqinit_core(
-        xfs_dqid_t      id,
-        uint            type,
-        xfs_dqblk_t     *d)
-{
-        /*
-         * Caller has zero'd the entire dquot 'chunk' already.
-         */
-        d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
-        d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
-        d->dd_diskdq.d_id = cpu_to_be32(id);
-        d->dd_diskdq.d_flags = type;
-}
-/*
 * If default limits are in force, push them into the dquot now.
 * We overwrite the dquot limits only if they are zero and this
 * is not the root dquot.
@@ -328,8 +309,13 @@ xfs_qm_init_dquot_blk(
        curid = id - (id % q->qi_dqperchunk);
        ASSERT(curid >= 0);
        memset(d, 0, BBTOB(q->qi_dqchunklen));
-        for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++)
+        for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) {
-                xfs_qm_dqinit_core(curid, type, d);
+                d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
+                d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
+                d->dd_diskdq.d_id = cpu_to_be32(curid);
+                d->dd_diskdq.d_flags = type;
+        }
        xfs_trans_dquot_buf(tp, bp,
                            (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF :
                            ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF :
@@ -564,36 +550,62 @@ xfs_qm_dqtobp(
 * Read in the ondisk dquot using dqtobp() then copy it to an incore version,
 * and release the buffer immediately.
 *
+ * If XFS_QMOPT_DQALLOC is set, allocate a dquot on disk if needed.
 */
-/* ARGSUSED */
+int
-STATIC int
 xfs_qm_dqread(
-        xfs_trans_t     **tpp,
+        struct xfs_mount        *mp,
-        xfs_dqid_t      id,
+        xfs_dqid_t              id,
-        xfs_dquot_t     *dqp,   /* dquot to get filled in */
+        uint                    type,
-        uint            flags)
+        uint                    flags,
+        struct xfs_dquot        **O_dqpp)
 {
-        xfs_disk_dquot_t *ddqp;
+        struct xfs_dquot        *dqp;
-        xfs_buf_t        *bp;
+        struct xfs_disk_dquot   *ddqp;
-        int              error;
+        struct xfs_buf          *bp;
-        xfs_trans_t      *tp;
+        struct xfs_trans        *tp = NULL;
+        int                     error;
+        int                     cancelflags = 0;
-        ASSERT(tpp);
+        dqp = xfs_qm_dqinit(mp, id, type);
        trace_xfs_dqread(dqp);
+        if (flags & XFS_QMOPT_DQALLOC) {
+                tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
+                error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
+                                XFS_WRITE_LOG_RES(mp) +
+                                /*
+                                 * Round the chunklen up to the next multiple
+                                 * of 128 (buf log item chunk size).
+                                 */
+                                BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 + 128,
+                                0,
+                                XFS_TRANS_PERM_LOG_RES,
+                                XFS_WRITE_LOG_COUNT);
+                if (error)
+                        goto error1;
+                cancelflags = XFS_TRANS_RELEASE_LOG_RES;
+        }
        /*
         * get a pointer to the on-disk dquot and the buffer containing it
         * dqp already knows its own type (GROUP/USER).
         */
-        if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) {
+        error = xfs_qm_dqtobp(&tp, dqp, &ddqp, &bp, flags);
-                return (error);
+        if (error) {
+                /*
+                 * This can happen if quotas got turned off (ESRCH),
+                 * or if the dquot didn't exist on disk and we ask to
+                 * allocate (ENOENT).
+                 */
+                trace_xfs_dqread_fail(dqp);
+                cancelflags |= XFS_TRANS_ABORT;
+                goto error1;
        }
-        tp = *tpp;
        /* copy everything from disk dquot to the incore dquot */
        memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
-        ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
        xfs_qm_dquot_logitem_init(dqp);
        /*
@@ -622,77 +634,22 @@ xfs_qm_dqread(
        ASSERT(xfs_buf_islocked(bp));
        xfs_trans_brelse(tp, bp);
-        return (error);
-}
-/*
- * allocate an incore dquot from the kernel heap,
- * and fill its core with quota information kept on disk.
- * If XFS_QMOPT_DQALLOC is set, it'll allocate a dquot on disk
- * if it wasn't already allocated.
- */
-STATIC int
-xfs_qm_idtodq(
-        xfs_mount_t     *mp,
-        xfs_dqid_t      id,      /* gid or uid, depending on type */
-        uint            type,    /* UDQUOT or GDQUOT */
-        uint            flags,   /* DQALLOC, DQREPAIR */
-        xfs_dquot_t     **O_dqpp)/* OUT : incore dquot, not locked */
-{
-        xfs_dquot_t     *dqp;
-        int             error;
-        xfs_trans_t     *tp;
-        int             cancelflags=0;
-        dqp = xfs_qm_dqinit(mp, id, type);
-        tp = NULL;
-        if (flags & XFS_QMOPT_DQALLOC) {
-                tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
-                error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
-                                XFS_WRITE_LOG_RES(mp) +
-                                BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 +
-                                128,
-                                0,
-                                XFS_TRANS_PERM_LOG_RES,
-                                XFS_WRITE_LOG_COUNT);
-                if (error) {
-                        cancelflags = 0;
-                        goto error0;
-                }
-                cancelflags = XFS_TRANS_RELEASE_LOG_RES;
-        }
-        /*
-         * Read it from disk; xfs_dqread() takes care of
-         * all the necessary initialization of dquot's fields (locks, etc)
-         */
-        if ((error = xfs_qm_dqread(&tp, id, dqp, flags))) {
-                /*
-                 * This can happen if quotas got turned off (ESRCH),
-                 * or if the dquot didn't exist on disk and we ask to
-                 * allocate (ENOENT).
-                 */
-                trace_xfs_dqread_fail(dqp);
-                cancelflags |= XFS_TRANS_ABORT;
-                goto error0;
-        }
        if (tp) {
-                if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES)))
+                error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-                        goto error1;
+                if (error)
+                        goto error0;
        }
        *O_dqpp = dqp;
-        return (0);
+        return error;
- error0:
+error1:
-        ASSERT(error);
        if (tp)
                xfs_trans_cancel(tp, cancelflags);
- error1:
+error0:
        xfs_qm_dqdestroy(dqp);
        *O_dqpp = NULL;
-        return (error);
+        return error;
 }
 /*
@@ -710,12 +667,9 @@ xfs_qm_dqlookup(
        xfs_dquot_t             **O_dqpp)
 {
        xfs_dquot_t             *dqp;
-        uint                    flist_locked;
        ASSERT(mutex_is_locked(&qh->qh_lock));
-        flist_locked = B_FALSE;
        /*
         * Traverse the hashchain looking for a match
         */
@@ -725,70 +679,31 @@ xfs_qm_dqlookup(
                 * dqlock to look at the id field of the dquot, since the
                 * id can't be modified without the hashlock anyway.
                 */
-                if (be32_to_cpu(dqp->q_core.d_id) == id && dqp->q_mount == mp) {
+                if (be32_to_cpu(dqp->q_core.d_id) != id || dqp->q_mount != mp)
-                        trace_xfs_dqlookup_found(dqp);
+                        continue;
-                        /*
+                trace_xfs_dqlookup_found(dqp);
-                         * All in core dquots must be on the dqlist of mp
-                         */
-                        ASSERT(!list_empty(&dqp->q_mplist));
-                        xfs_dqlock(dqp);
-                        if (dqp->q_nrefs == 0) {
-                                ASSERT(!list_empty(&dqp->q_freelist));
-                                if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
-                                        trace_xfs_dqlookup_want(dqp);
-                                        /*
-                                         * We may have raced with dqreclaim_one()
-                                         * (and lost). So, flag that we don't
-                                         * want the dquot to be reclaimed.
-                                         */
-                                        dqp->dq_flags |= XFS_DQ_WANT;
-                                        xfs_dqunlock(dqp);
-                                        mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-                                        xfs_dqlock(dqp);
-                                        dqp->dq_flags &= ~(XFS_DQ_WANT);
-                                }
-                                flist_locked = B_TRUE;
-                        }
-                        /*
+                xfs_dqlock(dqp);
-                         * id couldn't have changed; we had the hashlock all
+                if (dqp->dq_flags & XFS_DQ_FREEING) {
-                         * along
+                        *O_dqpp = NULL;
-                         */
+                        xfs_dqunlock(dqp);
-                        ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
+                        return -1;
+                }
-                        if (flist_locked) {
-                                if (dqp->q_nrefs != 0) {
-                                        mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-                                        flist_locked = B_FALSE;
-                                } else {
-                                        /* take it off the freelist */
-                                        trace_xfs_dqlookup_freelist(dqp);
-                                        list_del_init(&dqp->q_freelist);
-                                        xfs_Gqm->qm_dqfrlist_cnt--;
-                                }
-                        }
-                        XFS_DQHOLD(dqp);
+                dqp->q_nrefs++;
-                        if (flist_locked)
+                /*
-                                mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+                 * move the dquot to the front of the hashchain
-                        /*
+                 */
-                         * move the dquot to the front of the hashchain
+                list_move(&dqp->q_hashlist, &qh->qh_list);
-                         */
+                trace_xfs_dqlookup_done(dqp);
-                        ASSERT(mutex_is_locked(&qh->qh_lock));
+                *O_dqpp = dqp;
-                        list_move(&dqp->q_hashlist, &qh->qh_list);
+                return 0;
-                        trace_xfs_dqlookup_done(dqp);
-                        *O_dqpp = dqp;
-                        return 0;
-                }
        }
        *O_dqpp = NULL;
-        ASSERT(mutex_is_locked(&qh->qh_lock));
+        return 1;
-        return (1);
 }
 /*
@@ -829,11 +744,7 @@ xfs_qm_dqget(
                        return (EIO);
                }
        }
-#endif
- again:
-#ifdef DEBUG
        ASSERT(type == XFS_DQ_USER ||
               type == XFS_DQ_PROJ ||
               type == XFS_DQ_GROUP);
@@ -845,13 +756,21 @@ xfs_qm_dqget(
                        ASSERT(ip->i_gdquot == NULL);
        }
 #endif
+restart:
        mutex_lock(&h->qh_lock);
        /*
         * Look in the cache (hashtable).
         * The chain is kept locked during lookup.
         */
-        if (xfs_qm_dqlookup(mp, id, h, O_dqpp) == 0) {
+        switch (xfs_qm_dqlookup(mp, id, h, O_dqpp)) {
+        case -1:
+                XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
+                mutex_unlock(&h->qh_lock);
+                delay(1);
+                goto restart;
+        case 0:
                XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);
                /*
                 * The dquot was found, moved to the front of the chain,
@@ -862,9 +781,11 @@ xfs_qm_dqget(
                ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
                mutex_unlock(&h->qh_lock);
                trace_xfs_dqget_hit(*O_dqpp);
-                return (0);     /* success */
+                return 0;       /* success */
+        default:
+                XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
+                break;
        }
-        XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
        /*
         * Dquot cache miss. We don't want to keep the inode lock across
@@ -882,41 +803,18 @@ xfs_qm_dqget(
        version = h->qh_version;
        mutex_unlock(&h->qh_lock);
-        /*
+        error = xfs_qm_dqread(mp, id, type, flags, &dqp);
-         * Allocate the dquot on the kernel heap, and read the ondisk
-         * portion off the disk. Also, do all the necessary initialization
-         * This can return ENOENT if dquot didn't exist on disk and we didn't
-         * ask it to allocate; ESRCH if quotas got turned off suddenly.
-         */
-        if ((error = xfs_qm_idtodq(mp, id, type,
-                                  flags & (XFS_QMOPT_DQALLOC|XFS_QMOPT_DQREPAIR|
-                                           XFS_QMOPT_DOWARN),
-                                  &dqp))) {
-                if (ip)
-                        xfs_ilock(ip, XFS_ILOCK_EXCL);
-                return (error);
-        }
-        /*
+        if (ip)
-         * See if this is mount code calling to look at the overall quota limits
+                xfs_ilock(ip, XFS_ILOCK_EXCL);
-         * which are stored in the id == 0 user or group's dquot.
-         * Since we may not have done a quotacheck by this point, just return
+        if (error)
-         * the dquot without attaching it to any hashtables, lists, etc, or even
+                return error;
-         * taking a reference.
-         * The caller must dqdestroy this once done.
-         */
-        if (flags & XFS_QMOPT_DQSUSER) {
-                ASSERT(id == 0);
-                ASSERT(! ip);
-                goto dqret;
-        }
        /*
         * Dquot lock comes after hashlock in the lock ordering
         */
        if (ip) {
-                xfs_ilock(ip, XFS_ILOCK_EXCL);
                /*
                 * A dquot could be attached to this inode by now, since
                 * we had dropped the ilock.
@@ -961,16 +859,21 @@ xfs_qm_dqget(
                 * lock order between the two dquots here since dqp isn't
                 * on any findable lists yet.
                 */
-                if (xfs_qm_dqlookup(mp, id, h, &tmpdqp) == 0) {
+                switch (xfs_qm_dqlookup(mp, id, h, &tmpdqp)) {
+                case 0:
+                case -1:
                        /*
-                         * Duplicate found. Just throw away the new dquot
+                         * Duplicate found, either in cache or on its way out.
-                         * and start over.
+                         * Just throw away the new dquot and start over.
                         */
-                        xfs_qm_dqput(tmpdqp);
+                        if (tmpdqp)
+                                xfs_qm_dqput(tmpdqp);
                        mutex_unlock(&h->qh_lock);
                        xfs_qm_dqdestroy(dqp);
                        XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
-                        goto again;
+                        goto restart;
+                default:
+                        break;
                }
        }
@@ -1015,67 +918,49 @@ xfs_qm_dqget(
 */
 void
 xfs_qm_dqput(
-        xfs_dquot_t     *dqp)
+        struct xfs_dquot        *dqp)
 {
-        xfs_dquot_t     *gdqp;
+        struct xfs_dquot        *gdqp;
        ASSERT(dqp->q_nrefs > 0);
        ASSERT(XFS_DQ_IS_LOCKED(dqp));
        trace_xfs_dqput(dqp);
-        if (dqp->q_nrefs != 1) {
+recurse:
-                dqp->q_nrefs--;
+        if (--dqp->q_nrefs > 0) {
                xfs_dqunlock(dqp);
                return;
        }
+        trace_xfs_dqput_free(dqp);
+        mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
+        if (list_empty(&dqp->q_freelist)) {
+                list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
+                xfs_Gqm->qm_dqfrlist_cnt++;
+        }
+        mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
        /*
-         * drop the dqlock and acquire the freelist and dqlock
+         * If we just added a udquot to the freelist, then we want to release
-         * in the right order; but try to get it out-of-order first
+         * the gdquot reference that it (probably) has. Otherwise it'll keep
+         * the gdquot from getting reclaimed.
         */
-        if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
+        gdqp = dqp->q_gdquot;
-                trace_xfs_dqput_wait(dqp);
+        if (gdqp) {
-                xfs_dqunlock(dqp);
+                xfs_dqlock(gdqp);
-                mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
+                dqp->q_gdquot = NULL;
-                xfs_dqlock(dqp);
        }
+        xfs_dqunlock(dqp);
-        while (1) {
+        /*
-                gdqp = NULL;
+         * If we had a group quota hint, release it now.
+         */
-                /* We can't depend on nrefs being == 1 here */
+        if (gdqp) {
-                if (--dqp->q_nrefs == 0) {
-                        trace_xfs_dqput_free(dqp);
-                        list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
-                        xfs_Gqm->qm_dqfrlist_cnt++;
-                        /*
-                         * If we just added a udquot to the freelist, then
-                         * we want to release the gdquot reference that
-                         * it (probably) has. Otherwise it'll keep the
-                         * gdquot from getting reclaimed.
-                         */
-                        if ((gdqp = dqp->q_gdquot)) {
-                                /*
-                                 * Avoid a recursive dqput call
-                                 */
-                                xfs_dqlock(gdqp);
-                                dqp->q_gdquot = NULL;
-                        }
-                }
-                xfs_dqunlock(dqp);
-                /*
-                 * If we had a group quota inside the user quota as a hint,
-                 * release it now.
-                 */
-                if (! gdqp)
-                        break;
                dqp = gdqp;
+                goto recurse;
        }
-        mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
 }
 /*
@@ -1169,7 +1054,7 @@ xfs_qm_dqflush(
         * If not dirty, or it's pinned and we are not supposed to block, nada.
         */
        if (!XFS_DQ_IS_DIRTY(dqp) ||
-            (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) {
+            ((flags & SYNC_TRYLOCK) && atomic_read(&dqp->q_pincount) > 0)) {
                xfs_dqfunlock(dqp);
                return 0;
        }
@@ -1257,40 +1142,17 @@ xfs_qm_dqflush(
 }
-int
-xfs_qm_dqlock_nowait(
-        xfs_dquot_t *dqp)
-{
-        return mutex_trylock(&dqp->q_qlock);
-}
-void
-xfs_dqlock(
-        xfs_dquot_t *dqp)
-{
-        mutex_lock(&dqp->q_qlock);
-}
 void
 xfs_dqunlock(
        xfs_dquot_t *dqp)
 {
-        mutex_unlock(&(dqp->q_qlock));
+        xfs_dqunlock_nonotify(dqp);
        if (dqp->q_logitem.qli_dquot == dqp) {
-                /* Once was dqp->q_mount, but might just have been cleared */
                xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp,
-                                        (xfs_log_item_t*)&(dqp->q_logitem));
+                                        &dqp->q_logitem.qli_item);
        }
 }
-void
-xfs_dqunlock_nonotify(
-        xfs_dquot_t *dqp)
-{
-        mutex_unlock(&(dqp->q_qlock));
-}
 /*
 * Lock two xfs_dquot structures.
 *
@@ -1319,43 +1181,18 @@ xfs_dqlock2(
        }
 }
 /*
- * Take a dquot out of the mount's dqlist as well as the hashlist.
+ * Take a dquot out of the mount's dqlist as well as the hashlist.  This is
- * This is called via unmount as well as quotaoff, and the purge
+ * called via unmount as well as quotaoff, and the purge will always succeed.
- * will always succeed unless there are soft (temp) references
- * outstanding.
- *
- * This returns 0 if it was purged, 1 if it wasn't. It's not an error code
- * that we're returning! XXXsup - not cool.
 */
-/* ARGSUSED */
+void
-int
 xfs_qm_dqpurge(
-        xfs_dquot_t     *dqp)
+        struct xfs_dquot        *dqp)
 {
-        xfs_dqhash_t    *qh = dqp->q_hash;
+        struct xfs_mount        *mp = dqp->q_mount;
-        xfs_mount_t     *mp = dqp->q_mount;
+        struct xfs_dqhash       *qh = dqp->q_hash;
-        ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock));
-        ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock));
        xfs_dqlock(dqp);
-        /*
-         * We really can't afford to purge a dquot that is
-         * referenced, because these are hard refs.
-         * It shouldn't happen in general because we went thru _all_ inodes in
-         * dqrele_all_inodes before calling this and didn't let the mountlock go.
-         * However it is possible that we have dquots with temporary
-         * references that are not attached to an inode. e.g. see xfs_setattr().
-         */
-        if (dqp->q_nrefs != 0) {
-                xfs_dqunlock(dqp);
-                mutex_unlock(&dqp->q_hash->qh_lock);
-                return (1);
-        }
-        ASSERT(!list_empty(&dqp->q_freelist));
        /*
         * If we're turning off quotas, we have to make sure that, for
@@ -1370,23 +1207,18 @@ xfs_qm_dqpurge(
                 * Block on the flush lock after nudging dquot buffer,
                 * if it is incore.
                 */
-                xfs_qm_dqflock_pushbuf_wait(dqp);
+                xfs_dqflock_pushbuf_wait(dqp);
        }
        /*
-         * XXXIf we're turning this type of quotas off, we don't care
+         * If we are turning this type of quotas off, we don't care
         * about the dirty metadata sitting in this dquot. OTOH, if
         * we're unmounting, we do care, so we flush it and wait.
         */
        if (XFS_DQ_IS_DIRTY(dqp)) {
                int     error;
-                /* dqflush unlocks dqflock */
                /*
-                 * Given that dqpurge is a very rare occurrence, it is OK
-                 * that we're holding the hashlist and mplist locks
-                 * across the disk write. But, ... XXXsup
-                 *
                 * We don't care about getting disk errors here. We need
                 * to purge this dquot anyway, so we go ahead regardless.
                 */
@@ -1396,38 +1228,44 @@ xfs_qm_dqpurge(
                                __func__, dqp);
                xfs_dqflock(dqp);
        }
        ASSERT(atomic_read(&dqp->q_pincount) == 0);
        ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
               !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
+        xfs_dqfunlock(dqp);
+        xfs_dqunlock(dqp);
+        mutex_lock(&qh->qh_lock);
        list_del_init(&dqp->q_hashlist);
        qh->qh_version++;
+        mutex_unlock(&qh->qh_lock);
+        mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
        list_del_init(&dqp->q_mplist);
        mp->m_quotainfo->qi_dqreclaims++;
        mp->m_quotainfo->qi_dquots--;
+        mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
        /*
-         * XXX Move this to the front of the freelist, if we can get the
+         * We move dquots to the freelist as soon as their reference count
-         * freelist lock.
+         * hits zero, so this dquot really should be on the freelist here.
         */
+        mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
        ASSERT(!list_empty(&dqp->q_freelist));
+        list_del_init(&dqp->q_freelist);
+        xfs_Gqm->qm_dqfrlist_cnt--;
+        mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-        dqp->q_mount = NULL;
+        xfs_qm_dqdestroy(dqp);
-        dqp->q_hash = NULL;
-        dqp->dq_flags = XFS_DQ_INACTIVE;
-        memset(&dqp->q_core, 0, sizeof(dqp->q_core));
-        xfs_dqfunlock(dqp);
-        xfs_dqunlock(dqp);
-        mutex_unlock(&qh->qh_lock);
-        return (0);
 }
 /*
 * Give the buffer a little push if it is incore and
 * wait on the flush lock.
 */
 void
-xfs_qm_dqflock_pushbuf_wait(
+xfs_dqflock_pushbuf_wait(
        xfs_dquot_t     *dqp)
 {
        xfs_mount_t     *mp = dqp->q_mount;
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index 34b7e945dbfa..a1d91d8f1802 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -80,8 +80,6 @@ enum {
        XFS_QLOCK_NESTED,
 };
-#define XFS_DQHOLD(dqp)         ((dqp)->q_nrefs++)
 /*
 * Manage the q_flush completion queue embedded in the dquot.  This completion
 * queue synchronizes processes attempting to flush the in-core dquot back to
@@ -102,6 +100,21 @@ static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
        complete(&dqp->q_flush);
 }
+static inline int xfs_dqlock_nowait(struct xfs_dquot *dqp)
+{
+        return mutex_trylock(&dqp->q_qlock);
+}
+static inline void xfs_dqlock(struct xfs_dquot *dqp)
+{
+        mutex_lock(&dqp->q_qlock);
+}
+static inline void xfs_dqunlock_nonotify(struct xfs_dquot *dqp)
+{
+        mutex_unlock(&dqp->q_qlock);
+}
 #define XFS_DQ_IS_LOCKED(dqp)   (mutex_is_locked(&((dqp)->q_qlock)))
 #define XFS_DQ_IS_DIRTY(dqp)    ((dqp)->dq_flags & XFS_DQ_DIRTY)
 #define XFS_QM_ISUDQ(dqp)       ((dqp)->dq_flags & XFS_DQ_USER)
@@ -116,12 +129,12 @@ static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
                                     (XFS_IS_UQUOTA_ON((d)->q_mount)) : \
                                     (XFS_IS_OQUOTA_ON((d)->q_mount))))
+extern int              xfs_qm_dqread(struct xfs_mount *, xfs_dqid_t, uint,
+                                        uint, struct xfs_dquot  **);
 extern void             xfs_qm_dqdestroy(xfs_dquot_t *);
 extern int              xfs_qm_dqflush(xfs_dquot_t *, uint);
-extern int              xfs_qm_dqpurge(xfs_dquot_t *);
+extern void             xfs_qm_dqpurge(xfs_dquot_t *);
 extern void             xfs_qm_dqunpin_wait(xfs_dquot_t *);
-extern int              xfs_qm_dqlock_nowait(xfs_dquot_t *);
-extern void             xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp);
 extern void             xfs_qm_adjust_dqtimers(xfs_mount_t *,
                                        xfs_disk_dquot_t *);
 extern void             xfs_qm_adjust_dqlimits(xfs_mount_t *,
@@ -129,9 +142,17 @@ extern void		xfs_qm_adjust_dqlimits(xfs_mount_t *,
 extern int              xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
                                        xfs_dqid_t, uint, uint, xfs_dquot_t **);
 extern void             xfs_qm_dqput(xfs_dquot_t *);
-extern void             xfs_dqlock(xfs_dquot_t *);
-extern void             xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *);
+extern void             xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *);
-extern void             xfs_dqunlock(xfs_dquot_t *);
+extern void             xfs_dqunlock(struct xfs_dquot *);
-extern void             xfs_dqunlock_nonotify(xfs_dquot_t *);
+extern void             xfs_dqflock_pushbuf_wait(struct xfs_dquot *dqp);
+static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp)
+{
+        xfs_dqlock(dqp);
+        dqp->q_nrefs++;
+        xfs_dqunlock(dqp);
+        return dqp;
+}
 #endif /* __XFS_DQUOT_H__ */
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index 0dee0b71029d..34baeae45265 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -73,7 +73,6 @@ xfs_qm_dquot_logitem_format(
        logvec->i_len  = sizeof(xfs_disk_dquot_t);
        logvec->i_type = XLOG_REG_TYPE_DQUOT;
-        ASSERT(2 == lip->li_desc->lid_size);
        qlip->qli_format.qlf_size = 2;
 }
@@ -134,7 +133,7 @@ xfs_qm_dquot_logitem_push(
         * lock without sleeping, then there must not have been
         * anyone in the process of flushing the dquot.
         */
-        error = xfs_qm_dqflush(dqp, 0);
+        error = xfs_qm_dqflush(dqp, SYNC_TRYLOCK);
        if (error)
                xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p",
                        __func__, error, dqp);
@@ -237,7 +236,7 @@ xfs_qm_dquot_logitem_trylock(
        if (atomic_read(&dqp->q_pincount) > 0)
                return XFS_ITEM_PINNED;
-        if (!xfs_qm_dqlock_nowait(dqp))
+        if (!xfs_dqlock_nowait(dqp))
                return XFS_ITEM_LOCKED;
        if (!xfs_dqflock_nowait(dqp)) {
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 753ed9b5c70b..f675f3d9d7b3 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -209,10 +209,10 @@ xfs_file_fsync(
        /*
         * First check if the VFS inode is marked dirty.  All the dirtying
-         * of non-transactional updates no goes through mark_inode_dirty*,
+         * of non-transactional updates now goes through mark_inode_dirty*,
-         * which allows us to distinguish beteeen pure timestamp updates
+         * which allows us to distinguish between pure timestamp updates
         * and i_size updates which need to be caught for fdatasync.
-         * After that also theck for the dirty state in the XFS inode, which
+         * After that also check for the dirty state in the XFS inode, which
         * might gets cleared when the inode gets written out via the AIL
         * or xfs_iflush_cluster.
         */
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 169380e66057..dad1a31aa4fc 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -447,7 +447,7 @@ STATIC xfs_buf_t *			/* allocation group buffer */
 xfs_ialloc_ag_select(
        xfs_trans_t     *tp,            /* transaction pointer */
        xfs_ino_t       parent,         /* parent directory inode number */
-        mode_t          mode,           /* bits set to indicate file type */
+        umode_t         mode,           /* bits set to indicate file type */
        int             okalloc)        /* ok to allocate more space */
 {
        xfs_buf_t       *agbp;          /* allocation group header buffer */
@@ -640,7 +640,7 @@ int
 xfs_dialloc(
        xfs_trans_t     *tp,            /* transaction pointer */
        xfs_ino_t       parent,         /* parent inode (directory) */
-        mode_t          mode,           /* mode bits for new inode */
+        umode_t         mode,           /* mode bits for new inode */
        int             okalloc,        /* ok to allocate more space */
        xfs_buf_t       **IO_agbp,      /* in/out ag header's buffer */
        boolean_t       *alloc_done,    /* true if we needed to replenish
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h
index bb5385475e1f..666a037398d6 100644
--- a/fs/xfs/xfs_ialloc.h
+++ b/fs/xfs/xfs_ialloc.h
@@ -81,7 +81,7 @@ int					/* error */
 xfs_dialloc(
        struct xfs_trans *tp,           /* transaction pointer */
        xfs_ino_t       parent,         /* parent inode (directory) */
-        mode_t          mode,           /* mode bits for new inode */
+        umode_t         mode,           /* mode bits for new inode */
        int             okalloc,        /* ok to allocate more space */
        struct xfs_buf  **agbp,         /* buf for a.g. inode header */
        boolean_t       *alloc_done,    /* an allocation was done to replenish
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 0fa98b1c70ea..3960a066d7ff 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -107,7 +107,6 @@ xfs_inode_free_callback(
        struct inode            *inode = container_of(head, struct inode, i_rcu);
        struct xfs_inode        *ip = XFS_I(inode);
-        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_zone_free(xfs_inode_zone, ip);
 }
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 755ee8164880..9dda7cc32848 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -961,7 +961,7 @@ int
 xfs_ialloc(
        xfs_trans_t     *tp,
        xfs_inode_t     *pip,
-        mode_t          mode,
+        umode_t         mode,
        xfs_nlink_t     nlink,
        xfs_dev_t       rdev,
        prid_t          prid,
@@ -1002,7 +1002,7 @@ xfs_ialloc(
                return error;
        ASSERT(ip != NULL);
-        ip->i_d.di_mode = (__uint16_t)mode;
+        ip->i_d.di_mode = mode;
        ip->i_d.di_onlink = 0;
        ip->i_d.di_nlink = nlink;
        ASSERT(ip->i_d.di_nlink == nlink);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index b4cd4739f98e..f0e6b151ba37 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -481,7 +481,7 @@ void		xfs_inode_free(struct xfs_inode *ip);
 /*
 * xfs_inode.c prototypes.
 */
-int             xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t,
+int             xfs_ialloc(struct xfs_trans *, xfs_inode_t *, umode_t,
                           xfs_nlink_t, xfs_dev_t, prid_t, int,
                           struct xfs_buf **, boolean_t *, xfs_inode_t **);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index abaafdbb3e65..cfd6c7f8cc3c 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -437,7 +437,6 @@ xfs_inode_item_format(
         * Assert that no attribute-related log flags are set.
         */
        if (!XFS_IFORK_Q(ip)) {
-                ASSERT(nvecs == lip->li_desc->lid_size);
                iip->ili_format.ilf_size = nvecs;
                ASSERT(!(iip->ili_format.ilf_fields &
                         (XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT)));
@@ -521,7 +520,6 @@ xfs_inode_item_format(
                break;
        }
-        ASSERT(nvecs == lip->li_desc->lid_size);
        iip->ili_format.ilf_size = nvecs;
 }
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index d99a90518909..76f3ca5cfc36 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -559,23 +559,23 @@ xfs_attrmulti_by_handle(
                                        ops[i].am_flags);
                        break;
                case ATTR_OP_SET:
-                        ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
+                        ops[i].am_error = mnt_want_write_file(parfilp);
                        if (ops[i].am_error)
                                break;
                        ops[i].am_error = xfs_attrmulti_attr_set(
                                        dentry->d_inode, attr_name,
                                        ops[i].am_attrvalue, ops[i].am_length,
                                        ops[i].am_flags);
-                        mnt_drop_write(parfilp->f_path.mnt);
+                        mnt_drop_write_file(parfilp);
                        break;
                case ATTR_OP_REMOVE:
-                        ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
+                        ops[i].am_error = mnt_want_write_file(parfilp);
                        if (ops[i].am_error)
                                break;
                        ops[i].am_error = xfs_attrmulti_attr_remove(
                                        dentry->d_inode, attr_name,
                                        ops[i].am_flags);
-                        mnt_drop_write(parfilp->f_path.mnt);
+                        mnt_drop_write_file(parfilp);
                        break;
                default:
                        ops[i].am_error = EINVAL;
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 54e623bfbb85..f9ccb7b7c043 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -454,23 +454,23 @@ xfs_compat_attrmulti_by_handle(
                                        &ops[i].am_length, ops[i].am_flags);
                        break;
                case ATTR_OP_SET:
-                        ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
+                        ops[i].am_error = mnt_want_write_file(parfilp);
                        if (ops[i].am_error)
                                break;
                        ops[i].am_error = xfs_attrmulti_attr_set(
                                        dentry->d_inode, attr_name,
                                        compat_ptr(ops[i].am_attrvalue),
                                        ops[i].am_length, ops[i].am_flags);
-                        mnt_drop_write(parfilp->f_path.mnt);
+                        mnt_drop_write_file(parfilp);
                        break;
                case ATTR_OP_REMOVE:
-                        ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
+                        ops[i].am_error = mnt_want_write_file(parfilp);
                        if (ops[i].am_error)
                                break;
                        ops[i].am_error = xfs_attrmulti_attr_remove(
                                        dentry->d_inode, attr_name,
                                        ops[i].am_flags);
-                        mnt_drop_write(parfilp->f_path.mnt);
+                        mnt_drop_write_file(parfilp);
                        break;
                default:
                        ops[i].am_error = EINVAL;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 23ce927973a4..f9babd179223 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -168,7 +168,7 @@ STATIC int
 xfs_vn_mknod(
        struct inode    *dir,
        struct dentry   *dentry,
-        int             mode,
+        umode_t         mode,
        dev_t           rdev)
 {
        struct inode    *inode;
@@ -231,7 +231,7 @@ STATIC int
 xfs_vn_create(
        struct inode    *dir,
        struct dentry   *dentry,
-        int             mode,
+        umode_t         mode,
        struct nameidata *nd)
 {
        return xfs_vn_mknod(dir, dentry, mode, 0);
@@ -241,7 +241,7 @@ STATIC int
 xfs_vn_mkdir(
        struct inode    *dir,
        struct dentry   *dentry,
-        int             mode)
+        umode_t         mode)
 {
        return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0);
 }
@@ -366,7 +366,7 @@ xfs_vn_symlink(
        struct xfs_inode *cip = NULL;
        struct xfs_name name;
        int             error;
-        mode_t          mode;
+        umode_t         mode;
        mode = S_IFLNK |
                (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 34817adf4b9e..e2cc3568c299 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -760,38 +760,6 @@ xfs_log_item_init(
        INIT_LIST_HEAD(&item->li_cil);
 }
-/*
- * Write region vectors to log.  The write happens using the space reservation
- * of the ticket (tic).  It is not a requirement that all writes for a given
- * transaction occur with one call to xfs_log_write(). However, it is important
- * to note that the transaction reservation code makes an assumption about the
- * number of log headers a transaction requires that may be violated if you
- * don't pass all the transaction vectors in one call....
- */
-int
-xfs_log_write(
-        struct xfs_mount        *mp,
-        struct xfs_log_iovec    reg[],
-        int                     nentries,
-        struct xlog_ticket      *tic,
-        xfs_lsn_t               *start_lsn)
-{
-        struct log              *log = mp->m_log;
-        int                     error;
-        struct xfs_log_vec      vec = {
-                .lv_niovecs = nentries,
-                .lv_iovecp = reg,
-        };
-        if (XLOG_FORCED_SHUTDOWN(log))
-                return XFS_ERROR(EIO);
-        error = xlog_write(log, &vec, tic, start_lsn, NULL, 0);
-        if (error)
-                xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
-        return error;
-}
 void
 xfs_log_move_tail(xfs_mount_t   *mp,
                  xfs_lsn_t     tail_lsn)
@@ -1685,7 +1653,7 @@ xlog_print_tic_res(
        };
        xfs_warn(mp,
-                "xfs_log_write: reservation summary:\n"
+                "xlog_write: reservation summary:\n"
                "  trans type  = %s (%u)\n"
                "  unit res    = %d bytes\n"
                "  current res = %d bytes\n"
@@ -1714,7 +1682,7 @@ xlog_print_tic_res(
        }
        xfs_alert_tag(mp, XFS_PTAG_LOGRES,
-                "xfs_log_write: reservation ran out. Need to up reservation");
+                "xlog_write: reservation ran out. Need to up reservation");
        xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 }
@@ -1968,23 +1936,21 @@ xlog_write(
        *start_lsn = 0;
        len = xlog_write_calc_vec_length(ticket, log_vector);
-        if (log->l_cilp) {
-                /*
-                 * Region headers and bytes are already accounted for.
-                 * We only need to take into account start records and
-                 * split regions in this function.
-                 */
-                if (ticket->t_flags & XLOG_TIC_INITED)
-                        ticket->t_curr_res -= sizeof(xlog_op_header_t);
-                /*
+        /*
-                 * Commit record headers need to be accounted for. These
+         * Region headers and bytes are already accounted for.
-                 * come in as separate writes so are easy to detect.
+         * We only need to take into account start records and
-                 */
+         * split regions in this function.
-                if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))
+         */
-                        ticket->t_curr_res -= sizeof(xlog_op_header_t);
+        if (ticket->t_flags & XLOG_TIC_INITED)
-        } else
+                ticket->t_curr_res -= sizeof(xlog_op_header_t);
-                ticket->t_curr_res -= len;
+        /*
+         * Commit record headers need to be accounted for. These
+         * come in as separate writes so are easy to detect.
+         */
+        if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))
+                ticket->t_curr_res -= sizeof(xlog_op_header_t);
        if (ticket->t_curr_res < 0)
                xlog_print_tic_res(log->l_mp, ticket);
@@ -2931,8 +2897,7 @@ _xfs_log_force(
        XFS_STATS_INC(xs_log_force);
-        if (log->l_cilp)
+        xlog_cil_force(log);
-                xlog_cil_force(log);
        spin_lock(&log->l_icloglock);
@@ -3081,11 +3046,9 @@ _xfs_log_force_lsn(
        XFS_STATS_INC(xs_log_force);
-        if (log->l_cilp) {
+        lsn = xlog_cil_force_lsn(log, lsn);
-                lsn = xlog_cil_force_lsn(log, lsn);
+        if (lsn == NULLCOMMITLSN)
-                if (lsn == NULLCOMMITLSN)
+                return 0;
-                        return 0;
-        }
 try_again:
        spin_lock(&log->l_icloglock);
@@ -3653,7 +3616,7 @@ xfs_log_force_umount(
         * completed transactions are flushed to disk with the xfs_log_force()
         * call below.
         */
-        if (!logerror && (mp->m_flags & XFS_MOUNT_DELAYLOG))
+        if (!logerror)
                xlog_cil_force(log);
        /*
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 3f7bf451c034..2aee3b22d29c 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -174,11 +174,6 @@ int	  xfs_log_reserve(struct xfs_mount *mp,
                          __uint8_t        clientid,
                          uint             flags,
                          uint             t_type);
-int       xfs_log_write(struct xfs_mount *mp,
-                        xfs_log_iovec_t  region[],
-                        int              nentries,
-                        struct xlog_ticket *ticket,
-                        xfs_lsn_t        *start_lsn);
 int       xfs_log_unmount_write(struct xfs_mount *mp);
 void      xfs_log_unmount(struct xfs_mount *mp);
 int       xfs_log_force_umount(struct xfs_mount *mp, int logerror);
@@ -189,8 +184,7 @@ void	  xlog_iodone(struct xfs_buf *);
 struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
 void      xfs_log_ticket_put(struct xlog_ticket *ticket);
-void    xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
+int     xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
-                                struct xfs_log_vec *log_vector,
                                xfs_lsn_t *commit_lsn, int flags);
 bool    xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index c7755d5a5fbe..d4fadbe8ac90 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -32,10 +32,7 @@
 #include "xfs_discard.h"
 /*
- * Perform initial CIL structure initialisation. If the CIL is not
+ * Perform initial CIL structure initialisation.
- * enabled in this filesystem, ensure the log->l_cilp is null so
- * we can check this conditional to determine if we are doing delayed
- * logging or not.
 */
 int
 xlog_cil_init(
@@ -44,10 +41,6 @@ xlog_cil_init(
        struct xfs_cil  *cil;
        struct xfs_cil_ctx *ctx;
-        log->l_cilp = NULL;
-        if (!(log->l_mp->m_flags & XFS_MOUNT_DELAYLOG))
-                return 0;
        cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL);
        if (!cil)
                return ENOMEM;
@@ -80,9 +73,6 @@ void
 xlog_cil_destroy(
        struct log      *log)
 {
-        if (!log->l_cilp)
-                return;
        if (log->l_cilp->xc_ctx) {
                if (log->l_cilp->xc_ctx->ticket)
                        xfs_log_ticket_put(log->l_cilp->xc_ctx->ticket);
@@ -137,9 +127,6 @@ void
 xlog_cil_init_post_recovery(
        struct log      *log)
 {
-        if (!log->l_cilp)
-                return;
        log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log);
        log->l_cilp->xc_ctx->sequence = 1;
        log->l_cilp->xc_ctx->commit_lsn = xlog_assign_lsn(log->l_curr_cycle,
@@ -172,37 +159,73 @@ xlog_cil_init_post_recovery(
 * format the regions into the iclog as though they are being formatted
 * directly out of the objects themselves.
 */
-static void
+static struct xfs_log_vec *
-xlog_cil_format_items(
+xlog_cil_prepare_log_vecs(
-        struct log              *log,
+        struct xfs_trans        *tp)
-        struct xfs_log_vec      *log_vector)
 {
-        struct xfs_log_vec *lv;
+        struct xfs_log_item_desc *lidp;
+        struct xfs_log_vec      *lv = NULL;
+        struct xfs_log_vec      *ret_lv = NULL;
-        ASSERT(log_vector);
-        for (lv = log_vector; lv; lv = lv->lv_next) {
+        /* Bail out if we didn't find a log item.  */
+        if (list_empty(&tp->t_items)) {
+                ASSERT(0);
+                return NULL;
+        }
+        list_for_each_entry(lidp, &tp->t_items, lid_trans) {
+                struct xfs_log_vec *new_lv;
                void    *ptr;
                int     index;
                int     len = 0;
+                uint    niovecs;
+                /* Skip items which aren't dirty in this transaction. */
+                if (!(lidp->lid_flags & XFS_LID_DIRTY))
+                        continue;
+                /* Skip items that do not have any vectors for writing */
+                niovecs = IOP_SIZE(lidp->lid_item);
+                if (!niovecs)
+                        continue;
+                new_lv = kmem_zalloc(sizeof(*new_lv) +
+                                niovecs * sizeof(struct xfs_log_iovec),
+                                KM_SLEEP);
+                /* The allocated iovec region lies beyond the log vector. */
+                new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1];
+                new_lv->lv_niovecs = niovecs;
+                new_lv->lv_item = lidp->lid_item;
                /* build the vector array and calculate it's length */
-                IOP_FORMAT(lv->lv_item, lv->lv_iovecp);
+                IOP_FORMAT(new_lv->lv_item, new_lv->lv_iovecp);
-                for (index = 0; index < lv->lv_niovecs; index++)
+                for (index = 0; index < new_lv->lv_niovecs; index++)
-                        len += lv->lv_iovecp[index].i_len;
+                        len += new_lv->lv_iovecp[index].i_len;
-                lv->lv_buf_len = len;
+                new_lv->lv_buf_len = len;
-                lv->lv_buf = kmem_alloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS);
+                new_lv->lv_buf = kmem_alloc(new_lv->lv_buf_len,
-                ptr = lv->lv_buf;
+                                KM_SLEEP|KM_NOFS);
+                ptr = new_lv->lv_buf;
-                for (index = 0; index < lv->lv_niovecs; index++) {
+                for (index = 0; index < new_lv->lv_niovecs; index++) {
-                        struct xfs_log_iovec *vec = &lv->lv_iovecp[index];
+                        struct xfs_log_iovec *vec = &new_lv->lv_iovecp[index];
                        memcpy(ptr, vec->i_addr, vec->i_len);
                        vec->i_addr = ptr;
                        ptr += vec->i_len;
                }
-                ASSERT(ptr == lv->lv_buf + lv->lv_buf_len);
+                ASSERT(ptr == new_lv->lv_buf + new_lv->lv_buf_len);
+                if (!ret_lv)
+                        ret_lv = new_lv;
+                else
+                        lv->lv_next = new_lv;
+                lv = new_lv;
        }
+        return ret_lv;
 }
 /*
@@ -256,7 +279,7 @@ xfs_cil_prepare_item(
 * Insert the log items into the CIL and calculate the difference in space
 * consumed by the item. Add the space to the checkpoint ticket and calculate
 * if the change requires additional log metadata. If it does, take that space
- * as well. Remove the amount of space we addded to the checkpoint ticket from
+ * as well. Remove the amount of space we added to the checkpoint ticket from
 * the current transaction ticket so that the accounting works out correctly.
 */
 static void
@@ -635,28 +658,30 @@ out_abort:
 * background commit, returns without it held once background commits are
 * allowed again.
 */
-void
+int
 xfs_log_commit_cil(
        struct xfs_mount        *mp,
        struct xfs_trans        *tp,
-        struct xfs_log_vec      *log_vector,
        xfs_lsn_t               *commit_lsn,
        int                     flags)
 {
        struct log              *log = mp->m_log;
        int                     log_flags = 0;
        int                     push = 0;
+        struct xfs_log_vec      *log_vector;
        if (flags & XFS_TRANS_RELEASE_LOG_RES)
                log_flags = XFS_LOG_REL_PERM_RESERV;
        /*
-         * do all the hard work of formatting items (including memory
+         * Do all the hard work of formatting items (including memory
         * allocation) outside the CIL context lock. This prevents stalling CIL
         * pushes when we are low on memory and a transaction commit spends a
         * lot of time in memory reclaim.
         */
-        xlog_cil_format_items(log, log_vector);
+        log_vector = xlog_cil_prepare_log_vecs(tp);
+        if (!log_vector)
+                return ENOMEM;
        /* lock out background commit */
        down_read(&log->l_cilp->xc_ctx_lock);
@@ -709,6 +734,7 @@ xfs_log_commit_cil(
         */
        if (push)
                xlog_cil_push(log, 0);
+        return 0;
 }
 /*
@@ -786,8 +812,6 @@ xfs_log_item_in_current_chkpt(
 {
        struct xfs_cil_ctx *ctx;
-        if (!(lip->li_mountp->m_flags & XFS_MOUNT_DELAYLOG))
-                return false;
        if (list_empty(&lip->li_cil))
                return false;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index bb24dac42a25..19f69e232509 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -219,7 +219,6 @@ typedef struct xfs_mount {
 #define XFS_MOUNT_WSYNC         (1ULL << 0)     /* for nfs - all metadata ops
                                                   must be synchronous except
                                                   for space allocations */
-#define XFS_MOUNT_DELAYLOG      (1ULL << 1)     /* delayed logging is enabled */
 #define XFS_MOUNT_WAS_CLEAN     (1ULL << 3)
 #define XFS_MOUNT_FS_SHUTDOWN   (1ULL << 4)     /* atomic stop of all filesystem
                                                   operations, typically for
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 0bbb1a41998b..671f37eae1c7 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -154,12 +154,17 @@ STATIC void
 xfs_qm_destroy(
        struct xfs_qm   *xqm)
 {
-        struct xfs_dquot *dqp, *n;
        int             hsize, i;
        ASSERT(xqm != NULL);
        ASSERT(xqm->qm_nrefs == 0);
        unregister_shrinker(&xfs_qm_shaker);
+        mutex_lock(&xqm->qm_dqfrlist_lock);
+        ASSERT(list_empty(&xqm->qm_dqfrlist));
+        mutex_unlock(&xqm->qm_dqfrlist_lock);
        hsize = xqm->qm_dqhashmask + 1;
        for (i = 0; i < hsize; i++) {
                xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
@@ -171,17 +176,6 @@ xfs_qm_destroy(
        xqm->qm_grp_dqhtable = NULL;
        xqm->qm_dqhashmask = 0;
-        /* frlist cleanup */
-        mutex_lock(&xqm->qm_dqfrlist_lock);
-        list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) {
-                xfs_dqlock(dqp);
-                list_del_init(&dqp->q_freelist);
-                xfs_Gqm->qm_dqfrlist_cnt--;
-                xfs_dqunlock(dqp);
-                xfs_qm_dqdestroy(dqp);
-        }
-        mutex_unlock(&xqm->qm_dqfrlist_lock);
-        mutex_destroy(&xqm->qm_dqfrlist_lock);
        kmem_free(xqm);
 }
@@ -232,34 +226,10 @@ STATIC void
 xfs_qm_rele_quotafs_ref(
        struct xfs_mount *mp)
 {
-        xfs_dquot_t     *dqp, *n;
        ASSERT(xfs_Gqm);
        ASSERT(xfs_Gqm->qm_nrefs > 0);
        /*
-         * Go thru the freelist and destroy all inactive dquots.
-         */
-        mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-        list_for_each_entry_safe(dqp, n, &xfs_Gqm->qm_dqfrlist, q_freelist) {
-                xfs_dqlock(dqp);
-                if (dqp->dq_flags & XFS_DQ_INACTIVE) {
-                        ASSERT(dqp->q_mount == NULL);
-                        ASSERT(! XFS_DQ_IS_DIRTY(dqp));
-                        ASSERT(list_empty(&dqp->q_hashlist));
-                        ASSERT(list_empty(&dqp->q_mplist));
-                        list_del_init(&dqp->q_freelist);
-                        xfs_Gqm->qm_dqfrlist_cnt--;
-                        xfs_dqunlock(dqp);
-                        xfs_qm_dqdestroy(dqp);
-                } else {
-                        xfs_dqunlock(dqp);
-                }
-        }
-        mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-        /*
         * Destroy the entire XQM. If somebody mounts with quotaon, this'll
         * be restarted.
         */
@@ -415,8 +385,7 @@ xfs_qm_unmount_quotas(
 */
 STATIC int
 xfs_qm_dqflush_all(
-        struct xfs_mount        *mp,
+        struct xfs_mount        *mp)
-        int                     sync_mode)
 {
        struct xfs_quotainfo    *q = mp->m_quotainfo;
        int                     recl;
@@ -429,7 +398,8 @@ again:
        mutex_lock(&q->qi_dqlist_lock);
        list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
                xfs_dqlock(dqp);
-                if (! XFS_DQ_IS_DIRTY(dqp)) {
+                if ((dqp->dq_flags & XFS_DQ_FREEING) ||
+                    !XFS_DQ_IS_DIRTY(dqp)) {
                        xfs_dqunlock(dqp);
                        continue;
                }
@@ -444,14 +414,14 @@ again:
                         * out immediately.  We'll be able to acquire
                         * the flush lock when the I/O completes.
                         */
-                        xfs_qm_dqflock_pushbuf_wait(dqp);
+                        xfs_dqflock_pushbuf_wait(dqp);
                }
                /*
                 * Let go of the mplist lock. We don't want to hold it
                 * across a disk write.
                 */
                mutex_unlock(&q->qi_dqlist_lock);
-                error = xfs_qm_dqflush(dqp, sync_mode);
+                error = xfs_qm_dqflush(dqp, 0);
                xfs_dqunlock(dqp);
                if (error)
                        return error;
@@ -468,6 +438,7 @@ again:
        /* return ! busy */
        return 0;
 }
 /*
 * Release the group dquot pointers the user dquots may be
 * carrying around as a hint. mplist is locked on entry and exit.
@@ -478,31 +449,26 @@ xfs_qm_detach_gdquots(
 {
        struct xfs_quotainfo    *q = mp->m_quotainfo;
        struct xfs_dquot        *dqp, *gdqp;
-        int                     nrecl;
 again:
        ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
        list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
                xfs_dqlock(dqp);
-                if ((gdqp = dqp->q_gdquot)) {
+                if (dqp->dq_flags & XFS_DQ_FREEING) {
-                        xfs_dqlock(gdqp);
+                        xfs_dqunlock(dqp);
-                        dqp->q_gdquot = NULL;
-                }
-                xfs_dqunlock(dqp);
-                if (gdqp) {
-                        /*
-                         * Can't hold the mplist lock across a dqput.
-                         * XXXmust convert to marker based iterations here.
-                         */
-                        nrecl = q->qi_dqreclaims;
                        mutex_unlock(&q->qi_dqlist_lock);
-                        xfs_qm_dqput(gdqp);
+                        delay(1);
                        mutex_lock(&q->qi_dqlist_lock);
-                        if (nrecl != q->qi_dqreclaims)
+                        goto again;
-                                goto again;
                }
+                gdqp = dqp->q_gdquot;
+                if (gdqp)
+                        dqp->q_gdquot = NULL;
+                xfs_dqunlock(dqp);
+                if (gdqp)
+                        xfs_qm_dqrele(gdqp);
        }
 }
@@ -520,8 +486,8 @@ xfs_qm_dqpurge_int(
        struct xfs_quotainfo    *q = mp->m_quotainfo;
        struct xfs_dquot        *dqp, *n;
        uint                    dqtype;
-        int                     nrecl;
+        int                     nmisses = 0;
-        int                     nmisses;
+        LIST_HEAD               (dispose_list);
        if (!q)
                return 0;
@@ -540,47 +506,26 @@ xfs_qm_dqpurge_int(
         */
        xfs_qm_detach_gdquots(mp);
-      again:
-        nmisses = 0;
-        ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
        /*
-         * Try to get rid of all of the unwanted dquots. The idea is to
+         * Try to get rid of all of the unwanted dquots.
-         * get them off mplist and hashlist, but leave them on freelist.
         */
        list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) {
-                /*
+                xfs_dqlock(dqp);
-                 * It's OK to look at the type without taking dqlock here.
+                if ((dqp->dq_flags & dqtype) != 0 &&
-                 * We're holding the mplist lock here, and that's needed for
+                    !(dqp->dq_flags & XFS_DQ_FREEING)) {
-                 * a dqreclaim.
+                        if (dqp->q_nrefs == 0) {
-                 */
+                                dqp->dq_flags |= XFS_DQ_FREEING;
-                if ((dqp->dq_flags & dqtype) == 0)
+                                list_move_tail(&dqp->q_mplist, &dispose_list);
-                        continue;
+                        } else
+                                nmisses++;
-                if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
-                        nrecl = q->qi_dqreclaims;
-                        mutex_unlock(&q->qi_dqlist_lock);
-                        mutex_lock(&dqp->q_hash->qh_lock);
-                        mutex_lock(&q->qi_dqlist_lock);
-                        /*
-                         * XXXTheoretically, we can get into a very long
-                         * ping pong game here.
-                         * No one can be adding dquots to the mplist at
-                         * this point, but somebody might be taking things off.
-                         */
-                        if (nrecl != q->qi_dqreclaims) {
-                                mutex_unlock(&dqp->q_hash->qh_lock);
-                                goto again;
-                        }
                }
+                xfs_dqunlock(dqp);
-                /*
-                 * Take the dquot off the mplist and hashlist. It may remain on
-                 * freelist in INACTIVE state.
-                 */
-                nmisses += xfs_qm_dqpurge(dqp);
        }
        mutex_unlock(&q->qi_dqlist_lock);
+        list_for_each_entry_safe(dqp, n, &dispose_list, q_mplist)
+                xfs_qm_dqpurge(dqp);
        return nmisses;
 }
@@ -648,12 +593,9 @@ xfs_qm_dqattach_one(
                 */
                dqp = udqhint->q_gdquot;
                if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) {
-                        xfs_dqlock(dqp);
-                        XFS_DQHOLD(dqp);
                        ASSERT(*IO_idqpp == NULL);
-                        *IO_idqpp = dqp;
-                        xfs_dqunlock(dqp);
+                        *IO_idqpp = xfs_qm_dqhold(dqp);
                        xfs_dqunlock(udqhint);
                        return 0;
                }
@@ -693,11 +635,7 @@ xfs_qm_dqattach_one(
 /*
 * Given a udquot and gdquot, attach a ptr to the group dquot in the
- * udquot as a hint for future lookups. The idea sounds simple, but the
+ * udquot as a hint for future lookups.
- * execution isn't, because the udquot might have a group dquot attached
- * already and getting rid of that gets us into lock ordering constraints.
- * The process is complicated more by the fact that the dquots may or may not
- * be locked on entry.
 */
 STATIC void
 xfs_qm_dqattach_grouphint(
@@ -708,45 +646,17 @@ xfs_qm_dqattach_grouphint(
        xfs_dqlock(udq);
-        if ((tmp = udq->q_gdquot)) {
+        tmp = udq->q_gdquot;
-                if (tmp == gdq) {
+        if (tmp) {
-                        xfs_dqunlock(udq);
+                if (tmp == gdq)
-                        return;
+                        goto done;
-                }
                udq->q_gdquot = NULL;
-                /*
-                 * We can't keep any dqlocks when calling dqrele,
-                 * because the freelist lock comes before dqlocks.
-                 */
-                xfs_dqunlock(udq);
-                /*
-                 * we took a hard reference once upon a time in dqget,
-                 * so give it back when the udquot no longer points at it
-                 * dqput() does the unlocking of the dquot.
-                 */
                xfs_qm_dqrele(tmp);
-                xfs_dqlock(udq);
-                xfs_dqlock(gdq);
-        } else {
-                ASSERT(XFS_DQ_IS_LOCKED(udq));
-                xfs_dqlock(gdq);
-        }
-        ASSERT(XFS_DQ_IS_LOCKED(udq));
-        ASSERT(XFS_DQ_IS_LOCKED(gdq));
-        /*
-         * Somebody could have attached a gdquot here,
-         * when we dropped the uqlock. If so, just do nothing.
-         */
-        if (udq->q_gdquot == NULL) {
-                XFS_DQHOLD(gdq);
-                udq->q_gdquot = gdq;
        }
-        xfs_dqunlock(gdq);
+        udq->q_gdquot = xfs_qm_dqhold(gdq);
+done:
        xfs_dqunlock(udq);
 }
@@ -813,17 +723,13 @@ xfs_qm_dqattach_locked(
                ASSERT(ip->i_gdquot);
                /*
-                 * We may or may not have the i_udquot locked at this point,
+                 * We do not have i_udquot locked at this point, but this check
-                 * but this check is OK since we don't depend on the i_gdquot to
+                 * is OK since we don't depend on the i_gdquot to be accurate
-                 * be accurate 100% all the time. It is just a hint, and this
+                 * 100% all the time. It is just a hint, and this will
-                 * will succeed in general.
+                 * succeed in general.
-                 */
-                if (ip->i_udquot->q_gdquot == ip->i_gdquot)
-                        goto done;
-                /*
-                 * Attach i_gdquot to the gdquot hint inside the i_udquot.
                 */
-                xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);
+                if (ip->i_udquot->q_gdquot != ip->i_gdquot)
+                        xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);
        }
 done:
@@ -879,100 +785,6 @@ xfs_qm_dqdetach(
        }
 }
-int
-xfs_qm_sync(
-        struct xfs_mount        *mp,
-        int                     flags)
-{
-        struct xfs_quotainfo    *q = mp->m_quotainfo;
-        int                     recl, restarts;
-        struct xfs_dquot        *dqp;
-        int                     error;
-        if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
-                return 0;
-        restarts = 0;
-  again:
-        mutex_lock(&q->qi_dqlist_lock);
-        /*
-         * dqpurge_all() also takes the mplist lock and iterate thru all dquots
-         * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared
-         * when we have the mplist lock, we know that dquots will be consistent
-         * as long as we have it locked.
-         */
-        if (!XFS_IS_QUOTA_ON(mp)) {
-                mutex_unlock(&q->qi_dqlist_lock);
-                return 0;
-        }
-        ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
-        list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
-                /*
-                 * If this is vfs_sync calling, then skip the dquots that
-                 * don't 'seem' to be dirty. ie. don't acquire dqlock.
-                 * This is very similar to what xfs_sync does with inodes.
-                 */
-                if (flags & SYNC_TRYLOCK) {
-                        if (!XFS_DQ_IS_DIRTY(dqp))
-                                continue;
-                        if (!xfs_qm_dqlock_nowait(dqp))
-                                continue;
-                } else {
-                        xfs_dqlock(dqp);
-                }
-                /*
-                 * Now, find out for sure if this dquot is dirty or not.
-                 */
-                if (! XFS_DQ_IS_DIRTY(dqp)) {
-                        xfs_dqunlock(dqp);
-                        continue;
-                }
-                /* XXX a sentinel would be better */
-                recl = q->qi_dqreclaims;
-                if (!xfs_dqflock_nowait(dqp)) {
-                        if (flags & SYNC_TRYLOCK) {
-                                xfs_dqunlock(dqp);
-                                continue;
-                        }
-                        /*
-                         * If we can't grab the flush lock then if the caller
-                         * really wanted us to give this our best shot, so
-                         * see if we can give a push to the buffer before we wait
-                         * on the flush lock. At this point, we know that
-                         * even though the dquot is being flushed,
-                         * it has (new) dirty data.
-                         */
-                        xfs_qm_dqflock_pushbuf_wait(dqp);
-                }
-                /*
-                 * Let go of the mplist lock. We don't want to hold it
-                 * across a disk write
-                 */
-                mutex_unlock(&q->qi_dqlist_lock);
-                error = xfs_qm_dqflush(dqp, flags);
-                xfs_dqunlock(dqp);
-                if (error && XFS_FORCED_SHUTDOWN(mp))
-                        return 0;       /* Need to prevent umount failure */
-                else if (error)
-                        return error;
-                mutex_lock(&q->qi_dqlist_lock);
-                if (recl != q->qi_dqreclaims) {
-                        if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS)
-                                break;
-                        mutex_unlock(&q->qi_dqlist_lock);
-                        goto again;
-                }
-        }
-        mutex_unlock(&q->qi_dqlist_lock);
-        return 0;
-}
 /*
 * The hash chains and the mplist use the same xfs_dqhash structure as
 * their list head, but we can take the mplist qh_lock and one of the
@@ -1034,18 +846,21 @@ xfs_qm_init_quotainfo(
        /*
         * We try to get the limits from the superuser's limits fields.
         * This is quite hacky, but it is standard quota practice.
+         *
         * We look at the USR dquot with id == 0 first, but if user quotas
         * are not enabled we goto the GRP dquot with id == 0.
         * We don't really care to keep separate default limits for user
         * and group quotas, at least not at this point.
+         *
+         * Since we may not have done a quotacheck by this point, just read
+         * the dquot without attaching it to any hashtables or lists.
         */
-        error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0,
+        error = xfs_qm_dqread(mp, 0,
-                             XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : 
+                        XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER :
-                             (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
+                         (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
-                                XFS_DQ_PROJ),
+                          XFS_DQ_PROJ),
-                             XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN,
+                        XFS_QMOPT_DOWARN, &dqp);
-                             &dqp);
+        if (!error) {
-        if (! error) {
                xfs_disk_dquot_t        *ddqp = &dqp->q_core;
                /*
@@ -1072,11 +887,6 @@ xfs_qm_init_quotainfo(
                qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
                qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
 
-                /*
-                 * We sent the XFS_QMOPT_DQSUSER flag to dqget because
-                 * we don't want this dquot cached. We haven't done a
-                 * quotacheck yet, and quotacheck doesn't like incore dquots.
-                 */
                xfs_qm_dqdestroy(dqp);
        } else {
                qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
@@ -1661,7 +1471,7 @@ xfs_qm_quotacheck(
         * successfully.
         */
        if (!error)
-                error = xfs_qm_dqflush_all(mp, 0);
+                error = xfs_qm_dqflush_all(mp);
        /*
         * We can get this error if we couldn't do a dquot allocation inside
@@ -1793,59 +1603,33 @@ xfs_qm_init_quotainos(
 /*
- * Just pop the least recently used dquot off the freelist and
+ * Pop the least recently used dquot off the freelist and recycle it.
- * recycle it. The returned dquot is locked.
 */
-STATIC xfs_dquot_t *
+STATIC struct xfs_dquot *
 xfs_qm_dqreclaim_one(void)
 {
-        xfs_dquot_t     *dqpout;
+        struct xfs_dquot        *dqp;
-        xfs_dquot_t     *dqp;
+        int                     restarts = 0;
-        int             restarts;
-        int             startagain;
-        restarts = 0;
-        dqpout = NULL;
-        /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
-again:
-        startagain = 0;
        mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
+restart:
        list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
                struct xfs_mount *mp = dqp->q_mount;
-                xfs_dqlock(dqp);
+                if (!xfs_dqlock_nowait(dqp))
+                        continue;
                /*
-                 * We are racing with dqlookup here. Naturally we don't
+                 * This dquot has already been grabbed by dqlookup.
-                 * want to reclaim a dquot that lookup wants. We release the
+                 * Remove it from the freelist and try again.
-                 * freelist lock and start over, so that lookup will grab
-                 * both the dquot and the freelistlock.
                 */
-                if (dqp->dq_flags & XFS_DQ_WANT) {
+                if (dqp->q_nrefs) {
-                        ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
                        trace_xfs_dqreclaim_want(dqp);
                        XQM_STATS_INC(xqmstats.xs_qm_dqwants);
-                        restarts++;
-                        startagain = 1;
-                        goto dqunlock;
-                }
-                /*
-                 * If the dquot is inactive, we are assured that it is
-                 * not on the mplist or the hashlist, and that makes our
-                 * life easier.
-                 */
-                if (dqp->dq_flags & XFS_DQ_INACTIVE) {
-                        ASSERT(mp == NULL);
-                        ASSERT(! XFS_DQ_IS_DIRTY(dqp));
-                        ASSERT(list_empty(&dqp->q_hashlist));
-                        ASSERT(list_empty(&dqp->q_mplist));
                        list_del_init(&dqp->q_freelist);
                        xfs_Gqm->qm_dqfrlist_cnt--;
-                        dqpout = dqp;
+                        restarts++;
-                        XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
                        goto dqunlock;
                }
@@ -1874,64 +1658,49 @@ again:
                         * We flush it delayed write, so don't bother
                         * releasing the freelist lock.
                         */
-                        error = xfs_qm_dqflush(dqp, 0);
+                        error = xfs_qm_dqflush(dqp, SYNC_TRYLOCK);
                        if (error) {
                                xfs_warn(mp, "%s: dquot %p flush failed",
                                        __func__, dqp);
                        }
                        goto dqunlock;
                }
+                xfs_dqfunlock(dqp);
                /*
-                 * We're trying to get the hashlock out of order. This races
+                 * Prevent lookup now that we are going to reclaim the dquot.
-                 * with dqlookup; so, we giveup and goto the next dquot if
+                 * Once XFS_DQ_FREEING is set lookup won't touch the dquot,
-                 * we couldn't get the hashlock. This way, we won't starve
+                 * thus we can drop the lock now.
-                 * a dqlookup process that holds the hashlock that is
-                 * waiting for the freelist lock.
                 */
-                if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
+                dqp->dq_flags |= XFS_DQ_FREEING;
-                        restarts++;
+                xfs_dqunlock(dqp);
-                        goto dqfunlock;
-                }
-                /*
+                mutex_lock(&dqp->q_hash->qh_lock);
-                 * This races with dquot allocation code as well as dqflush_all
+                list_del_init(&dqp->q_hashlist);
-                 * and reclaim code. So, if we failed to grab the mplist lock,
+                dqp->q_hash->qh_version++;
-                 * giveup everything and start over.
+                mutex_unlock(&dqp->q_hash->qh_lock);
-                 */
-                if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) {
-                        restarts++;
-                        startagain = 1;
-                        goto qhunlock;
-                }
-                ASSERT(dqp->q_nrefs == 0);
+                mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
                list_del_init(&dqp->q_mplist);
                mp->m_quotainfo->qi_dquots--;
                mp->m_quotainfo->qi_dqreclaims++;
-                list_del_init(&dqp->q_hashlist);
+                mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
-                dqp->q_hash->qh_version++;
+                ASSERT(dqp->q_nrefs == 0);
                list_del_init(&dqp->q_freelist);
                xfs_Gqm->qm_dqfrlist_cnt--;
-                dqpout = dqp;
-                mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
+                mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-qhunlock:
+                return dqp;
-                mutex_unlock(&dqp->q_hash->qh_lock);
-dqfunlock:
-                xfs_dqfunlock(dqp);
 dqunlock:
                xfs_dqunlock(dqp);
-                if (dqpout)
-                        break;
                if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
                        break;
-                if (startagain) {
+                goto restart;
-                        mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-                        goto again;
-                }
        }
        mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-        return dqpout;
+        return NULL;
 }
 /*
@@ -2151,10 +1920,7 @@ xfs_qm_vop_dqalloc(
                         * this to caller
                         */
                        ASSERT(ip->i_udquot);
-                        uq = ip->i_udquot;
+                        uq = xfs_qm_dqhold(ip->i_udquot);
-                        xfs_dqlock(uq);
-                        XFS_DQHOLD(uq);
-                        xfs_dqunlock(uq);
                }
        }
        if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
@@ -2175,10 +1941,7 @@ xfs_qm_vop_dqalloc(
                        xfs_ilock(ip, lockflags);
                } else {
                        ASSERT(ip->i_gdquot);
-                        gq = ip->i_gdquot;
+                        gq = xfs_qm_dqhold(ip->i_gdquot);
-                        xfs_dqlock(gq);
-                        XFS_DQHOLD(gq);
-                        xfs_dqunlock(gq);
                }
        } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
                if (xfs_get_projid(ip) != prid) {
@@ -2198,10 +1961,7 @@ xfs_qm_vop_dqalloc(
                        xfs_ilock(ip, lockflags);
                } else {
                        ASSERT(ip->i_gdquot);
-                        gq = ip->i_gdquot;
+                        gq = xfs_qm_dqhold(ip->i_gdquot);
-                        xfs_dqlock(gq);
-                        XFS_DQHOLD(gq);
-                        xfs_dqunlock(gq);
                }
        }
        if (uq)
@@ -2251,14 +2011,10 @@ xfs_qm_vop_chown(
        xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
        /*
-         * Take an extra reference, because the inode
+         * Take an extra reference, because the inode is going to keep
-         * is going to keep this dquot pointer even
+         * this dquot pointer even after the trans_commit.
-         * after the trans_commit.
         */
-        xfs_dqlock(newdq);
+        *IO_olddq = xfs_qm_dqhold(newdq);
-        XFS_DQHOLD(newdq);
-        xfs_dqunlock(newdq);
-        *IO_olddq = newdq;
        return prevdq;
 }
@@ -2390,25 +2146,21 @@ xfs_qm_vop_create_dqattach(
        ASSERT(XFS_IS_QUOTA_RUNNING(mp));
        if (udqp) {
-                xfs_dqlock(udqp);
-                XFS_DQHOLD(udqp);
-                xfs_dqunlock(udqp);
                ASSERT(ip->i_udquot == NULL);
-                ip->i_udquot = udqp;
                ASSERT(XFS_IS_UQUOTA_ON(mp));
                ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
+                ip->i_udquot = xfs_qm_dqhold(udqp);
                xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
        }
        if (gdqp) {
-                xfs_dqlock(gdqp);
-                XFS_DQHOLD(gdqp);
-                xfs_dqunlock(gdqp);
                ASSERT(ip->i_gdquot == NULL);
-                ip->i_gdquot = gdqp;
                ASSERT(XFS_IS_OQUOTA_ON(mp));
                ASSERT((XFS_IS_GQUOTA_ON(mp) ?
                        ip->i_d.di_gid : xfs_get_projid(ip)) ==
                                be32_to_cpu(gdqp->q_core.d_id));
+                ip->i_gdquot = xfs_qm_dqhold(gdqp);
                xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
        }
 }
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index 43b9abe1052c..9b4f3adefbc5 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -33,12 +33,6 @@ extern kmem_zone_t	*qm_dqzone;
 extern kmem_zone_t      *qm_dqtrxzone;
 /*
- * Used in xfs_qm_sync called by xfs_sync to count the max times that it can
- * iterate over the mountpt's dquot list in one call.
- */
-#define XFS_QM_SYNC_MAX_RESTARTS        7
-/*
 * Ditto, for xfs_qm_dqreclaim_one.
 */
 #define XFS_QM_RECLAIM_MAX_RESTARTS     4
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index a595f29567fe..8a0807e0f979 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -87,8 +87,7 @@ typedef struct xfs_dqblk {
 #define XFS_DQ_PROJ             0x0002          /* project quota */
 #define XFS_DQ_GROUP            0x0004          /* a group quota */
 #define XFS_DQ_DIRTY            0x0008          /* dquot is dirty */
-#define XFS_DQ_WANT             0x0010          /* for lookup/reclaim race */
+#define XFS_DQ_FREEING          0x0010          /* dquot is being torn down */
-#define XFS_DQ_INACTIVE         0x0020          /* dq off mplist & hashlist */
 #define XFS_DQ_ALLTYPES         (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP)
@@ -97,8 +96,7 @@ typedef struct xfs_dqblk {
        { XFS_DQ_PROJ,          "PROJ" }, \
        { XFS_DQ_GROUP,         "GROUP" }, \
        { XFS_DQ_DIRTY,         "DIRTY" }, \
-        { XFS_DQ_WANT,          "WANT" }, \
+        { XFS_DQ_FREEING,       "FREEING" }
-        { XFS_DQ_INACTIVE,      "INACTIVE" }
 /*
 * In the worst case, when both user and group quotas are on,
@@ -199,7 +197,6 @@ typedef struct xfs_qoff_logformat {
 #define XFS_QMOPT_UQUOTA        0x0000004 /* user dquot requested */
 #define XFS_QMOPT_PQUOTA        0x0000008 /* project dquot requested */
 #define XFS_QMOPT_FORCE_RES     0x0000010 /* ignore quota limits */
-#define XFS_QMOPT_DQSUSER       0x0000020 /* don't cache super users dquot */
 #define XFS_QMOPT_SBVERSION     0x0000040 /* change superblock version num */
 #define XFS_QMOPT_DOWARN        0x0000400 /* increase warning cnt if needed */
 #define XFS_QMOPT_DQREPAIR      0x0001000 /* repair dquot if damaged */
@@ -326,7 +323,6 @@ extern int xfs_qm_dqattach_locked(struct xfs_inode *, uint);
 extern void xfs_qm_dqdetach(struct xfs_inode *);
 extern void xfs_qm_dqrele(struct xfs_dquot *);
 extern void xfs_qm_statvfs(struct xfs_inode *, struct kstatfs *);
-extern int xfs_qm_sync(struct xfs_mount *, int);
 extern int xfs_qm_newmount(struct xfs_mount *, uint *, uint *);
 extern void xfs_qm_mount_quotas(struct xfs_mount *);
 extern void xfs_qm_unmount(struct xfs_mount *);
@@ -366,10 +362,6 @@ static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp,
 #define xfs_qm_dqdetach(ip)
 #define xfs_qm_dqrele(d)
 #define xfs_qm_statvfs(ip, s)
-static inline int xfs_qm_sync(struct xfs_mount *mp, int flags)
-{
-        return 0;
-}
 #define xfs_qm_newmount(mp, a, b)                                       (0)
 #define xfs_qm_mount_quotas(mp)
 #define xfs_qm_unmount(mp)
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 3eca58f51ae9..281961c1d81a 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -199,7 +199,6 @@ xfs_parseargs(
        mp->m_flags |= XFS_MOUNT_BARRIER;
        mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
        mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
-        mp->m_flags |= XFS_MOUNT_DELAYLOG;
        /*
         * These can be overridden by the mount option parsing.
@@ -353,11 +352,11 @@ xfs_parseargs(
                        mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
                        mp->m_qflags &= ~XFS_OQUOTA_ENFD;
                } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
-                        mp->m_flags |= XFS_MOUNT_DELAYLOG;
+                        xfs_warn(mp,
+        "delaylog is the default now, option is deprecated.");
                } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
-                        mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
                        xfs_warn(mp,
-        "nodelaylog is deprecated and will be removed in Linux 3.3");
+        "nodelaylog support has been removed, option is deprecated.");
                } else if (!strcmp(this_char, MNTOPT_DISCARD)) {
                        mp->m_flags |= XFS_MOUNT_DISCARD;
                } else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
@@ -395,13 +394,6 @@ xfs_parseargs(
                return EINVAL;
        }
-        if ((mp->m_flags & XFS_MOUNT_DISCARD) &&
-            !(mp->m_flags & XFS_MOUNT_DELAYLOG)) {
-                xfs_warn(mp,
-        "the discard option is incompatible with the nodelaylog option");
-                return EINVAL;
-        }
 #ifndef CONFIG_XFS_QUOTA
        if (XFS_IS_QUOTA_RUNNING(mp)) {
                xfs_warn(mp, "quota support not available in this kernel.");
@@ -501,7 +493,6 @@ xfs_showargs(
                { XFS_MOUNT_ATTR2,              "," MNTOPT_ATTR2 },
                { XFS_MOUNT_FILESTREAMS,        "," MNTOPT_FILESTREAM },
                { XFS_MOUNT_GRPID,              "," MNTOPT_GRPID },
-                { XFS_MOUNT_DELAYLOG,           "," MNTOPT_DELAYLOG },
                { XFS_MOUNT_DISCARD,            "," MNTOPT_DISCARD },
                { 0, NULL }
        };
@@ -869,27 +860,6 @@ xfs_fs_dirty_inode(
 }
 STATIC int
-xfs_log_inode(
-        struct xfs_inode        *ip)
-{
-        struct xfs_mount        *mp = ip->i_mount;
-        struct xfs_trans        *tp;
-        int                     error;
-        tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
-        error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
-        if (error) {
-                xfs_trans_cancel(tp, 0);
-                return error;
-        }
-        xfs_ilock(ip, XFS_ILOCK_EXCL);
-        xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
-        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-        return xfs_trans_commit(tp, 0);
-}
-STATIC int
 xfs_fs_write_inode(
        struct inode            *inode,
        struct writeback_control *wbc)
@@ -902,10 +872,8 @@ xfs_fs_write_inode(
        if (XFS_FORCED_SHUTDOWN(mp))
                return -XFS_ERROR(EIO);
-        if (!ip->i_update_core)
-                return 0;
-        if (wbc->sync_mode == WB_SYNC_ALL) {
+        if (wbc->sync_mode == WB_SYNC_ALL || wbc->for_kupdate) {
                /*
                 * Make sure the inode has made it it into the log.  Instead
                 * of forcing it all the way to stable storage using a
@@ -913,11 +881,14 @@ xfs_fs_write_inode(
                 * ->sync_fs call do that for thus, which reduces the number
                 * of synchronous log forces dramatically.
                 */
-                error = xfs_log_inode(ip);
+                error = xfs_log_dirty_inode(ip, NULL, 0);
                if (error)
                        goto out;
                return 0;
        } else {
+                if (!ip->i_update_core)
+                        return 0;
                /*
                 * We make this non-blocking if the inode is contended, return
                 * EAGAIN to indicate to the caller that they did not succeed.
@@ -1034,17 +1005,10 @@ xfs_fs_sync_fs(
        int                     error;
        /*
-         * Not much we can do for the first async pass.  Writing out the
+         * Doing anything during the async pass would be counterproductive.
-         * superblock would be counter-productive as we are going to redirty
-         * when writing out other data and metadata (and writing out a single
-         * block is quite fast anyway).
-         *
-         * Try to asynchronously kick off quota syncing at least.
         */
-        if (!wait) {
+        if (!wait)
-                xfs_qm_sync(mp, SYNC_TRYLOCK);
                return 0;
-        }
        error = xfs_quiesce_data(mp);
        if (error)
@@ -1258,9 +1222,9 @@ xfs_fs_unfreeze(
 STATIC int
 xfs_fs_show_options(
        struct seq_file         *m,
-        struct vfsmount         *mnt)
+        struct dentry           *root)
 {
-        return -xfs_showargs(XFS_M(mnt->mnt_sb), m);
+        return -xfs_showargs(XFS_M(root->d_sb), m);
 }
 /*
@@ -1641,12 +1605,12 @@ STATIC int __init
 xfs_init_workqueues(void)
 {
        /*
-         * max_active is set to 8 to give enough concurency to allow
+         * We never want the same work item to run twice; reclaiming inodes
-         * multiple work operations on each CPU to run. This allows multiple
+         * or idling the log is not going to get any faster by multiple CPUs
-         * filesystems to be running sync work concurrently, and scales with
+         * competing for resources.  Use the default large max_active value
-         * the number of CPUs in the system.
+         * so that even lots of filesystems can perform these tasks in parallel.
         */
-        xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8);
+        xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_NON_REENTRANT, 0);
        if (!xfs_syncd_wq)
                return -ENOMEM;
        return 0;
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index be5c51d8f757..72c01a1c16e7 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -336,6 +336,32 @@ xfs_sync_fsdata(
        return error;
 }
+int
+xfs_log_dirty_inode(
+        struct xfs_inode        *ip,
+        struct xfs_perag        *pag,
+        int                     flags)
+{
+        struct xfs_mount        *mp = ip->i_mount;
+        struct xfs_trans        *tp;
+        int                     error;
+        if (!ip->i_update_core)
+                return 0;
+        tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
+        error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
+        if (error) {
+                xfs_trans_cancel(tp, 0);
+                return error;
+        }
+        xfs_ilock(ip, XFS_ILOCK_EXCL);
+        xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+        return xfs_trans_commit(tp, 0);
+}
 /*
 * When remounting a filesystem read-only or freezing the filesystem, we have
 * two phases to execute. This first phase is syncing the data before we
@@ -359,10 +385,17 @@ xfs_quiesce_data(
 {
        int                     error, error2 = 0;
-        xfs_qm_sync(mp, SYNC_TRYLOCK);
+        /*
-        xfs_qm_sync(mp, SYNC_WAIT);
+         * Log all pending size and timestamp updates.  The vfs writeback
+         * code is supposed to do this, but due to its overaggressive
+         * livelock detection it will skip inodes where appending writes
+         * were written out in the first non-blocking sync phase if their
+         * completion took long enough that it happened after taking the
+         * timestamp for the cut-off in the blocking phase.
+         */
+        xfs_inode_ag_iterator(mp, xfs_log_dirty_inode, 0);
-        /* force out the newly dirtied log buffers */
+        /* force out the log */
        xfs_log_force(mp, XFS_LOG_SYNC);
        /* write superblock and hoover up shutdown errors */
@@ -470,7 +503,6 @@ xfs_sync_worker(
                        error = xfs_fs_log_dummy(mp);
                else
                        xfs_log_force(mp, 0);
-                error = xfs_qm_sync(mp, SYNC_TRYLOCK);
                /* start pushing all the metadata that is currently dirty */
                xfs_ail_push_all(mp->m_ail);
diff --git a/fs/xfs/xfs_sync.h b/fs/xfs/xfs_sync.h
index 941202e7ac6e..fa965479d788 100644
--- a/fs/xfs/xfs_sync.h
+++ b/fs/xfs/xfs_sync.h
@@ -34,6 +34,8 @@ void xfs_quiesce_attr(struct xfs_mount *mp);
 void xfs_flush_inodes(struct xfs_inode *ip);
+int xfs_log_dirty_inode(struct xfs_inode *ip, struct xfs_perag *pag, int flags);
 int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
 int xfs_reclaim_inodes_count(struct xfs_mount *mp);
 void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 494035798873..a9d5b1e06efe 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -743,8 +743,6 @@ DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
 DEFINE_DQUOT_EVENT(xfs_dqread);
 DEFINE_DQUOT_EVENT(xfs_dqread_fail);
 DEFINE_DQUOT_EVENT(xfs_dqlookup_found);
-DEFINE_DQUOT_EVENT(xfs_dqlookup_want);
-DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist);
 DEFINE_DQUOT_EVENT(xfs_dqlookup_done);
 DEFINE_DQUOT_EVENT(xfs_dqget_hit);
 DEFINE_DQUOT_EVENT(xfs_dqget_miss);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 1f35b2feca97..329b06aba1c2 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1158,7 +1158,6 @@ xfs_trans_add_item(
        lidp->lid_item = lip;
        lidp->lid_flags = 0;
-        lidp->lid_size = 0;
        list_add_tail(&lidp->lid_trans, &tp->t_items);
        lip->li_desc = lidp;
@@ -1210,219 +1209,6 @@ xfs_trans_free_items(
        }
 }
-/*
- * Unlock the items associated with a transaction.
- *
- * Items which were not logged should be freed.  Those which were logged must
- * still be tracked so they can be unpinned when the transaction commits.
- */
-STATIC void
-xfs_trans_unlock_items(
-        struct xfs_trans        *tp,
-        xfs_lsn_t               commit_lsn)
-{
-        struct xfs_log_item_desc *lidp, *next;
-        list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) {
-                struct xfs_log_item     *lip = lidp->lid_item;
-                lip->li_desc = NULL;
-                if (commit_lsn != NULLCOMMITLSN)
-                        IOP_COMMITTING(lip, commit_lsn);
-                IOP_UNLOCK(lip);
-                /*
-                 * Free the descriptor if the item is not dirty
-                 * within this transaction.
-                 */
-                if (!(lidp->lid_flags & XFS_LID_DIRTY))
-                        xfs_trans_free_item_desc(lidp);
-        }
-}
-/*
- * Total up the number of log iovecs needed to commit this
- * transaction.  The transaction itself needs one for the
- * transaction header.  Ask each dirty item in turn how many
- * it needs to get the total.
- */
-static uint
-xfs_trans_count_vecs(
-        struct xfs_trans        *tp)
-{
-        int                     nvecs;
-        struct xfs_log_item_desc *lidp;
-        nvecs = 1;
-        /* In the non-debug case we need to start bailing out if we
-         * didn't find a log_item here, return zero and let trans_commit
-         * deal with it.
-         */
-        if (list_empty(&tp->t_items)) {
-                ASSERT(0);
-                return 0;
-        }
-        list_for_each_entry(lidp, &tp->t_items, lid_trans) {
-                /*
-                 * Skip items which aren't dirty in this transaction.
-                 */
-                if (!(lidp->lid_flags & XFS_LID_DIRTY))
-                        continue;
-                lidp->lid_size = IOP_SIZE(lidp->lid_item);
-                nvecs += lidp->lid_size;
-        }
-        return nvecs;
-}
-/*
- * Fill in the vector with pointers to data to be logged
- * by this transaction.  The transaction header takes
- * the first vector, and then each dirty item takes the
- * number of vectors it indicated it needed in xfs_trans_count_vecs().
- *
- * As each item fills in the entries it needs, also pin the item
- * so that it cannot be flushed out until the log write completes.
- */
-static void
-xfs_trans_fill_vecs(
-        struct xfs_trans        *tp,
-        struct xfs_log_iovec    *log_vector)
-{
-        struct xfs_log_item_desc *lidp;
-        struct xfs_log_iovec    *vecp;
-        uint                    nitems;
-        /*
-         * Skip over the entry for the transaction header, we'll
-         * fill that in at the end.
-         */
-        vecp = log_vector + 1;
-        nitems = 0;
-        ASSERT(!list_empty(&tp->t_items));
-        list_for_each_entry(lidp, &tp->t_items, lid_trans) {
-                /* Skip items which aren't dirty in this transaction. */
-                if (!(lidp->lid_flags & XFS_LID_DIRTY))
-                        continue;
-                /*
-                 * The item may be marked dirty but not log anything.  This can
-                 * be used to get called when a transaction is committed.
-                 */
-                if (lidp->lid_size)
-                        nitems++;
-                IOP_FORMAT(lidp->lid_item, vecp);
-                vecp += lidp->lid_size;
-                IOP_PIN(lidp->lid_item);
-        }
-        /*
-         * Now that we've counted the number of items in this transaction, fill
-         * in the transaction header. Note that the transaction header does not
-         * have a log item.
-         */
-        tp->t_header.th_magic = XFS_TRANS_HEADER_MAGIC;
-        tp->t_header.th_type = tp->t_type;
-        tp->t_header.th_num_items = nitems;
-        log_vector->i_addr = (xfs_caddr_t)&tp->t_header;
-        log_vector->i_len = sizeof(xfs_trans_header_t);
-        log_vector->i_type = XLOG_REG_TYPE_TRANSHDR;
-}
-/*
- * The committed item processing consists of calling the committed routine of
- * each logged item, updating the item's position in the AIL if necessary, and
- * unpinning each item.  If the committed routine returns -1, then do nothing
- * further with the item because it may have been freed.
- *
- * Since items are unlocked when they are copied to the incore log, it is
- * possible for two transactions to be completing and manipulating the same
- * item simultaneously.  The AIL lock will protect the lsn field of each item.
- * The value of this field can never go backwards.
- *
- * We unpin the items after repositioning them in the AIL, because otherwise
- * they could be immediately flushed and we'd have to race with the flusher
- * trying to pull the item from the AIL as we add it.
- */
-static void
-xfs_trans_item_committed(
-        struct xfs_log_item     *lip,
-        xfs_lsn_t               commit_lsn,
-        int                     aborted)
-{
-        xfs_lsn_t               item_lsn;
-        struct xfs_ail          *ailp;
-        if (aborted)
-                lip->li_flags |= XFS_LI_ABORTED;
-        item_lsn = IOP_COMMITTED(lip, commit_lsn);
-        /* item_lsn of -1 means the item needs no further processing */
-        if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
-                return;
-        /*
-         * If the returned lsn is greater than what it contained before, update
-         * the location of the item in the AIL.  If it is not, then do nothing.
-         * Items can never move backwards in the AIL.
-         *
-         * While the new lsn should usually be greater, it is possible that a
-         * later transaction completing simultaneously with an earlier one
-         * using the same item could complete first with a higher lsn.  This
-         * would cause the earlier transaction to fail the test below.
-         */
-        ailp = lip->li_ailp;
-        spin_lock(&ailp->xa_lock);
-        if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) {
-                /*
-                 * This will set the item's lsn to item_lsn and update the
-                 * position of the item in the AIL.
-                 *
-                 * xfs_trans_ail_update() drops the AIL lock.
-                 */
-                xfs_trans_ail_update(ailp, lip, item_lsn);
-        } else {
-                spin_unlock(&ailp->xa_lock);
-        }
-        /*
-         * Now that we've repositioned the item in the AIL, unpin it so it can
-         * be flushed. Pass information about buffer stale state down from the
-         * log item flags, if anyone else stales the buffer we do not want to
-         * pay any attention to it.
-         */
-        IOP_UNPIN(lip, 0);
-}
-/*
- * This is typically called by the LM when a transaction has been fully
- * committed to disk.  It needs to unpin the items which have
- * been logged by the transaction and update their positions
- * in the AIL if necessary.
- *
- * This also gets called when the transactions didn't get written out
- * because of an I/O error. Abortflag & XFS_LI_ABORTED is set then.
- */
-STATIC void
-xfs_trans_committed(
-        void                    *arg,
-        int                     abortflag)
-{
-        struct xfs_trans        *tp = arg;
-        struct xfs_log_item_desc *lidp, *next;
-        list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) {
-                xfs_trans_item_committed(lidp->lid_item, tp->t_lsn, abortflag);
-                xfs_trans_free_item_desc(lidp);
-        }
-        xfs_trans_free(tp);
-}
 static inline void
 xfs_log_item_batch_insert(
        struct xfs_ail          *ailp,
@@ -1538,258 +1324,6 @@ xfs_trans_committed_bulk(
 }
 /*
- * Called from the trans_commit code when we notice that the filesystem is in
- * the middle of a forced shutdown.
- *
- * When we are called here, we have already pinned all the items in the
- * transaction. However, neither IOP_COMMITTING or IOP_UNLOCK has been called
- * so we can simply walk the items in the transaction, unpin them with an abort
- * flag and then free the items. Note that unpinning the items can result in
- * them being freed immediately, so we need to use a safe list traversal method
- * here.
- */
-STATIC void
-xfs_trans_uncommit(
-        struct xfs_trans        *tp,
-        uint                    flags)
-{
-        struct xfs_log_item_desc *lidp, *n;
-        list_for_each_entry_safe(lidp, n, &tp->t_items, lid_trans) {
-                if (lidp->lid_flags & XFS_LID_DIRTY)
-                        IOP_UNPIN(lidp->lid_item, 1);
-        }
-        xfs_trans_unreserve_and_mod_sb(tp);
-        xfs_trans_unreserve_and_mod_dquots(tp);
-        xfs_trans_free_items(tp, NULLCOMMITLSN, flags);
-        xfs_trans_free(tp);
-}
-/*
- * Format the transaction direct to the iclog. This isolates the physical
- * transaction commit operation from the logical operation and hence allows
- * other methods to be introduced without affecting the existing commit path.
- */
-static int
-xfs_trans_commit_iclog(
-        struct xfs_mount        *mp,
-        struct xfs_trans        *tp,
-        xfs_lsn_t               *commit_lsn,
-        int                     flags)
-{
-        int                     shutdown;
-        int                     error;
-        int                     log_flags = 0;
-        struct xlog_in_core     *commit_iclog;
-#define XFS_TRANS_LOGVEC_COUNT  16
-        struct xfs_log_iovec    log_vector_fast[XFS_TRANS_LOGVEC_COUNT];
-        struct xfs_log_iovec    *log_vector;
-        uint                    nvec;
-        /*
-         * Ask each log item how many log_vector entries it will
-         * need so we can figure out how many to allocate.
-         * Try to avoid the kmem_alloc() call in the common case
-         * by using a vector from the stack when it fits.
-         */
-        nvec = xfs_trans_count_vecs(tp);
-        if (nvec == 0) {
-                return ENOMEM;  /* triggers a shutdown! */
-        } else if (nvec <= XFS_TRANS_LOGVEC_COUNT) {
-                log_vector = log_vector_fast;
-        } else {
-                log_vector = (xfs_log_iovec_t *)kmem_alloc(nvec *
-                                                   sizeof(xfs_log_iovec_t),
-                                                   KM_SLEEP);
-        }
-        /*
-         * Fill in the log_vector and pin the logged items, and
-         * then write the transaction to the log.
-         */
-        xfs_trans_fill_vecs(tp, log_vector);
-        if (flags & XFS_TRANS_RELEASE_LOG_RES)
-                log_flags = XFS_LOG_REL_PERM_RESERV;
-        error = xfs_log_write(mp, log_vector, nvec, tp->t_ticket, &(tp->t_lsn));
-        /*
-         * The transaction is committed incore here, and can go out to disk
-         * at any time after this call.  However, all the items associated
-         * with the transaction are still locked and pinned in memory.
-         */
-        *commit_lsn = xfs_log_done(mp, tp->t_ticket, &commit_iclog, log_flags);
-        tp->t_commit_lsn = *commit_lsn;
-        trace_xfs_trans_commit_lsn(tp);
-        if (nvec > XFS_TRANS_LOGVEC_COUNT)
-                kmem_free(log_vector);
-        /*
-         * If we got a log write error. Unpin the logitems that we
-         * had pinned, clean up, free trans structure, and return error.
-         */
-        if (error || *commit_lsn == -1) {
-                current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
-                xfs_trans_uncommit(tp, flags|XFS_TRANS_ABORT);
-                return XFS_ERROR(EIO);
-        }
-        /*
-         * Once the transaction has committed, unused
-         * reservations need to be released and changes to
-         * the superblock need to be reflected in the in-core
-         * version.  Do that now.
-         */
-        xfs_trans_unreserve_and_mod_sb(tp);
-        /*
-         * Tell the LM to call the transaction completion routine
-         * when the log write with LSN commit_lsn completes (e.g.
-         * when the transaction commit really hits the on-disk log).
-         * After this call we cannot reference tp, because the call
-         * can happen at any time and the call will free the transaction
-         * structure pointed to by tp.  The only case where we call
-         * the completion routine (xfs_trans_committed) directly is
-         * if the log is turned off on a debug kernel or we're
-         * running in simulation mode (the log is explicitly turned
-         * off).
-         */
-        tp->t_logcb.cb_func = xfs_trans_committed;
-        tp->t_logcb.cb_arg = tp;
-        /*
-         * We need to pass the iclog buffer which was used for the
-         * transaction commit record into this function, and attach
-         * the callback to it. The callback must be attached before
-         * the items are unlocked to avoid racing with other threads
-         * waiting for an item to unlock.
-         */
-        shutdown = xfs_log_notify(mp, commit_iclog, &(tp->t_logcb));
-        /*
-         * Mark this thread as no longer being in a transaction
-         */
-        current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
-        /*
-         * Once all the items of the transaction have been copied
-         * to the in core log and the callback is attached, the
-         * items can be unlocked.
-         *
-         * This will free descriptors pointing to items which were
-         * not logged since there is nothing more to do with them.
-         * For items which were logged, we will keep pointers to them
-         * so they can be unpinned after the transaction commits to disk.
-         * This will also stamp each modified meta-data item with
-         * the commit lsn of this transaction for dependency tracking
-         * purposes.
-         */
-        xfs_trans_unlock_items(tp, *commit_lsn);
-        /*
-         * If we detected a log error earlier, finish committing
-         * the transaction now (unpin log items, etc).
-         *
-         * Order is critical here, to avoid using the transaction
-         * pointer after its been freed (by xfs_trans_committed
-         * either here now, or as a callback).  We cannot do this
-         * step inside xfs_log_notify as was done earlier because
-         * of this issue.
-         */
-        if (shutdown)
-                xfs_trans_committed(tp, XFS_LI_ABORTED);
-        /*
-         * Now that the xfs_trans_committed callback has been attached,
-         * and the items are released we can finally allow the iclog to
-         * go to disk.
-         */
-        return xfs_log_release_iclog(mp, commit_iclog);
-}
-/*
- * Walk the log items and allocate log vector structures for
- * each item large enough to fit all the vectors they require.
- * Note that this format differs from the old log vector format in
- * that there is no transaction header in these log vectors.
- */
-STATIC struct xfs_log_vec *
-xfs_trans_alloc_log_vecs(
-        xfs_trans_t     *tp)
-{
-        struct xfs_log_item_desc *lidp;
-        struct xfs_log_vec      *lv = NULL;
-        struct xfs_log_vec      *ret_lv = NULL;
-        /* Bail out if we didn't find a log item.  */
-        if (list_empty(&tp->t_items)) {
-                ASSERT(0);
-                return NULL;
-        }
-        list_for_each_entry(lidp, &tp->t_items, lid_trans) {
-                struct xfs_log_vec *new_lv;
-                /* Skip items which aren't dirty in this transaction. */
-                if (!(lidp->lid_flags & XFS_LID_DIRTY))
-                        continue;
-                /* Skip items that do not have any vectors for writing */
-                lidp->lid_size = IOP_SIZE(lidp->lid_item);
-                if (!lidp->lid_size)
-                        continue;
-                new_lv = kmem_zalloc(sizeof(*new_lv) +
-                                lidp->lid_size * sizeof(struct xfs_log_iovec),
-                                KM_SLEEP);
-                /* The allocated iovec region lies beyond the log vector. */
-                new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1];
-                new_lv->lv_niovecs = lidp->lid_size;
-                new_lv->lv_item = lidp->lid_item;
-                if (!ret_lv)
-                        ret_lv = new_lv;
-                else
-                        lv->lv_next = new_lv;
-                lv = new_lv;
-        }
-        return ret_lv;
-}
-static int
-xfs_trans_commit_cil(
-        struct xfs_mount        *mp,
-        struct xfs_trans        *tp,
-        xfs_lsn_t               *commit_lsn,
-        int                     flags)
-{
-        struct xfs_log_vec      *log_vector;
-        /*
-         * Get each log item to allocate a vector structure for
-         * the log item to to pass to the log write code. The
-         * CIL commit code will format the vector and save it away.
-         */
-        log_vector = xfs_trans_alloc_log_vecs(tp);
-        if (!log_vector)
-                return ENOMEM;
-        xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags);
-        current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
-        xfs_trans_free(tp);
-        return 0;
-}
-/*
 * Commit the given transaction to the log.
 *
 * XFS disk error handling mechanism is not based on a typical
@@ -1845,17 +1379,16 @@ xfs_trans_commit(
                xfs_trans_apply_sb_deltas(tp);
        xfs_trans_apply_dquot_deltas(tp);
-        if (mp->m_flags & XFS_MOUNT_DELAYLOG)
+        error = xfs_log_commit_cil(mp, tp, &commit_lsn, flags);
-                error = xfs_trans_commit_cil(mp, tp, &commit_lsn, flags);
-        else
-                error = xfs_trans_commit_iclog(mp, tp, &commit_lsn, flags);
        if (error == ENOMEM) {
                xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
                error = XFS_ERROR(EIO);
                goto out_unreserve;
        }
+        current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
+        xfs_trans_free(tp);
        /*
         * If the transaction needs to be synchronous, then force the
         * log out now and wait for it.
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 3ae713c0abd9..f6118703f20d 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -163,9 +163,8 @@ typedef struct xfs_trans_header {
 */
 struct xfs_log_item_desc {
        struct xfs_log_item     *lid_item;
-        ushort                  lid_size;
-        unsigned char           lid_flags;
        struct list_head        lid_trans;
+        unsigned char           lid_flags;
 };
 #define XFS_LID_DIRTY           0x1
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 8b32d1a4c5a1..89dbb4a50872 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -53,7 +53,7 @@ xfs_dir_ialloc(
                                           output: may be a new transaction. */
        xfs_inode_t     *dp,            /* directory within whose allocate
                                           the inode. */
-        mode_t          mode,
+        umode_t         mode,
        xfs_nlink_t     nlink,
        xfs_dev_t       rdev,
        prid_t          prid,           /* project id */
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h
index 456fca314933..5eeab4690cfe 100644
--- a/fs/xfs/xfs_utils.h
+++ b/fs/xfs/xfs_utils.h
@@ -18,7 +18,7 @@
 #ifndef __XFS_UTILS_H__
 #define __XFS_UTILS_H__
-extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t,
+extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, umode_t, xfs_nlink_t,
                                xfs_dev_t, prid_t, int, xfs_inode_t **, int *);
 extern int xfs_droplink(xfs_trans_t *, xfs_inode_t *);
 extern int xfs_bumplink(xfs_trans_t *, xfs_inode_t *);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index ce9268a2f56b..f2fea868d4db 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -822,7 +822,7 @@ int
 xfs_create(
        xfs_inode_t             *dp,
        struct xfs_name         *name,
-        mode_t                  mode,
+        umode_t                 mode,
        xfs_dev_t               rdev,
        xfs_inode_t             **ipp)
 {
@@ -1481,7 +1481,7 @@ xfs_symlink(
        xfs_inode_t             *dp,
        struct xfs_name         *link_name,
        const char              *target_path,
-        mode_t                  mode,
+        umode_t                 mode,
        xfs_inode_t             **ipp)
 {
        xfs_mount_t             *mp = dp->i_mount;
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 35d3d513e1e9..0c877cbde142 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -26,7 +26,7 @@ int xfs_release(struct xfs_inode *ip);
 int xfs_inactive(struct xfs_inode *ip);
 int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
                struct xfs_inode **ipp, struct xfs_name *ci_name);
-int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode,
+int xfs_create(struct xfs_inode *dp, struct xfs_name *name, umode_t mode,
                xfs_dev_t rdev, struct xfs_inode **ipp);
 int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
                struct xfs_inode *ip);
@@ -35,7 +35,7 @@ int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
 int xfs_readdir(struct xfs_inode        *dp, void *dirent, size_t bufsize,
                       xfs_off_t *offset, filldir_t filldir);
 int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
-                const char *target_path, mode_t mode, struct xfs_inode **ipp);
+                const char *target_path, umode_t mode, struct xfs_inode **ipp);
 int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state);
 int xfs_change_file_space(struct xfs_inode *ip, int cmd,
                xfs_flock64_t *bf, xfs_off_t offset, int attr_flags);