author     Lachlan McIlroy <lachlan@redback.melbourne.sgi.com>    2008-05-19 01:09:05 -0400
committer  Lachlan McIlroy <lachlan@redback.melbourne.sgi.com>    2008-05-19 01:09:05 -0400
commit     c203e45f069af47ca7623e4dcd8c00bfba2722e4 (patch)
tree       4563115b6565dcfd97015c1c9366fb3d07cabf19 /fs
parent     a94477da38e0b261a7ecea71f4c95a3bcd5be69c (diff)
parent     b8291ad07a7f3b5b990900f0001198ac23ba893e (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6 into for-linus
Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/fid.h | 15
-rw-r--r--  fs/9p/v9fs.c | 57
-rw-r--r--  fs/9p/v9fs.h | 85
-rw-r--r--  fs/9p/vfs_addr.c | 2
-rw-r--r--  fs/9p/vfs_dir.c | 2
-rw-r--r--  fs/9p/vfs_file.c | 11
-rw-r--r--  fs/9p/vfs_inode.c | 50
-rw-r--r--  fs/9p/vfs_super.c | 35
-rw-r--r--  fs/Kconfig | 3
-rw-r--r--  fs/Kconfig.binfmt | 2
-rw-r--r--  fs/adfs/adfs.h | 2
-rw-r--r--  fs/adfs/dir_f.c | 4
-rw-r--r--  fs/affs/affs.h | 4
-rw-r--r--  fs/affs/file.c | 50
-rw-r--r--  fs/affs/inode.c | 34
-rw-r--r--  fs/affs/namei.c | 6
-rw-r--r--  fs/affs/super.c | 21
-rw-r--r--  fs/afs/afs_cm.h | 3
-rw-r--r--  fs/afs/cell.c | 2
-rw-r--r--  fs/afs/cmservice.c | 133
-rw-r--r--  fs/afs/dir.c | 4
-rw-r--r--  fs/afs/internal.h | 8
-rw-r--r--  fs/afs/proc.c | 33
-rw-r--r--  fs/aio.c | 79
-rw-r--r--  fs/anon_inodes.c | 13
-rw-r--r--  fs/autofs4/autofs_i.h | 2
-rw-r--r--  fs/autofs4/expire.c | 26
-rw-r--r--  fs/autofs4/root.c | 40
-rw-r--r--  fs/autofs4/waitq.c | 2
-rw-r--r--  fs/befs/endian.h | 2
-rw-r--r--  fs/befs/linuxvfs.c | 6
-rw-r--r--  fs/bfs/bfs.h | 2
-rw-r--r--  fs/binfmt_aout.c | 12
-rw-r--r--  fs/binfmt_elf.c | 30
-rw-r--r--  fs/binfmt_elf_fdpic.c | 14
-rw-r--r--  fs/binfmt_em86.c | 2
-rw-r--r--  fs/binfmt_flat.c | 9
-rw-r--r--  fs/binfmt_misc.c | 6
-rw-r--r--  fs/binfmt_script.c | 2
-rw-r--r--  fs/bio.c | 93
-rw-r--r--  fs/buffer.c | 24
-rw-r--r--  fs/char_dev.c | 1
-rw-r--r--  fs/cifs/CHANGES | 3
-rw-r--r--  fs/cifs/asn1.c | 10
-rw-r--r--  fs/cifs/cifs_debug.c | 4
-rw-r--r--  fs/cifs/cifs_dfs_ref.c | 25
-rw-r--r--  fs/cifs/cifsacl.c | 16
-rw-r--r--  fs/cifs/cifsfs.c | 72
-rw-r--r--  fs/cifs/cifsfs.h | 10
-rw-r--r--  fs/cifs/cifsglob.h | 44
-rw-r--r--  fs/cifs/cifspdu.h | 3
-rw-r--r--  fs/cifs/cifsproto.h | 15
-rw-r--r--  fs/cifs/cifssmb.c | 54
-rw-r--r--  fs/cifs/connect.c | 232
-rw-r--r--  fs/cifs/dir.c | 34
-rw-r--r--  fs/cifs/dns_resolve.c | 62
-rw-r--r--  fs/cifs/fcntl.c | 2
-rw-r--r--  fs/cifs/file.c | 80
-rw-r--r--  fs/cifs/inode.c | 81
-rw-r--r--  fs/cifs/link.c | 2
-rw-r--r--  fs/cifs/misc.c | 33
-rw-r--r--  fs/cifs/netmisc.c | 32
-rw-r--r--  fs/cifs/readdir.c | 12
-rw-r--r--  fs/cifs/smbencrypt.c | 8
-rw-r--r--  fs/cifs/xattr.c | 2
-rw-r--r--  fs/coda/coda_linux.c | 2
-rw-r--r--  fs/coda/dir.c | 6
-rw-r--r--  fs/compat.c | 15
-rw-r--r--  fs/compat_ioctl.c | 4
-rw-r--r--  fs/configfs/file.c | 2
-rw-r--r--  fs/configfs/inode.c | 2
-rw-r--r--  fs/configfs/mount.c | 2
-rw-r--r--  fs/configfs/symlink.c | 4
-rw-r--r--  fs/debugfs/file.c | 2
-rw-r--r--  fs/devpts/inode.c | 43
-rw-r--r--  fs/dlm/lockspace.c | 2
-rw-r--r--  fs/dnotify.c | 11
-rw-r--r--  fs/dquot.c | 10
-rw-r--r--  fs/drop_caches.c | 14
-rw-r--r--  fs/ecryptfs/Makefile | 2
-rw-r--r--  fs/ecryptfs/crypto.c | 33
-rw-r--r--  fs/ecryptfs/ecryptfs_kernel.h | 102
-rw-r--r--  fs/ecryptfs/file.c | 2
-rw-r--r--  fs/ecryptfs/inode.c | 19
-rw-r--r--  fs/ecryptfs/keystore.c | 89
-rw-r--r--  fs/ecryptfs/main.c | 2
-rw-r--r--  fs/ecryptfs/messaging.c | 524
-rw-r--r--  fs/ecryptfs/miscdev.c | 600
-rw-r--r--  fs/ecryptfs/mmap.c | 18
-rw-r--r--  fs/ecryptfs/netlink.c | 33
-rw-r--r--  fs/ecryptfs/read_write.c | 16
-rw-r--r--  fs/eventfd.c | 15
-rw-r--r--  fs/eventpoll.c | 57
-rw-r--r--  fs/exec.c | 24
-rw-r--r--  fs/exportfs/expfs.c | 10
-rw-r--r--  fs/ext3/inode.c | 14
-rw-r--r--  fs/ext3/xattr.c | 5
-rw-r--r--  fs/ext4/acl.c | 12
-rw-r--r--  fs/ext4/balloc.c | 50
-rw-r--r--  fs/ext4/bitmap.c | 2
-rw-r--r--  fs/ext4/dir.c | 4
-rw-r--r--  fs/ext4/ext4.h | 1205
-rw-r--r--  fs/ext4/ext4_extents.h | 232
-rw-r--r--  fs/ext4/ext4_i.h | 167
-rw-r--r--  fs/ext4/ext4_jbd2.c | 14
-rw-r--r--  fs/ext4/ext4_jbd2.h | 231
-rw-r--r--  fs/ext4/ext4_sb.h | 148
-rw-r--r--  fs/ext4/extents.c | 354
-rw-r--r--  fs/ext4/file.c | 6
-rw-r--r--  fs/ext4/fsync.c | 7
-rw-r--r--  fs/ext4/hash.c | 2
-rw-r--r--  fs/ext4/ialloc.c | 44
-rw-r--r--  fs/ext4/inode.c | 57
-rw-r--r--  fs/ext4/ioctl.c | 16
-rw-r--r--  fs/ext4/mballoc.c | 531
-rw-r--r--  fs/ext4/mballoc.h | 304
-rw-r--r--  fs/ext4/migrate.c | 43
-rw-r--r--  fs/ext4/namei.c | 44
-rw-r--r--  fs/ext4/resize.c | 83
-rw-r--r--  fs/ext4/super.c | 146
-rw-r--r--  fs/ext4/symlink.c | 2
-rw-r--r--  fs/ext4/xattr.c | 45
-rw-r--r--  fs/ext4/xattr.h | 7
-rw-r--r--  fs/ext4/xattr_security.c | 4
-rw-r--r--  fs/ext4/xattr_trusted.c | 4
-rw-r--r--  fs/ext4/xattr_user.c | 4
-rw-r--r--  fs/fat/cache.c | 6
-rw-r--r--  fs/fat/fatent.c | 2
-rw-r--r--  fs/fat/file.c | 2
-rw-r--r--  fs/fat/inode.c | 8
-rw-r--r--  fs/fcntl.c | 1
-rw-r--r--  fs/file.c | 23
-rw-r--r--  fs/file_table.c | 1
-rw-r--r--  fs/freevxfs/vxfs_extern.h | 5
-rw-r--r--  fs/freevxfs/vxfs_immed.c | 1
-rw-r--r--  fs/freevxfs/vxfs_inode.c | 5
-rw-r--r--  fs/fs-writeback.c | 78
-rw-r--r--  fs/fuse/control.c | 2
-rw-r--r--  fs/fuse/dev.c | 23
-rw-r--r--  fs/fuse/dir.c | 86
-rw-r--r--  fs/fuse/file.c | 635
-rw-r--r--  fs/fuse/fuse_i.h | 55
-rw-r--r--  fs/fuse/inode.c | 95
-rw-r--r--  fs/gfs2/locking/dlm/sysfs.c | 2
-rw-r--r--  fs/gfs2/util.h | 18
-rw-r--r--  fs/hfs/btree.c | 10
-rw-r--r--  fs/hfs/mdb.c | 2
-rw-r--r--  fs/hfs/super.c | 6
-rw-r--r--  fs/hfsplus/btree.c | 10
-rw-r--r--  fs/hfsplus/hfsplus_fs.h | 4
-rw-r--r--  fs/hfsplus/inode.c | 5
-rw-r--r--  fs/hfsplus/options.c | 3
-rw-r--r--  fs/hfsplus/super.c | 4
-rw-r--r--  fs/hfsplus/wrapper.c | 2
-rw-r--r--  fs/hppfs/Makefile | 6
-rw-r--r--  fs/hppfs/hppfs.c (renamed from fs/hppfs/hppfs_kern.c) | 82
-rw-r--r--  fs/hugetlbfs/inode.c | 2
-rw-r--r--  fs/inode.c | 11
-rw-r--r--  fs/inotify_user.c | 2
-rw-r--r--  fs/ioctl.c | 4
-rw-r--r--  fs/isofs/dir.c | 8
-rw-r--r--  fs/isofs/isofs.h | 12
-rw-r--r--  fs/isofs/namei.c | 7
-rw-r--r--  fs/jbd/commit.c | 2
-rw-r--r--  fs/jbd2/commit.c | 21
-rw-r--r--  fs/jbd2/journal.c | 59
-rw-r--r--  fs/jbd2/revoke.c | 165
-rw-r--r--  fs/jbd2/transaction.c | 41
-rw-r--r--  fs/jffs2/build.c | 31
-rw-r--r--  fs/jffs2/debug.h | 8
-rw-r--r--  fs/jffs2/dir.c | 42
-rw-r--r--  fs/jffs2/erase.c | 9
-rw-r--r--  fs/jffs2/fs.c | 14
-rw-r--r--  fs/jffs2/gc.c | 8
-rw-r--r--  fs/jffs2/nodelist.h | 5
-rw-r--r--  fs/jffs2/nodemgmt.c | 2
-rw-r--r--  fs/jffs2/os-linux.h | 2
-rw-r--r--  fs/jffs2/readinode.c | 16
-rw-r--r--  fs/jffs2/scan.c | 9
-rw-r--r--  fs/jffs2/super.c | 15
-rw-r--r--  fs/jffs2/wbuf.c | 2
-rw-r--r--  fs/jffs2/write.c | 17
-rw-r--r--  fs/jffs2/xattr.c | 8
-rw-r--r--  fs/jfs/jfs_debug.c | 4
-rw-r--r--  fs/lockd/clntproc.c | 2
-rw-r--r--  fs/lockd/svclock.c | 2
-rw-r--r--  fs/locks.c | 20
-rw-r--r--  fs/msdos/namei.c | 2
-rw-r--r--  fs/namei.c | 9
-rw-r--r--  fs/namespace.c | 17
-rw-r--r--  fs/ncpfs/ncplib_kernel.c | 39
-rw-r--r--  fs/nfs/client.c | 20
-rw-r--r--  fs/nfs/super.c | 26
-rw-r--r--  fs/nfsd/nfs4callback.c | 4
-rw-r--r--  fs/nfsd/nfsctl.c | 4
-rw-r--r--  fs/ntfs/debug.h | 6
-rw-r--r--  fs/ntfs/mft.c | 6
-rw-r--r--  fs/ocfs2/cluster/sys.c | 2
-rw-r--r--  fs/ocfs2/dlm/dlmdebug.c | 8
-rw-r--r--  fs/ocfs2/dlm/dlmfs.c | 2
-rw-r--r--  fs/ocfs2/file.c | 4
-rw-r--r--  fs/ocfs2/localalloc.c | 4
-rw-r--r--  fs/ocfs2/stack_o2cb.c | 2
-rw-r--r--  fs/ocfs2/stack_user.c | 2
-rw-r--r--  fs/ocfs2/symlink.c | 2
-rw-r--r--  fs/open.c | 1
-rw-r--r--  fs/partitions/ldm.c | 8
-rw-r--r--  fs/pipe.c | 21
-rw-r--r--  fs/proc/array.c | 8
-rw-r--r--  fs/proc/base.c | 114
-rw-r--r--  fs/proc/generic.c | 149
-rw-r--r--  fs/proc/inode.c | 69
-rw-r--r--  fs/proc/internal.h | 4
-rw-r--r--  fs/proc/nommu.c | 2
-rw-r--r--  fs/proc/proc_misc.c | 68
-rw-r--r--  fs/proc/proc_net.c | 11
-rw-r--r--  fs/proc/proc_sysctl.c | 52
-rw-r--r--  fs/proc/proc_tty.c | 87
-rw-r--r--  fs/proc/root.c | 14
-rw-r--r--  fs/proc/task_mmu.c | 36
-rw-r--r--  fs/proc/task_nommu.c | 35
-rw-r--r--  fs/quota_v2.c | 4
-rw-r--r--  fs/ramfs/file-mmu.c | 3
-rw-r--r--  fs/ramfs/inode.c | 2
-rw-r--r--  fs/ramfs/internal.h | 1
-rw-r--r--  fs/reiserfs/journal.c | 50
-rw-r--r--  fs/reiserfs/procfs.c | 9
-rw-r--r--  fs/select.c | 15
-rw-r--r--  fs/signalfd.c | 17
-rw-r--r--  fs/smbfs/smb_debug.h | 6
-rw-r--r--  fs/splice.c | 29
-rw-r--r--  fs/super.c | 2
-rw-r--r--  fs/sync.c | 2
-rw-r--r--  fs/sysfs/file.c | 2
-rw-r--r--  fs/sysfs/inode.c | 4
-rw-r--r--  fs/sysfs/mount.c | 2
-rw-r--r--  fs/sysv/sysv.h | 8
-rw-r--r--  fs/timerfd.c | 12
-rw-r--r--  fs/udf/namei.c | 145
-rw-r--r--  fs/udf/partition.c | 4
-rw-r--r--  fs/udf/super.c | 5
-rw-r--r--  fs/udf/udfdecl.h | 1
-rw-r--r--  fs/ufs/ufs.h | 1
-rw-r--r--  fs/utimes.c | 17
-rw-r--r--  fs/vfat/namei.c | 2
-rw-r--r--  fs/xattr.c | 41
246 files changed, 7307 insertions, 2866 deletions
diff --git a/fs/9p/fid.h b/fs/9p/fid.h
index 26e07df783b9..c3bbd6af996d 100644
--- a/fs/9p/fid.h
+++ b/fs/9p/fid.h
@@ -22,6 +22,21 @@
22 22
23#include <linux/list.h> 23#include <linux/list.h>
24 24
25/**
26 * struct v9fs_dentry - 9p private data stored in dentry d_fsdata
27 * @lock: protects the fidlist
28 * @fidlist: list of FIDs currently associated with this dentry
29 *
30 * This structure defines the 9p private data associated with
31 * a particular dentry. In particular, this private data is used
32 * to lookup which 9P FID handle should be used for a particular VFS
33 * operation. FID handles are associated with dentries instead of
34 * inodes in order to more closely map functionality to the Plan 9
35 * expected behavior for FID reclaimation and tracking.
36 *
37 * See Also: Mapping FIDs to Linux VFS model in
38 * Design and Implementation of the Linux 9P File System documentation
39 */
25struct v9fs_dentry { 40struct v9fs_dentry {
26 spinlock_t lock; /* protect fidlist */ 41 spinlock_t lock; /* protect fidlist */
27 struct list_head fidlist; 42 struct list_head fidlist;
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 9b0f0222e8bb..047c791427aa 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -71,19 +71,19 @@ static match_table_t tokens = {
71 71
72/** 72/**
73 * v9fs_parse_options - parse mount options into session structure 73 * v9fs_parse_options - parse mount options into session structure
74 * @options: options string passed from mount
75 * @v9ses: existing v9fs session information 74 * @v9ses: existing v9fs session information
76 * 75 *
76 * Return 0 upon success, -ERRNO upon failure.
77 */ 77 */
78 78
79static void v9fs_parse_options(struct v9fs_session_info *v9ses) 79static int v9fs_parse_options(struct v9fs_session_info *v9ses)
80{ 80{
81 char *options; 81 char *options;
82 substring_t args[MAX_OPT_ARGS]; 82 substring_t args[MAX_OPT_ARGS];
83 char *p; 83 char *p;
84 int option = 0; 84 int option = 0;
85 char *s, *e; 85 char *s, *e;
86 int ret; 86 int ret = 0;
87 87
88 /* setup defaults */ 88 /* setup defaults */
89 v9ses->afid = ~0; 89 v9ses->afid = ~0;
@@ -91,19 +91,26 @@ static void v9fs_parse_options(struct v9fs_session_info *v9ses)
91 v9ses->cache = 0; 91 v9ses->cache = 0;
92 92
93 if (!v9ses->options) 93 if (!v9ses->options)
94 return; 94 return 0;
95 95
96 options = kstrdup(v9ses->options, GFP_KERNEL); 96 options = kstrdup(v9ses->options, GFP_KERNEL);
97 if (!options) {
98 P9_DPRINTK(P9_DEBUG_ERROR,
99 "failed to allocate copy of option string\n");
100 return -ENOMEM;
101 }
102
97 while ((p = strsep(&options, ",")) != NULL) { 103 while ((p = strsep(&options, ",")) != NULL) {
98 int token; 104 int token;
99 if (!*p) 105 if (!*p)
100 continue; 106 continue;
101 token = match_token(p, tokens, args); 107 token = match_token(p, tokens, args);
102 if (token < Opt_uname) { 108 if (token < Opt_uname) {
103 ret = match_int(&args[0], &option); 109 int r = match_int(&args[0], &option);
104 if (ret < 0) { 110 if (r < 0) {
105 P9_DPRINTK(P9_DEBUG_ERROR, 111 P9_DPRINTK(P9_DEBUG_ERROR,
106 "integer field, but no integer?\n"); 112 "integer field, but no integer?\n");
113 ret = r;
107 continue; 114 continue;
108 } 115 }
109 } 116 }
@@ -125,10 +132,10 @@ static void v9fs_parse_options(struct v9fs_session_info *v9ses)
125 v9ses->afid = option; 132 v9ses->afid = option;
126 break; 133 break;
127 case Opt_uname: 134 case Opt_uname:
128 match_strcpy(v9ses->uname, &args[0]); 135 match_strlcpy(v9ses->uname, &args[0], PATH_MAX);
129 break; 136 break;
130 case Opt_remotename: 137 case Opt_remotename:
131 match_strcpy(v9ses->aname, &args[0]); 138 match_strlcpy(v9ses->aname, &args[0], PATH_MAX);
132 break; 139 break;
133 case Opt_nodevmap: 140 case Opt_nodevmap:
134 v9ses->nodev = 1; 141 v9ses->nodev = 1;
@@ -139,6 +146,13 @@ static void v9fs_parse_options(struct v9fs_session_info *v9ses)
139 146
140 case Opt_access: 147 case Opt_access:
141 s = match_strdup(&args[0]); 148 s = match_strdup(&args[0]);
149 if (!s) {
150 P9_DPRINTK(P9_DEBUG_ERROR,
151 "failed to allocate copy"
152 " of option argument\n");
153 ret = -ENOMEM;
154 break;
155 }
142 v9ses->flags &= ~V9FS_ACCESS_MASK; 156 v9ses->flags &= ~V9FS_ACCESS_MASK;
143 if (strcmp(s, "user") == 0) 157 if (strcmp(s, "user") == 0)
144 v9ses->flags |= V9FS_ACCESS_USER; 158 v9ses->flags |= V9FS_ACCESS_USER;
@@ -158,6 +172,7 @@ static void v9fs_parse_options(struct v9fs_session_info *v9ses)
158 } 172 }
159 } 173 }
160 kfree(options); 174 kfree(options);
175 return ret;
161} 176}
162 177
163/** 178/**
@@ -173,6 +188,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
173{ 188{
174 int retval = -EINVAL; 189 int retval = -EINVAL;
175 struct p9_fid *fid; 190 struct p9_fid *fid;
191 int rc;
176 192
177 v9ses->uname = __getname(); 193 v9ses->uname = __getname();
178 if (!v9ses->uname) 194 if (!v9ses->uname)
@@ -190,8 +206,21 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
190 v9ses->uid = ~0; 206 v9ses->uid = ~0;
191 v9ses->dfltuid = V9FS_DEFUID; 207 v9ses->dfltuid = V9FS_DEFUID;
192 v9ses->dfltgid = V9FS_DEFGID; 208 v9ses->dfltgid = V9FS_DEFGID;
193 v9ses->options = kstrdup(data, GFP_KERNEL); 209 if (data) {
194 v9fs_parse_options(v9ses); 210 v9ses->options = kstrdup(data, GFP_KERNEL);
211 if (!v9ses->options) {
212 P9_DPRINTK(P9_DEBUG_ERROR,
213 "failed to allocate copy of option string\n");
214 retval = -ENOMEM;
215 goto error;
216 }
217 }
218
219 rc = v9fs_parse_options(v9ses);
220 if (rc < 0) {
221 retval = rc;
222 goto error;
223 }
195 224
196 v9ses->clnt = p9_client_create(dev_name, v9ses->options); 225 v9ses->clnt = p9_client_create(dev_name, v9ses->options);
197 226
@@ -233,7 +262,6 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
233 return fid; 262 return fid;
234 263
235error: 264error:
236 v9fs_session_close(v9ses);
237 return ERR_PTR(retval); 265 return ERR_PTR(retval);
238} 266}
239 267
@@ -256,9 +284,12 @@ void v9fs_session_close(struct v9fs_session_info *v9ses)
256} 284}
257 285
258/** 286/**
259 * v9fs_session_cancel - mark transport as disconnected 287 * v9fs_session_cancel - terminate a session
260 * and cancel all pending requests. 288 * @v9ses: session to terminate
289 *
290 * mark transport as disconnected and cancel all pending requests.
261 */ 291 */
292
262void v9fs_session_cancel(struct v9fs_session_info *v9ses) { 293void v9fs_session_cancel(struct v9fs_session_info *v9ses) {
263 P9_DPRINTK(P9_DEBUG_ERROR, "cancel session %p\n", v9ses); 294 P9_DPRINTK(P9_DEBUG_ERROR, "cancel session %p\n", v9ses);
264 p9_client_disconnect(v9ses->clnt); 295 p9_client_disconnect(v9ses->clnt);
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 7d3a1018db52..a7d567192998 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -21,18 +21,69 @@
21 * 21 *
22 */ 22 */
23 23
24/* 24/**
25 * Session structure provides information for an opened session 25 * enum p9_session_flags - option flags for each 9P session
26 * 26 * @V9FS_EXTENDED: whether or not to use 9P2000.u extensions
27 */ 27 * @V9FS_ACCESS_SINGLE: only the mounting user can access the hierarchy
28 * @V9FS_ACCESS_USER: a new attach will be issued for every user (default)
29 * @V9FS_ACCESS_ANY: use a single attach for all users
30 * @V9FS_ACCESS_MASK: bit mask of different ACCESS options
31 *
32 * Session flags reflect options selected by users at mount time
33 */
34enum p9_session_flags {
35 V9FS_EXTENDED = 0x01,
36 V9FS_ACCESS_SINGLE = 0x02,
37 V9FS_ACCESS_USER = 0x04,
38 V9FS_ACCESS_ANY = 0x06,
39 V9FS_ACCESS_MASK = 0x06,
40};
41
42/* possible values of ->cache */
43/**
44 * enum p9_cache_modes - user specified cache preferences
45 * @CACHE_NONE: do not cache data, dentries, or directory contents (default)
46 * @CACHE_LOOSE: cache data, dentries, and directory contents w/no consistency
47 *
48 * eventually support loose, tight, time, session, default always none
49 */
50
51enum p9_cache_modes {
52 CACHE_NONE,
53 CACHE_LOOSE,
54};
55
56/**
57 * struct v9fs_session_info - per-instance session information
58 * @flags: session options of type &p9_session_flags
59 * @nodev: set to 1 to disable device mapping
60 * @debug: debug level
61 * @afid: authentication handle
62 * @cache: cache mode of type &p9_cache_modes
63 * @options: copy of options string given by user
64 * @uname: string user name to mount hierarchy as
65 * @aname: mount specifier for remote hierarchy
66 * @maxdata: maximum data to be sent/recvd per protocol message
67 * @dfltuid: default numeric userid to mount hierarchy as
68 * @dfltgid: default numeric groupid to mount hierarchy as
69 * @uid: if %V9FS_ACCESS_SINGLE, the numeric uid which mounted the hierarchy
70 * @clnt: reference to 9P network client instantiated for this session
71 * @debugfs_dir: reference to debugfs_dir which can be used for add'l debug
72 *
73 * This structure holds state for each session instance established during
74 * a sys_mount() .
75 *
76 * Bugs: there seems to be a lot of state which could be condensed and/or
77 * removed.
78 */
28 79
29struct v9fs_session_info { 80struct v9fs_session_info {
30 /* options */ 81 /* options */
31 unsigned char flags; /* session flags */ 82 unsigned char flags;
32 unsigned char nodev; /* set to 1 if no disable device mapping */ 83 unsigned char nodev;
33 unsigned short debug; /* debug level */ 84 unsigned short debug;
34 unsigned int afid; /* authentication fid */ 85 unsigned int afid;
35 unsigned int cache; /* cache mode */ 86 unsigned int cache;
36 87
37 char *options; /* copy of mount options */ 88 char *options; /* copy of mount options */
38 char *uname; /* user name to mount as */ 89 char *uname; /* user name to mount as */
@@ -45,22 +96,6 @@ struct v9fs_session_info {
45 struct dentry *debugfs_dir; 96 struct dentry *debugfs_dir;
46}; 97};
47 98
48/* session flags */
49enum {
50 V9FS_EXTENDED = 0x01, /* 9P2000.u */
51 V9FS_ACCESS_MASK = 0x06, /* access mask */
52 V9FS_ACCESS_SINGLE = 0x02, /* only one user can access the files */
53 V9FS_ACCESS_USER = 0x04, /* attache per user */
54 V9FS_ACCESS_ANY = 0x06, /* use the same attach for all users */
55};
56
57/* possible values of ->cache */
58/* eventually support loose, tight, time, session, default always none */
59enum {
60 CACHE_NONE, /* default */
61 CACHE_LOOSE, /* no consistency */
62};
63
64extern struct dentry *v9fs_debugfs_root; 99extern struct dentry *v9fs_debugfs_root;
65 100
66struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *, 101struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *,
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 6248f0e727a3..97d3aed57983 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -43,7 +43,7 @@
43/** 43/**
44 * v9fs_vfs_readpage - read an entire page in from 9P 44 * v9fs_vfs_readpage - read an entire page in from 9P
45 * 45 *
46 * @file: file being read 46 * @filp: file being read
47 * @page: structure to page 47 * @page: structure to page
48 * 48 *
49 */ 49 */
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 0924d4477da3..88e3787c6ea9 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -60,7 +60,7 @@ static inline int dt_type(struct p9_stat *mistat)
60 60
61/** 61/**
62 * v9fs_dir_readdir - read a directory 62 * v9fs_dir_readdir - read a directory
63 * @filep: opened file structure 63 * @filp: opened file structure
64 * @dirent: directory structure ??? 64 * @dirent: directory structure ???
65 * @filldir: function to populate directory structure ??? 65 * @filldir: function to populate directory structure ???
66 * 66 *
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index a616fff8906d..0d55affe37d4 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -90,10 +90,11 @@ int v9fs_file_open(struct inode *inode, struct file *file)
90 90
91/** 91/**
92 * v9fs_file_lock - lock a file (or directory) 92 * v9fs_file_lock - lock a file (or directory)
93 * @inode: inode to be opened 93 * @filp: file to be locked
94 * @file: file being opened 94 * @cmd: lock command
95 * @fl: file lock structure
95 * 96 *
96 * XXX - this looks like a local only lock, we should extend into 9P 97 * Bugs: this looks like a local only lock, we should extend into 9P
97 * by using open exclusive 98 * by using open exclusive
98 */ 99 */
99 100
@@ -118,7 +119,7 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
118 119
119/** 120/**
120 * v9fs_file_read - read from a file 121 * v9fs_file_read - read from a file
121 * @filep: file pointer to read 122 * @filp: file pointer to read
122 * @data: data buffer to read data into 123 * @data: data buffer to read data into
123 * @count: size of buffer 124 * @count: size of buffer
124 * @offset: offset at which to read data 125 * @offset: offset at which to read data
@@ -142,7 +143,7 @@ v9fs_file_read(struct file *filp, char __user * data, size_t count,
142 143
143/** 144/**
144 * v9fs_file_write - write to a file 145 * v9fs_file_write - write to a file
145 * @filep: file pointer to write 146 * @filp: file pointer to write
146 * @data: data buffer to write data from 147 * @data: data buffer to write data from
147 * @count: size of buffer 148 * @count: size of buffer
148 * @offset: offset at which to write data 149 * @offset: offset at which to write data
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 6a28842052ea..40fa807bd929 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -129,6 +129,12 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
129 return res; 129 return res;
130} 130}
131 131
132/**
133 * v9fs_uflags2omode- convert posix open flags to plan 9 mode bits
134 * @uflags: flags to convert
135 *
136 */
137
132int v9fs_uflags2omode(int uflags) 138int v9fs_uflags2omode(int uflags)
133{ 139{
134 int ret; 140 int ret;
@@ -312,6 +318,14 @@ error:
312} 318}
313*/ 319*/
314 320
321/**
322 * v9fs_inode_from_fid - populate an inode by issuing a attribute request
323 * @v9ses: session information
324 * @fid: fid to issue attribute request for
325 * @sb: superblock on which to create inode
326 *
327 */
328
315static struct inode * 329static struct inode *
316v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, 330v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
317 struct super_block *sb) 331 struct super_block *sb)
@@ -384,9 +398,12 @@ v9fs_open_created(struct inode *inode, struct file *file)
384 398
385/** 399/**
386 * v9fs_create - Create a file 400 * v9fs_create - Create a file
401 * @v9ses: session information
402 * @dir: directory that dentry is being created in
387 * @dentry: dentry that is being created 403 * @dentry: dentry that is being created
388 * @perm: create permissions 404 * @perm: create permissions
389 * @mode: open mode 405 * @mode: open mode
406 * @extension: 9p2000.u extension string to support devices, etc.
390 * 407 *
391 */ 408 */
392static struct p9_fid * 409static struct p9_fid *
@@ -461,7 +478,7 @@ error:
461 478
462/** 479/**
463 * v9fs_vfs_create - VFS hook to create files 480 * v9fs_vfs_create - VFS hook to create files
464 * @inode: directory inode that is being created 481 * @dir: directory inode that is being created
465 * @dentry: dentry that is being deleted 482 * @dentry: dentry that is being deleted
466 * @mode: create permissions 483 * @mode: create permissions
467 * @nd: path information 484 * @nd: path information
@@ -519,7 +536,7 @@ error:
519 536
520/** 537/**
521 * v9fs_vfs_mkdir - VFS mkdir hook to create a directory 538 * v9fs_vfs_mkdir - VFS mkdir hook to create a directory
522 * @inode: inode that is being unlinked 539 * @dir: inode that is being unlinked
523 * @dentry: dentry that is being unlinked 540 * @dentry: dentry that is being unlinked
524 * @mode: mode for new directory 541 * @mode: mode for new directory
525 * 542 *
@@ -703,9 +720,9 @@ done:
703 720
704/** 721/**
705 * v9fs_vfs_getattr - retrieve file metadata 722 * v9fs_vfs_getattr - retrieve file metadata
706 * @mnt - mount information 723 * @mnt: mount information
707 * @dentry - file to get attributes on 724 * @dentry: file to get attributes on
708 * @stat - metadata structure to populate 725 * @stat: metadata structure to populate
709 * 726 *
710 */ 727 */
711 728
@@ -928,7 +945,7 @@ done:
928/** 945/**
929 * v9fs_vfs_readlink - read a symlink's location 946 * v9fs_vfs_readlink - read a symlink's location
930 * @dentry: dentry for symlink 947 * @dentry: dentry for symlink
931 * @buf: buffer to load symlink location into 948 * @buffer: buffer to load symlink location into
932 * @buflen: length of buffer 949 * @buflen: length of buffer
933 * 950 *
934 */ 951 */
@@ -996,10 +1013,12 @@ static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd)
996 * v9fs_vfs_put_link - release a symlink path 1013 * v9fs_vfs_put_link - release a symlink path
997 * @dentry: dentry for symlink 1014 * @dentry: dentry for symlink
998 * @nd: nameidata 1015 * @nd: nameidata
1016 * @p: unused
999 * 1017 *
1000 */ 1018 */
1001 1019
1002static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) 1020static void
1021v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
1003{ 1022{
1004 char *s = nd_get_link(nd); 1023 char *s = nd_get_link(nd);
1005 1024
@@ -1008,6 +1027,15 @@ static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void
1008 __putname(s); 1027 __putname(s);
1009} 1028}
1010 1029
1030/**
1031 * v9fs_vfs_mkspecial - create a special file
1032 * @dir: inode to create special file in
1033 * @dentry: dentry to create
1034 * @mode: mode to create special file
1035 * @extension: 9p2000.u format extension string representing special file
1036 *
1037 */
1038
1011static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, 1039static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
1012 int mode, const char *extension) 1040 int mode, const char *extension)
1013{ 1041{
@@ -1037,7 +1065,7 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
1037 * @dentry: dentry for symlink 1065 * @dentry: dentry for symlink
1038 * @symname: symlink data 1066 * @symname: symlink data
1039 * 1067 *
1040 * See 9P2000.u RFC for more information 1068 * See Also: 9P2000.u RFC for more information
1041 * 1069 *
1042 */ 1070 */
1043 1071
@@ -1058,10 +1086,6 @@ v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
1058 * 1086 *
1059 */ 1087 */
1060 1088
1061/* XXX - lots of code dup'd from symlink and creates,
1062 * figure out a better reuse strategy
1063 */
1064
1065static int 1089static int
1066v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, 1090v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
1067 struct dentry *dentry) 1091 struct dentry *dentry)
@@ -1098,7 +1122,7 @@ clunk_fid:
1098 * @dir: inode destination for new link 1122 * @dir: inode destination for new link
1099 * @dentry: dentry for file 1123 * @dentry: dentry for file
1100 * @mode: mode for creation 1124 * @mode: mode for creation
1101 * @dev_t: device associated with special file 1125 * @rdev: device associated with special file
1102 * 1126 *
1103 */ 1127 */
1104 1128
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index a452ac67fc94..bf59c3960494 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -75,6 +75,7 @@ static int v9fs_set_super(struct super_block *s, void *data)
75 * v9fs_fill_super - populate superblock with info 75 * v9fs_fill_super - populate superblock with info
76 * @sb: superblock 76 * @sb: superblock
77 * @v9ses: session information 77 * @v9ses: session information
78 * @flags: flags propagated from v9fs_get_sb()
78 * 79 *
79 */ 80 */
80 81
@@ -127,29 +128,26 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
127 fid = v9fs_session_init(v9ses, dev_name, data); 128 fid = v9fs_session_init(v9ses, dev_name, data);
128 if (IS_ERR(fid)) { 129 if (IS_ERR(fid)) {
129 retval = PTR_ERR(fid); 130 retval = PTR_ERR(fid);
130 fid = NULL; 131 goto close_session;
131 kfree(v9ses);
132 v9ses = NULL;
133 goto error;
134 } 132 }
135 133
136 st = p9_client_stat(fid); 134 st = p9_client_stat(fid);
137 if (IS_ERR(st)) { 135 if (IS_ERR(st)) {
138 retval = PTR_ERR(st); 136 retval = PTR_ERR(st);
139 goto error; 137 goto clunk_fid;
140 } 138 }
141 139
142 sb = sget(fs_type, NULL, v9fs_set_super, v9ses); 140 sb = sget(fs_type, NULL, v9fs_set_super, v9ses);
143 if (IS_ERR(sb)) { 141 if (IS_ERR(sb)) {
144 retval = PTR_ERR(sb); 142 retval = PTR_ERR(sb);
145 goto error; 143 goto free_stat;
146 } 144 }
147 v9fs_fill_super(sb, v9ses, flags); 145 v9fs_fill_super(sb, v9ses, flags);
148 146
149 inode = v9fs_get_inode(sb, S_IFDIR | mode); 147 inode = v9fs_get_inode(sb, S_IFDIR | mode);
150 if (IS_ERR(inode)) { 148 if (IS_ERR(inode)) {
151 retval = PTR_ERR(inode); 149 retval = PTR_ERR(inode);
152 goto error; 150 goto release_sb;
153 } 151 }
154 152
155 inode->i_uid = uid; 153 inode->i_uid = uid;
@@ -158,7 +156,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
158 root = d_alloc_root(inode); 156 root = d_alloc_root(inode);
159 if (!root) { 157 if (!root) {
160 retval = -ENOMEM; 158 retval = -ENOMEM;
161 goto error; 159 goto release_sb;
162 } 160 }
163 161
164 sb->s_root = root; 162 sb->s_root = root;
@@ -169,21 +167,22 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
169 167
170 return simple_set_mnt(mnt, sb); 168 return simple_set_mnt(mnt, sb);
171 169
172error: 170release_sb:
173 kfree(st);
174 if (fid)
175 p9_client_clunk(fid);
176
177 if (v9ses) {
178 v9fs_session_close(v9ses);
179 kfree(v9ses);
180 }
181
182 if (sb) { 171 if (sb) {
183 up_write(&sb->s_umount); 172 up_write(&sb->s_umount);
184 deactivate_super(sb); 173 deactivate_super(sb);
185 } 174 }
186 175
176free_stat:
177 kfree(st);
178
179clunk_fid:
180 p9_client_clunk(fid);
181
182close_session:
183 v9fs_session_close(v9ses);
184 kfree(v9ses);
185
187 return retval; 186 return retval;
188} 187}
189 188
diff --git a/fs/Kconfig b/fs/Kconfig
index 2e43d46f65d6..cf12c403b8c7 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1005,7 +1005,8 @@ config TMPFS_POSIX_ACL
1005 1005
1006config HUGETLBFS 1006config HUGETLBFS
1007 bool "HugeTLB file system support" 1007 bool "HugeTLB file system support"
1008 depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || BROKEN 1008 depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || \
1009 (S390 && 64BIT) || BROKEN
1009 help 1010 help
1010 hugetlbfs is a filesystem backing for HugeTLB pages, based on 1011 hugetlbfs is a filesystem backing for HugeTLB pages, based on
1011 ramfs. For architectures that support it, say Y here and read 1012 ramfs. For architectures that support it, say Y here and read
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 853845abcca6..55e8ee1900a5 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -41,7 +41,7 @@ config BINFMT_ELF_FDPIC
41 It is also possible to run FDPIC ELF binaries on MMU linux also. 41 It is also possible to run FDPIC ELF binaries on MMU linux also.
42 42
43config BINFMT_FLAT 43config BINFMT_FLAT
44 tristate "Kernel support for flat binaries" 44 bool "Kernel support for flat binaries"
45 depends on !MMU 45 depends on !MMU
46 help 46 help
47 Support uClinux FLAT format binaries. 47 Support uClinux FLAT format binaries.
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index 936f2af39c43..831157502d5a 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -75,7 +75,7 @@ extern unsigned int adfs_map_free(struct super_block *sb);
75/* Misc */ 75/* Misc */
76void __adfs_error(struct super_block *sb, const char *function, 76void __adfs_error(struct super_block *sb, const char *function,
77 const char *fmt, ...); 77 const char *fmt, ...);
78#define adfs_error(sb, fmt...) __adfs_error(sb, __FUNCTION__, fmt) 78#define adfs_error(sb, fmt...) __adfs_error(sb, __func__, fmt)
79 79
80/* super.c */ 80/* super.c */
81 81
diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c
index b9b2b27b68c3..ea7df2146921 100644
--- a/fs/adfs/dir_f.c
+++ b/fs/adfs/dir_f.c
@@ -122,9 +122,9 @@ adfs_dir_checkbyte(const struct adfs_dir *dir)
122 ptr.ptr8 = bufoff(bh, i); 122 ptr.ptr8 = bufoff(bh, i);
123 end.ptr8 = ptr.ptr8 + last - i; 123 end.ptr8 = ptr.ptr8 + last - i;
124 124
125 do 125 do {
126 dircheck = *ptr.ptr8++ ^ ror13(dircheck); 126 dircheck = *ptr.ptr8++ ^ ror13(dircheck);
127 while (ptr.ptr8 < end.ptr8); 127 } while (ptr.ptr8 < end.ptr8);
128 } 128 }
129 129
130 /* 130 /*
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index d5bd497ab9cb..223b1917093e 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -48,7 +48,7 @@ struct affs_ext_key {
48 * affs fs inode data in memory 48 * affs fs inode data in memory
49 */ 49 */
50struct affs_inode_info { 50struct affs_inode_info {
51 u32 i_opencnt; 51 atomic_t i_opencnt;
52 struct semaphore i_link_lock; /* Protects internal inode access. */ 52 struct semaphore i_link_lock; /* Protects internal inode access. */
53 struct semaphore i_ext_lock; /* Protects internal inode access. */ 53 struct semaphore i_ext_lock; /* Protects internal inode access. */
54#define i_hash_lock i_ext_lock 54#define i_hash_lock i_ext_lock
@@ -170,8 +170,6 @@ extern int affs_rename(struct inode *old_dir, struct dentry *old_dentry,
170extern unsigned long affs_parent_ino(struct inode *dir); 170extern unsigned long affs_parent_ino(struct inode *dir);
171extern struct inode *affs_new_inode(struct inode *dir); 171extern struct inode *affs_new_inode(struct inode *dir);
172extern int affs_notify_change(struct dentry *dentry, struct iattr *attr); 172extern int affs_notify_change(struct dentry *dentry, struct iattr *attr);
173extern void affs_put_inode(struct inode *inode);
174extern void affs_drop_inode(struct inode *inode);
175extern void affs_delete_inode(struct inode *inode); 173extern void affs_delete_inode(struct inode *inode);
176extern void affs_clear_inode(struct inode *inode); 174extern void affs_clear_inode(struct inode *inode);
177extern struct inode *affs_iget(struct super_block *sb, 175extern struct inode *affs_iget(struct super_block *sb,
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 6e0c9399200e..6eac7bdeec94 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -48,8 +48,9 @@ affs_file_open(struct inode *inode, struct file *filp)
48{ 48{
49 if (atomic_read(&filp->f_count) != 1) 49 if (atomic_read(&filp->f_count) != 1)
50 return 0; 50 return 0;
51 pr_debug("AFFS: open(%d)\n", AFFS_I(inode)->i_opencnt); 51 pr_debug("AFFS: open(%lu,%d)\n",
52 AFFS_I(inode)->i_opencnt++; 52 inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
53 atomic_inc(&AFFS_I(inode)->i_opencnt);
53 return 0; 54 return 0;
54} 55}
55 56
@@ -58,10 +59,16 @@ affs_file_release(struct inode *inode, struct file *filp)
58{ 59{
59 if (atomic_read(&filp->f_count) != 0) 60 if (atomic_read(&filp->f_count) != 0)
60 return 0; 61 return 0;
61 pr_debug("AFFS: release(%d)\n", AFFS_I(inode)->i_opencnt); 62 pr_debug("AFFS: release(%lu, %d)\n",
62 AFFS_I(inode)->i_opencnt--; 63 inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
63 if (!AFFS_I(inode)->i_opencnt) 64
65 if (atomic_dec_and_test(&AFFS_I(inode)->i_opencnt)) {
66 mutex_lock(&inode->i_mutex);
67 if (inode->i_size != AFFS_I(inode)->mmu_private)
68 affs_truncate(inode);
64 affs_free_prealloc(inode); 69 affs_free_prealloc(inode);
70 mutex_unlock(&inode->i_mutex);
71 }
65 72
66 return 0; 73 return 0;
67} 74}
@@ -180,7 +187,7 @@ affs_get_extblock(struct inode *inode, u32 ext)
180 /* inline the simplest case: same extended block as last time */ 187 /* inline the simplest case: same extended block as last time */
181 struct buffer_head *bh = AFFS_I(inode)->i_ext_bh; 188 struct buffer_head *bh = AFFS_I(inode)->i_ext_bh;
182 if (ext == AFFS_I(inode)->i_ext_last) 189 if (ext == AFFS_I(inode)->i_ext_last)
183 atomic_inc(&bh->b_count); 190 get_bh(bh);
184 else 191 else
185 /* we have to do more (not inlined) */ 192 /* we have to do more (not inlined) */
186 bh = affs_get_extblock_slow(inode, ext); 193 bh = affs_get_extblock_slow(inode, ext);
@@ -306,7 +313,7 @@ store_ext:
306 affs_brelse(AFFS_I(inode)->i_ext_bh); 313 affs_brelse(AFFS_I(inode)->i_ext_bh);
307 AFFS_I(inode)->i_ext_last = ext; 314 AFFS_I(inode)->i_ext_last = ext;
308 AFFS_I(inode)->i_ext_bh = bh; 315 AFFS_I(inode)->i_ext_bh = bh;
309 atomic_inc(&bh->b_count); 316 get_bh(bh);
310 317
311 return bh; 318 return bh;
312 319
@@ -324,9 +331,7 @@ affs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_resul
324 331
325 pr_debug("AFFS: get_block(%u, %lu)\n", (u32)inode->i_ino, (unsigned long)block); 332 pr_debug("AFFS: get_block(%u, %lu)\n", (u32)inode->i_ino, (unsigned long)block);
326 333
327 334 BUG_ON(block > (sector_t)0x7fffffffUL);
328 if (block > (sector_t)0x7fffffffUL)
329 BUG();
330 335
331 if (block >= AFFS_I(inode)->i_blkcnt) { 336 if (block >= AFFS_I(inode)->i_blkcnt) {
332 if (block > AFFS_I(inode)->i_blkcnt || !create) 337 if (block > AFFS_I(inode)->i_blkcnt || !create)
@@ -493,8 +498,7 @@ affs_do_readpage_ofs(struct file *file, struct page *page, unsigned from, unsign
493 u32 tmp; 498 u32 tmp;
494 499
495 pr_debug("AFFS: read_page(%u, %ld, %d, %d)\n", (u32)inode->i_ino, page->index, from, to); 500 pr_debug("AFFS: read_page(%u, %ld, %d, %d)\n", (u32)inode->i_ino, page->index, from, to);
496 if (from > to || to > PAGE_CACHE_SIZE) 501 BUG_ON(from > to || to > PAGE_CACHE_SIZE);
497 BUG();
498 kmap(page); 502 kmap(page);
499 data = page_address(page); 503 data = page_address(page);
500 bsize = AFFS_SB(sb)->s_data_blksize; 504 bsize = AFFS_SB(sb)->s_data_blksize;
@@ -507,8 +511,7 @@ affs_do_readpage_ofs(struct file *file, struct page *page, unsigned from, unsign
507 if (IS_ERR(bh)) 511 if (IS_ERR(bh))
508 return PTR_ERR(bh); 512 return PTR_ERR(bh);
509 tmp = min(bsize - boff, to - from); 513 tmp = min(bsize - boff, to - from);
510 if (from + tmp > to || tmp > bsize) 514 BUG_ON(from + tmp > to || tmp > bsize);
511 BUG();
512 memcpy(data + from, AFFS_DATA(bh) + boff, tmp); 515 memcpy(data + from, AFFS_DATA(bh) + boff, tmp);
513 affs_brelse(bh); 516 affs_brelse(bh);
514 bidx++; 517 bidx++;
@@ -540,10 +543,9 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize)
540 if (IS_ERR(bh)) 543 if (IS_ERR(bh))
541 return PTR_ERR(bh); 544 return PTR_ERR(bh);
542 tmp = min(bsize - boff, newsize - size); 545 tmp = min(bsize - boff, newsize - size);
543 if (boff + tmp > bsize || tmp > bsize) 546 BUG_ON(boff + tmp > bsize || tmp > bsize);
544 BUG();
545 memset(AFFS_DATA(bh) + boff, 0, tmp); 547 memset(AFFS_DATA(bh) + boff, 0, tmp);
546 AFFS_DATA_HEAD(bh)->size = cpu_to_be32(be32_to_cpu(AFFS_DATA_HEAD(bh)->size) + tmp); 548 be32_add_cpu(&AFFS_DATA_HEAD(bh)->size, tmp);
547 affs_fix_checksum(sb, bh); 549 affs_fix_checksum(sb, bh);
548 mark_buffer_dirty_inode(bh, inode); 550 mark_buffer_dirty_inode(bh, inode);
549 size += tmp; 551 size += tmp;
@@ -560,8 +562,7 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize)
560 if (IS_ERR(bh)) 562 if (IS_ERR(bh))
561 goto out; 563 goto out;
562 tmp = min(bsize, newsize - size); 564 tmp = min(bsize, newsize - size);
563 if (tmp > bsize) 565 BUG_ON(tmp > bsize);
564 BUG();
565 AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); 566 AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA);
566 AFFS_DATA_HEAD(bh)->key = cpu_to_be32(inode->i_ino); 567 AFFS_DATA_HEAD(bh)->key = cpu_to_be32(inode->i_ino);
567 AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx); 568 AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx);
@@ -683,10 +684,9 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
683 if (IS_ERR(bh)) 684 if (IS_ERR(bh))
684 return PTR_ERR(bh); 685 return PTR_ERR(bh);
685 tmp = min(bsize - boff, to - from); 686 tmp = min(bsize - boff, to - from);
686 if (boff + tmp > bsize || tmp > bsize) 687 BUG_ON(boff + tmp > bsize || tmp > bsize);
687 BUG();
688 memcpy(AFFS_DATA(bh) + boff, data + from, tmp); 688 memcpy(AFFS_DATA(bh) + boff, data + from, tmp);
689 AFFS_DATA_HEAD(bh)->size = cpu_to_be32(be32_to_cpu(AFFS_DATA_HEAD(bh)->size) + tmp); 689 be32_add_cpu(&AFFS_DATA_HEAD(bh)->size, tmp);
690 affs_fix_checksum(sb, bh); 690 affs_fix_checksum(sb, bh);
691 mark_buffer_dirty_inode(bh, inode); 691 mark_buffer_dirty_inode(bh, inode);
692 written += tmp; 692 written += tmp;
@@ -732,8 +732,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
732 if (IS_ERR(bh)) 732 if (IS_ERR(bh))
733 goto out; 733 goto out;
734 tmp = min(bsize, to - from); 734 tmp = min(bsize, to - from);
735 if (tmp > bsize) 735 BUG_ON(tmp > bsize);
736 BUG();
737 memcpy(AFFS_DATA(bh), data + from, tmp); 736 memcpy(AFFS_DATA(bh), data + from, tmp);
738 if (buffer_new(bh)) { 737 if (buffer_new(bh)) {
739 AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); 738 AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA);
@@ -834,6 +833,8 @@ affs_truncate(struct inode *inode)
834 res = mapping->a_ops->write_begin(NULL, mapping, size, 0, 0, &page, &fsdata); 833 res = mapping->a_ops->write_begin(NULL, mapping, size, 0, 0, &page, &fsdata);
835 if (!res) 834 if (!res)
836 res = mapping->a_ops->write_end(NULL, mapping, size, 0, 0, page, fsdata); 835 res = mapping->a_ops->write_end(NULL, mapping, size, 0, 0, page, fsdata);
836 else
837 inode->i_size = AFFS_I(inode)->mmu_private;
837 mark_inode_dirty(inode); 838 mark_inode_dirty(inode);
838 return; 839 return;
839 } else if (inode->i_size == AFFS_I(inode)->mmu_private) 840 } else if (inode->i_size == AFFS_I(inode)->mmu_private)
@@ -869,6 +870,7 @@ affs_truncate(struct inode *inode)
869 blk++; 870 blk++;
870 } else 871 } else
871 AFFS_HEAD(ext_bh)->first_data = 0; 872 AFFS_HEAD(ext_bh)->first_data = 0;
873 AFFS_HEAD(ext_bh)->block_count = cpu_to_be32(i);
872 size = AFFS_SB(sb)->s_hashsize; 874 size = AFFS_SB(sb)->s_hashsize;
873 if (size > blkcnt - blk + i) 875 if (size > blkcnt - blk + i)
874 size = blkcnt - blk + i; 876 size = blkcnt - blk + i;
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 27fe6cbe43ae..a13b334a3910 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -58,7 +58,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
58 AFFS_I(inode)->i_extcnt = 1; 58 AFFS_I(inode)->i_extcnt = 1;
59 AFFS_I(inode)->i_ext_last = ~1; 59 AFFS_I(inode)->i_ext_last = ~1;
60 AFFS_I(inode)->i_protect = prot; 60 AFFS_I(inode)->i_protect = prot;
61 AFFS_I(inode)->i_opencnt = 0; 61 atomic_set(&AFFS_I(inode)->i_opencnt, 0);
62 AFFS_I(inode)->i_blkcnt = 0; 62 AFFS_I(inode)->i_blkcnt = 0;
63 AFFS_I(inode)->i_lc = NULL; 63 AFFS_I(inode)->i_lc = NULL;
64 AFFS_I(inode)->i_lc_size = 0; 64 AFFS_I(inode)->i_lc_size = 0;
@@ -108,8 +108,6 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
108 inode->i_mode |= S_IFDIR; 108 inode->i_mode |= S_IFDIR;
109 } else 109 } else
110 inode->i_mode = S_IRUGO | S_IXUGO | S_IWUSR | S_IFDIR; 110 inode->i_mode = S_IRUGO | S_IXUGO | S_IWUSR | S_IFDIR;
111 if (tail->link_chain)
112 inode->i_nlink = 2;
113 /* Maybe it should be controlled by mount parameter? */ 111 /* Maybe it should be controlled by mount parameter? */
114 //inode->i_mode |= S_ISVTX; 112 //inode->i_mode |= S_ISVTX;
115 inode->i_op = &affs_dir_inode_operations; 113 inode->i_op = &affs_dir_inode_operations;
@@ -245,31 +243,12 @@ out:
245} 243}
246 244
247void 245void
248affs_put_inode(struct inode *inode)
249{
250 pr_debug("AFFS: put_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
251 affs_free_prealloc(inode);
252}
253
254void
255affs_drop_inode(struct inode *inode)
256{
257 mutex_lock(&inode->i_mutex);
258 if (inode->i_size != AFFS_I(inode)->mmu_private)
259 affs_truncate(inode);
260 mutex_unlock(&inode->i_mutex);
261
262 generic_drop_inode(inode);
263}
264
265void
266affs_delete_inode(struct inode *inode) 246affs_delete_inode(struct inode *inode)
267{ 247{
268 pr_debug("AFFS: delete_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink); 248 pr_debug("AFFS: delete_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
269 truncate_inode_pages(&inode->i_data, 0); 249 truncate_inode_pages(&inode->i_data, 0);
270 inode->i_size = 0; 250 inode->i_size = 0;
271 if (S_ISREG(inode->i_mode)) 251 affs_truncate(inode);
272 affs_truncate(inode);
273 clear_inode(inode); 252 clear_inode(inode);
274 affs_free_block(inode->i_sb, inode->i_ino); 253 affs_free_block(inode->i_sb, inode->i_ino);
275} 254}
@@ -277,9 +256,12 @@ affs_delete_inode(struct inode *inode)
277void 256void
278affs_clear_inode(struct inode *inode) 257affs_clear_inode(struct inode *inode)
279{ 258{
280 unsigned long cache_page = (unsigned long) AFFS_I(inode)->i_lc; 259 unsigned long cache_page;
281 260
282 pr_debug("AFFS: clear_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink); 261 pr_debug("AFFS: clear_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
262
263 affs_free_prealloc(inode);
264 cache_page = (unsigned long)AFFS_I(inode)->i_lc;
283 if (cache_page) { 265 if (cache_page) {
284 pr_debug("AFFS: freeing ext cache\n"); 266 pr_debug("AFFS: freeing ext cache\n");
285 AFFS_I(inode)->i_lc = NULL; 267 AFFS_I(inode)->i_lc = NULL;
@@ -316,7 +298,7 @@ affs_new_inode(struct inode *dir)
316 inode->i_ino = block; 298 inode->i_ino = block;
317 inode->i_nlink = 1; 299 inode->i_nlink = 1;
318 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 300 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
319 AFFS_I(inode)->i_opencnt = 0; 301 atomic_set(&AFFS_I(inode)->i_opencnt, 0);
320 AFFS_I(inode)->i_blkcnt = 0; 302 AFFS_I(inode)->i_blkcnt = 0;
321 AFFS_I(inode)->i_lc = NULL; 303 AFFS_I(inode)->i_lc = NULL;
322 AFFS_I(inode)->i_lc_size = 0; 304 AFFS_I(inode)->i_lc_size = 0;
@@ -369,12 +351,12 @@ affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s3
369 switch (type) { 351 switch (type) {
370 case ST_LINKFILE: 352 case ST_LINKFILE:
371 case ST_LINKDIR: 353 case ST_LINKDIR:
372 inode_bh = bh;
373 retval = -ENOSPC; 354 retval = -ENOSPC;
374 block = affs_alloc_block(dir, dir->i_ino); 355 block = affs_alloc_block(dir, dir->i_ino);
375 if (!block) 356 if (!block)
376 goto err; 357 goto err;
377 retval = -EIO; 358 retval = -EIO;
359 inode_bh = bh;
378 bh = affs_getzeroblk(sb, block); 360 bh = affs_getzeroblk(sb, block);
379 if (!bh) 361 if (!bh)
380 goto err; 362 goto err;
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 2218f1ee71ce..cfcf1b6cf82b 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -234,7 +234,8 @@ affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
234int 234int
235affs_unlink(struct inode *dir, struct dentry *dentry) 235affs_unlink(struct inode *dir, struct dentry *dentry)
236{ 236{
237 pr_debug("AFFS: unlink(dir=%d, \"%.*s\")\n", (u32)dir->i_ino, 237 pr_debug("AFFS: unlink(dir=%d, %lu \"%.*s\")\n", (u32)dir->i_ino,
238 dentry->d_inode->i_ino,
238 (int)dentry->d_name.len, dentry->d_name.name); 239 (int)dentry->d_name.len, dentry->d_name.name);
239 240
240 return affs_remove_header(dentry); 241 return affs_remove_header(dentry);
@@ -302,7 +303,8 @@ affs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
302int 303int
303affs_rmdir(struct inode *dir, struct dentry *dentry) 304affs_rmdir(struct inode *dir, struct dentry *dentry)
304{ 305{
305 pr_debug("AFFS: rmdir(dir=%u, \"%.*s\")\n", (u32)dir->i_ino, 306 pr_debug("AFFS: rmdir(dir=%u, %lu \"%.*s\")\n", (u32)dir->i_ino,
307 dentry->d_inode->i_ino,
306 (int)dentry->d_name.len, dentry->d_name.name); 308 (int)dentry->d_name.len, dentry->d_name.name);
307 309
308 return affs_remove_header(dentry); 310 return affs_remove_header(dentry);
diff --git a/fs/affs/super.c b/fs/affs/super.c
index d2dc047cb479..d214837d5e42 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -71,12 +71,18 @@ static struct kmem_cache * affs_inode_cachep;
71 71
72static struct inode *affs_alloc_inode(struct super_block *sb) 72static struct inode *affs_alloc_inode(struct super_block *sb)
73{ 73{
74 struct affs_inode_info *ei; 74 struct affs_inode_info *i;
75 ei = (struct affs_inode_info *)kmem_cache_alloc(affs_inode_cachep, GFP_KERNEL); 75
76 if (!ei) 76 i = kmem_cache_alloc(affs_inode_cachep, GFP_KERNEL);
77 if (!i)
77 return NULL; 78 return NULL;
78 ei->vfs_inode.i_version = 1; 79
79 return &ei->vfs_inode; 80 i->vfs_inode.i_version = 1;
81 i->i_lc = NULL;
82 i->i_ext_bh = NULL;
83 i->i_pa_cnt = 0;
84
85 return &i->vfs_inode;
80} 86}
81 87
82static void affs_destroy_inode(struct inode *inode) 88static void affs_destroy_inode(struct inode *inode)
@@ -114,8 +120,6 @@ static const struct super_operations affs_sops = {
114 .alloc_inode = affs_alloc_inode, 120 .alloc_inode = affs_alloc_inode,
115 .destroy_inode = affs_destroy_inode, 121 .destroy_inode = affs_destroy_inode,
116 .write_inode = affs_write_inode, 122 .write_inode = affs_write_inode,
117 .put_inode = affs_put_inode,
118 .drop_inode = affs_drop_inode,
119 .delete_inode = affs_delete_inode, 123 .delete_inode = affs_delete_inode,
120 .clear_inode = affs_clear_inode, 124 .clear_inode = affs_clear_inode,
121 .put_super = affs_put_super, 125 .put_super = affs_put_super,
@@ -199,7 +203,6 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
199 case Opt_prefix: 203 case Opt_prefix:
200 /* Free any previous prefix */ 204 /* Free any previous prefix */
201 kfree(*prefix); 205 kfree(*prefix);
202 *prefix = NULL;
203 *prefix = match_strdup(&args[0]); 206 *prefix = match_strdup(&args[0]);
204 if (!*prefix) 207 if (!*prefix)
205 return 0; 208 return 0;
@@ -233,6 +236,8 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
233 break; 236 break;
234 case Opt_volume: { 237 case Opt_volume: {
235 char *vol = match_strdup(&args[0]); 238 char *vol = match_strdup(&args[0]);
239 if (!vol)
240 return 0;
236 strlcpy(volume, vol, 32); 241 strlcpy(volume, vol, 32);
237 kfree(vol); 242 kfree(vol);
238 break; 243 break;
diff --git a/fs/afs/afs_cm.h b/fs/afs/afs_cm.h
index 7b4d4fab4c80..255f5dd6040c 100644
--- a/fs/afs/afs_cm.h
+++ b/fs/afs/afs_cm.h
@@ -24,7 +24,8 @@ enum AFS_CM_Operations {
24 CBGetXStatsVersion = 209, /* get version of extended statistics */ 24 CBGetXStatsVersion = 209, /* get version of extended statistics */
25 CBGetXStats = 210, /* get contents of extended statistics data */ 25 CBGetXStats = 210, /* get contents of extended statistics data */
26 CBInitCallBackState3 = 213, /* initialise callback state, version 3 */ 26 CBInitCallBackState3 = 213, /* initialise callback state, version 3 */
27 CBGetCapabilities = 65538, /* get client capabilities */ 27 CBProbeUuid = 214, /* check the client hasn't rebooted */
28 CBTellMeAboutYourself = 65538, /* get client capabilities */
28}; 29};
29 30
30#define AFS_CAP_ERROR_TRANSLATION 0x1 31#define AFS_CAP_ERROR_TRANSLATION 0x1
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 584bb0f9c36a..5e1df14e16b1 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -20,7 +20,7 @@
20DECLARE_RWSEM(afs_proc_cells_sem); 20DECLARE_RWSEM(afs_proc_cells_sem);
21LIST_HEAD(afs_proc_cells); 21LIST_HEAD(afs_proc_cells);
22 22
23static struct list_head afs_cells = LIST_HEAD_INIT(afs_cells); 23static LIST_HEAD(afs_cells);
24static DEFINE_RWLOCK(afs_cells_lock); 24static DEFINE_RWLOCK(afs_cells_lock);
25static DECLARE_RWSEM(afs_cells_sem); /* add/remove serialisation */ 25static DECLARE_RWSEM(afs_cells_sem); /* add/remove serialisation */
26static DECLARE_WAIT_QUEUE_HEAD(afs_cells_freeable_wq); 26static DECLARE_WAIT_QUEUE_HEAD(afs_cells_freeable_wq);
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 47b71c8947f9..eb765489164f 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -26,8 +26,9 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *,
26 struct sk_buff *, bool); 26 struct sk_buff *, bool);
27static int afs_deliver_cb_probe(struct afs_call *, struct sk_buff *, bool); 27static int afs_deliver_cb_probe(struct afs_call *, struct sk_buff *, bool);
28static int afs_deliver_cb_callback(struct afs_call *, struct sk_buff *, bool); 28static int afs_deliver_cb_callback(struct afs_call *, struct sk_buff *, bool);
29static int afs_deliver_cb_get_capabilities(struct afs_call *, struct sk_buff *, 29static int afs_deliver_cb_probe_uuid(struct afs_call *, struct sk_buff *, bool);
30 bool); 30static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *,
31 struct sk_buff *, bool);
31static void afs_cm_destructor(struct afs_call *); 32static void afs_cm_destructor(struct afs_call *);
32 33
33/* 34/*
@@ -71,11 +72,21 @@ static const struct afs_call_type afs_SRXCBProbe = {
71}; 72};
72 73
73/* 74/*
74 * CB.GetCapabilities operation type 75 * CB.ProbeUuid operation type
75 */ 76 */
76static const struct afs_call_type afs_SRXCBGetCapabilites = { 77static const struct afs_call_type afs_SRXCBProbeUuid = {
77 .name = "CB.GetCapabilities", 78 .name = "CB.ProbeUuid",
78 .deliver = afs_deliver_cb_get_capabilities, 79 .deliver = afs_deliver_cb_probe_uuid,
80 .abort_to_error = afs_abort_to_error,
81 .destructor = afs_cm_destructor,
82};
83
84/*
85 * CB.TellMeAboutYourself operation type
86 */
87static const struct afs_call_type afs_SRXCBTellMeAboutYourself = {
88 .name = "CB.TellMeAboutYourself",
89 .deliver = afs_deliver_cb_tell_me_about_yourself,
79 .abort_to_error = afs_abort_to_error, 90 .abort_to_error = afs_abort_to_error,
80 .destructor = afs_cm_destructor, 91 .destructor = afs_cm_destructor,
81}; 92};
@@ -103,8 +114,8 @@ bool afs_cm_incoming_call(struct afs_call *call)
103 case CBProbe: 114 case CBProbe:
104 call->type = &afs_SRXCBProbe; 115 call->type = &afs_SRXCBProbe;
105 return true; 116 return true;
106 case CBGetCapabilities: 117 case CBTellMeAboutYourself:
107 call->type = &afs_SRXCBGetCapabilites; 118 call->type = &afs_SRXCBTellMeAboutYourself;
108 return true; 119 return true;
109 default: 120 default:
110 return false; 121 return false;
@@ -393,9 +404,105 @@ static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb,
393} 404}
394 405
395/* 406/*
407 * allow the fileserver to quickly find out if the fileserver has been rebooted
408 */
409static void SRXAFSCB_ProbeUuid(struct work_struct *work)
410{
411 struct afs_call *call = container_of(work, struct afs_call, work);
412 struct afs_uuid *r = call->request;
413
414 struct {
415 __be32 match;
416 } reply;
417
418 _enter("");
419
420
421 if (memcmp(r, &afs_uuid, sizeof(afs_uuid)) == 0)
422 reply.match = htonl(0);
423 else
424 reply.match = htonl(1);
425
426 afs_send_simple_reply(call, &reply, sizeof(reply));
427 _leave("");
428}
429
430/*
431 * deliver request data to a CB.ProbeUuid call
432 */
433static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb,
434 bool last)
435{
436 struct afs_uuid *r;
437 unsigned loop;
438 __be32 *b;
439 int ret;
440
441 _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
442
443 if (skb->len > 0)
444 return -EBADMSG;
445 if (!last)
446 return 0;
447
448 switch (call->unmarshall) {
449 case 0:
450 call->offset = 0;
451 call->buffer = kmalloc(11 * sizeof(__be32), GFP_KERNEL);
452 if (!call->buffer)
453 return -ENOMEM;
454 call->unmarshall++;
455
456 case 1:
457 _debug("extract UUID");
458 ret = afs_extract_data(call, skb, last, call->buffer,
459 11 * sizeof(__be32));
460 switch (ret) {
461 case 0: break;
462 case -EAGAIN: return 0;
463 default: return ret;
464 }
465
466 _debug("unmarshall UUID");
467 call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL);
468 if (!call->request)
469 return -ENOMEM;
470
471 b = call->buffer;
472 r = call->request;
473 r->time_low = ntohl(b[0]);
474 r->time_mid = ntohl(b[1]);
475 r->time_hi_and_version = ntohl(b[2]);
476 r->clock_seq_hi_and_reserved = ntohl(b[3]);
477 r->clock_seq_low = ntohl(b[4]);
478
479 for (loop = 0; loop < 6; loop++)
480 r->node[loop] = ntohl(b[loop + 5]);
481
482 call->offset = 0;
483 call->unmarshall++;
484
485 case 2:
486 _debug("trailer");
487 if (skb->len != 0)
488 return -EBADMSG;
489 break;
490 }
491
492 if (!last)
493 return 0;
494
495 call->state = AFS_CALL_REPLYING;
496
497 INIT_WORK(&call->work, SRXAFSCB_ProbeUuid);
498 schedule_work(&call->work);
499 return 0;
500}
501
502/*
396 * allow the fileserver to ask about the cache manager's capabilities 503 * allow the fileserver to ask about the cache manager's capabilities
397 */ 504 */
398static void SRXAFSCB_GetCapabilities(struct work_struct *work) 505static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work)
399{ 506{
400 struct afs_interface *ifs; 507 struct afs_interface *ifs;
401 struct afs_call *call = container_of(work, struct afs_call, work); 508 struct afs_call *call = container_of(work, struct afs_call, work);
@@ -456,10 +563,10 @@ static void SRXAFSCB_GetCapabilities(struct work_struct *work)
456} 563}
457 564
458/* 565/*
459 * deliver request data to a CB.GetCapabilities call 566 * deliver request data to a CB.TellMeAboutYourself call
460 */ 567 */
461static int afs_deliver_cb_get_capabilities(struct afs_call *call, 568static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call,
462 struct sk_buff *skb, bool last) 569 struct sk_buff *skb, bool last)
463{ 570{
464 _enter(",{%u},%d", skb->len, last); 571 _enter(",{%u},%d", skb->len, last);
465 572
@@ -471,7 +578,7 @@ static int afs_deliver_cb_get_capabilities(struct afs_call *call,
471 /* no unmarshalling required */ 578 /* no unmarshalling required */
472 call->state = AFS_CALL_REPLYING; 579 call->state = AFS_CALL_REPLYING;
473 580
474 INIT_WORK(&call->work, SRXAFSCB_GetCapabilities); 581 INIT_WORK(&call->work, SRXAFSCB_TellMeAboutYourself);
475 schedule_work(&call->work); 582 schedule_work(&call->work);
476 return 0; 583 return 0;
477} 584}
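
The CB.ProbeUuid and CB.TellMeAboutYourself handlers above follow the same shape as the other cache-manager calls: the delivery routine unmarshals into call->request, flips the call to AFS_CALL_REPLYING, and defers the actual reply to a workqueue. A minimal sketch of that defer-to-workqueue idiom, with made-up names (my_request, my_reply_worker, my_deliver) that are not part of the AFS code:

#include <linux/workqueue.h>
#include <linux/slab.h>

struct my_request {
	struct work_struct work;
	unsigned int payload;
};

/* runs in process context, free to block while building the reply */
static void my_reply_worker(struct work_struct *work)
{
	struct my_request *req = container_of(work, struct my_request, work);

	/* ... marshal and send the reply here ... */
	kfree(req);
}

/* called from the (non-blocking) delivery path */
static int my_deliver(unsigned int payload)
{
	struct my_request *req = kmalloc(sizeof(*req), GFP_KERNEL);

	if (!req)
		return -ENOMEM;
	req->payload = payload;

	INIT_WORK(&req->work, my_reply_worker);
	schedule_work(&req->work);
	return 0;
}
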
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index b58af8f18bc4..dfda03d4397d 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -140,7 +140,7 @@ static inline void afs_dir_check_page(struct inode *dir, struct page *page)
140 140
141 if (page->index == 0 && qty != ntohs(dbuf->blocks[0].pagehdr.npages)) { 141 if (page->index == 0 && qty != ntohs(dbuf->blocks[0].pagehdr.npages)) {
142 printk("kAFS: %s(%lu): wrong number of dir blocks %d!=%hu\n", 142 printk("kAFS: %s(%lu): wrong number of dir blocks %d!=%hu\n",
143 __FUNCTION__, dir->i_ino, qty, 143 __func__, dir->i_ino, qty,
144 ntohs(dbuf->blocks[0].pagehdr.npages)); 144 ntohs(dbuf->blocks[0].pagehdr.npages));
145 goto error; 145 goto error;
146 } 146 }
@@ -159,7 +159,7 @@ static inline void afs_dir_check_page(struct inode *dir, struct page *page)
159 for (tmp = 0; tmp < qty; tmp++) { 159 for (tmp = 0; tmp < qty; tmp++) {
160 if (dbuf->blocks[tmp].pagehdr.magic != AFS_DIR_MAGIC) { 160 if (dbuf->blocks[tmp].pagehdr.magic != AFS_DIR_MAGIC) {
161 printk("kAFS: %s(%lu): bad magic %d/%d is %04hx\n", 161 printk("kAFS: %s(%lu): bad magic %d/%d is %04hx\n",
162 __FUNCTION__, dir->i_ino, tmp, qty, 162 __func__, dir->i_ino, tmp, qty,
163 ntohs(dbuf->blocks[tmp].pagehdr.magic)); 163 ntohs(dbuf->blocks[tmp].pagehdr.magic));
164 goto error; 164 goto error;
165 } 165 }
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index eec41c76de72..7102824ba847 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -757,8 +757,8 @@ void _dbprintk(const char *fmt, ...)
757{ 757{
758} 758}
759 759
760#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__) 760#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__)
761#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__) 761#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__)
762#define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__) 762#define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__)
763 763
764 764
@@ -791,8 +791,8 @@ do { \
791} while (0) 791} while (0)
792 792
793#else 793#else
794#define _enter(FMT,...) _dbprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__) 794#define _enter(FMT,...) _dbprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__)
795#define _leave(FMT,...) _dbprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__) 795#define _leave(FMT,...) _dbprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__)
796#define _debug(FMT,...) _dbprintk(" "FMT ,##__VA_ARGS__) 796#define _debug(FMT,...) _dbprintk(" "FMT ,##__VA_ARGS__)
797#endif 797#endif
798 798
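
The __FUNCTION__ to __func__ conversions here and in the later hunks (autofs4, bfs, buffer.c) swap the old GCC-specific spelling for the C99 predefined identifier; both expand to the name of the enclosing function. A trivial illustration (demo() is a made-up function):

#include <linux/kernel.h>

static void demo(void)
{
	/* prints "demo: called" with either spelling */
	printk(KERN_DEBUG "%s: called\n", __func__);
}
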
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 846c7615ac9e..9f7d1ae70269 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -41,6 +41,7 @@ static const struct file_operations afs_proc_cells_fops = {
41 .write = afs_proc_cells_write, 41 .write = afs_proc_cells_write,
42 .llseek = seq_lseek, 42 .llseek = seq_lseek,
43 .release = seq_release, 43 .release = seq_release,
44 .owner = THIS_MODULE,
44}; 45};
45 46
46static int afs_proc_rootcell_open(struct inode *inode, struct file *file); 47static int afs_proc_rootcell_open(struct inode *inode, struct file *file);
@@ -56,7 +57,8 @@ static const struct file_operations afs_proc_rootcell_fops = {
56 .read = afs_proc_rootcell_read, 57 .read = afs_proc_rootcell_read,
57 .write = afs_proc_rootcell_write, 58 .write = afs_proc_rootcell_write,
58 .llseek = no_llseek, 59 .llseek = no_llseek,
59 .release = afs_proc_rootcell_release 60 .release = afs_proc_rootcell_release,
61 .owner = THIS_MODULE,
60}; 62};
61 63
62static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file); 64static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file);
@@ -80,6 +82,7 @@ static const struct file_operations afs_proc_cell_volumes_fops = {
80 .read = seq_read, 82 .read = seq_read,
81 .llseek = seq_lseek, 83 .llseek = seq_lseek,
82 .release = afs_proc_cell_volumes_release, 84 .release = afs_proc_cell_volumes_release,
85 .owner = THIS_MODULE,
83}; 86};
84 87
85static int afs_proc_cell_vlservers_open(struct inode *inode, 88static int afs_proc_cell_vlservers_open(struct inode *inode,
@@ -104,6 +107,7 @@ static const struct file_operations afs_proc_cell_vlservers_fops = {
104 .read = seq_read, 107 .read = seq_read,
105 .llseek = seq_lseek, 108 .llseek = seq_lseek,
106 .release = afs_proc_cell_vlservers_release, 109 .release = afs_proc_cell_vlservers_release,
110 .owner = THIS_MODULE,
107}; 111};
108 112
109static int afs_proc_cell_servers_open(struct inode *inode, struct file *file); 113static int afs_proc_cell_servers_open(struct inode *inode, struct file *file);
@@ -127,6 +131,7 @@ static const struct file_operations afs_proc_cell_servers_fops = {
127 .read = seq_read, 131 .read = seq_read,
128 .llseek = seq_lseek, 132 .llseek = seq_lseek,
129 .release = afs_proc_cell_servers_release, 133 .release = afs_proc_cell_servers_release,
134 .owner = THIS_MODULE,
130}; 135};
131 136
132/* 137/*
@@ -143,17 +148,13 @@ int afs_proc_init(void)
143 goto error_dir; 148 goto error_dir;
144 proc_afs->owner = THIS_MODULE; 149 proc_afs->owner = THIS_MODULE;
145 150
146 p = create_proc_entry("cells", 0, proc_afs); 151 p = proc_create("cells", 0, proc_afs, &afs_proc_cells_fops);
147 if (!p) 152 if (!p)
148 goto error_cells; 153 goto error_cells;
149 p->proc_fops = &afs_proc_cells_fops;
150 p->owner = THIS_MODULE;
151 154
152 p = create_proc_entry("rootcell", 0, proc_afs); 155 p = proc_create("rootcell", 0, proc_afs, &afs_proc_rootcell_fops);
153 if (!p) 156 if (!p)
154 goto error_rootcell; 157 goto error_rootcell;
155 p->proc_fops = &afs_proc_rootcell_fops;
156 p->owner = THIS_MODULE;
157 158
158 _leave(" = 0"); 159 _leave(" = 0");
159 return 0; 160 return 0;
@@ -395,26 +396,20 @@ int afs_proc_cell_setup(struct afs_cell *cell)
395 if (!cell->proc_dir) 396 if (!cell->proc_dir)
396 goto error_dir; 397 goto error_dir;
397 398
398 p = create_proc_entry("servers", 0, cell->proc_dir); 399 p = proc_create_data("servers", 0, cell->proc_dir,
400 &afs_proc_cell_servers_fops, cell);
399 if (!p) 401 if (!p)
400 goto error_servers; 402 goto error_servers;
401 p->proc_fops = &afs_proc_cell_servers_fops;
402 p->owner = THIS_MODULE;
403 p->data = cell;
404 403
405 p = create_proc_entry("vlservers", 0, cell->proc_dir); 404 p = proc_create_data("vlservers", 0, cell->proc_dir,
405 &afs_proc_cell_vlservers_fops, cell);
406 if (!p) 406 if (!p)
407 goto error_vlservers; 407 goto error_vlservers;
408 p->proc_fops = &afs_proc_cell_vlservers_fops;
409 p->owner = THIS_MODULE;
410 p->data = cell;
411 408
412 p = create_proc_entry("volumes", 0, cell->proc_dir); 409 p = proc_create_data("volumes", 0, cell->proc_dir,
410 &afs_proc_cell_volumes_fops, cell);
413 if (!p) 411 if (!p)
414 goto error_volumes; 412 goto error_volumes;
415 p->proc_fops = &afs_proc_cell_volumes_fops;
416 p->owner = THIS_MODULE;
417 p->data = cell;
418 413
419 _leave(" = 0"); 414 _leave(" = 0");
420 return 0; 415 return 0;
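
The proc hunks above replace the create_proc_entry() plus manual proc_fops/owner/data assignment sequence with proc_create() and proc_create_data(), which publish the entry with its file_operations and private data already set. A rough sketch of creating such an entry with a seq_file read path; the name "example" and all example_* identifiers are placeholders, not kernel APIs:

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static char example_msg[] = "hello";

static int example_show(struct seq_file *m, void *v)
{
	seq_printf(m, "%s\n", (char *)m->private);
	return 0;
}

static int example_open(struct inode *inode, struct file *file)
{
	/* PDE(inode)->data is whatever was passed to proc_create_data() */
	return single_open(file, example_show, PDE(inode)->data);
}

static const struct file_operations example_fops = {
	.owner	 = THIS_MODULE,
	.open	 = example_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release,
};

static int __init example_proc_init(void)
{
	if (!proc_create_data("example", 0, NULL, &example_fops, example_msg))
		return -ENOMEM;
	return 0;
}
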
diff --git a/fs/aio.c b/fs/aio.c
index ae94e1dea266..b5253e77eb2f 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -191,6 +191,43 @@ static int aio_setup_ring(struct kioctx *ctx)
191 kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \ 191 kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \
192} while(0) 192} while(0)
193 193
194
195/* __put_ioctx
196 * Called when the last user of an aio context has gone away,
197 * and the struct needs to be freed.
198 */
199static void __put_ioctx(struct kioctx *ctx)
200{
201 unsigned nr_events = ctx->max_reqs;
202
203 BUG_ON(ctx->reqs_active);
204
205 cancel_delayed_work(&ctx->wq);
206 cancel_work_sync(&ctx->wq.work);
207 aio_free_ring(ctx);
208 mmdrop(ctx->mm);
209 ctx->mm = NULL;
210 pr_debug("__put_ioctx: freeing %p\n", ctx);
211 kmem_cache_free(kioctx_cachep, ctx);
212
213 if (nr_events) {
214 spin_lock(&aio_nr_lock);
215 BUG_ON(aio_nr - nr_events > aio_nr);
216 aio_nr -= nr_events;
217 spin_unlock(&aio_nr_lock);
218 }
219}
220
221#define get_ioctx(kioctx) do { \
222 BUG_ON(atomic_read(&(kioctx)->users) <= 0); \
223 atomic_inc(&(kioctx)->users); \
224} while (0)
225#define put_ioctx(kioctx) do { \
226 BUG_ON(atomic_read(&(kioctx)->users) <= 0); \
227 if (unlikely(atomic_dec_and_test(&(kioctx)->users))) \
228 __put_ioctx(kioctx); \
229} while (0)
230
194/* ioctx_alloc 231/* ioctx_alloc
195 * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed. 232 * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed.
196 */ 233 */
@@ -240,7 +277,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
240 if (ctx->max_reqs == 0) 277 if (ctx->max_reqs == 0)
241 goto out_cleanup; 278 goto out_cleanup;
242 279
243 /* now link into global list. kludge. FIXME */ 280 /* now link into global list. */
244 write_lock(&mm->ioctx_list_lock); 281 write_lock(&mm->ioctx_list_lock);
245 ctx->next = mm->ioctx_list; 282 ctx->next = mm->ioctx_list;
246 mm->ioctx_list = ctx; 283 mm->ioctx_list = ctx;
@@ -361,32 +398,6 @@ void exit_aio(struct mm_struct *mm)
361 } 398 }
362} 399}
363 400
364/* __put_ioctx
365 * Called when the last user of an aio context has gone away,
366 * and the struct needs to be freed.
367 */
368void __put_ioctx(struct kioctx *ctx)
369{
370 unsigned nr_events = ctx->max_reqs;
371
372 BUG_ON(ctx->reqs_active);
373
374 cancel_delayed_work(&ctx->wq);
375 cancel_work_sync(&ctx->wq.work);
376 aio_free_ring(ctx);
377 mmdrop(ctx->mm);
378 ctx->mm = NULL;
379 pr_debug("__put_ioctx: freeing %p\n", ctx);
380 kmem_cache_free(kioctx_cachep, ctx);
381
382 if (nr_events) {
383 spin_lock(&aio_nr_lock);
384 BUG_ON(aio_nr - nr_events > aio_nr);
385 aio_nr -= nr_events;
386 spin_unlock(&aio_nr_lock);
387 }
388}
389
390/* aio_get_req 401/* aio_get_req
391 * Allocate a slot for an aio request. Increments the users count 402 * Allocate a slot for an aio request. Increments the users count
392 * of the kioctx so that the kioctx stays around until all requests are 403 * of the kioctx so that the kioctx stays around until all requests are
@@ -542,10 +553,7 @@ int aio_put_req(struct kiocb *req)
542 return ret; 553 return ret;
543} 554}
544 555
545/* Lookup an ioctx id. ioctx_list is lockless for reads. 556static struct kioctx *lookup_ioctx(unsigned long ctx_id)
546 * FIXME: this is O(n) and is only suitable for development.
547 */
548struct kioctx *lookup_ioctx(unsigned long ctx_id)
549{ 557{
550 struct kioctx *ioctx; 558 struct kioctx *ioctx;
551 struct mm_struct *mm; 559 struct mm_struct *mm;
@@ -1070,9 +1078,7 @@ static void timeout_func(unsigned long data)
1070 1078
1071static inline void init_timeout(struct aio_timeout *to) 1079static inline void init_timeout(struct aio_timeout *to)
1072{ 1080{
1073 init_timer(&to->timer); 1081 setup_timer_on_stack(&to->timer, timeout_func, (unsigned long) to);
1074 to->timer.data = (unsigned long)to;
1075 to->timer.function = timeout_func;
1076 to->timed_out = 0; 1082 to->timed_out = 0;
1077 to->p = current; 1083 to->p = current;
1078} 1084}
@@ -1205,6 +1211,7 @@ retry:
1205 if (timeout) 1211 if (timeout)
1206 clear_timeout(&to); 1212 clear_timeout(&to);
1207out: 1213out:
1214 destroy_timer_on_stack(&to.timer);
1208 return i ? i : ret; 1215 return i ? i : ret;
1209} 1216}
1210 1217
@@ -1552,7 +1559,7 @@ static int aio_wake_function(wait_queue_t *wait, unsigned mode,
1552 return 1; 1559 return 1;
1553} 1560}
1554 1561
1555int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, 1562static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1556 struct iocb *iocb) 1563 struct iocb *iocb)
1557{ 1564{
1558 struct kiocb *req; 1565 struct kiocb *req;
@@ -1593,7 +1600,7 @@ int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1593 * event using the eventfd_signal() function. 1600 * event using the eventfd_signal() function.
1594 */ 1601 */
1595 req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd); 1602 req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd);
1596 if (unlikely(IS_ERR(req->ki_eventfd))) { 1603 if (IS_ERR(req->ki_eventfd)) {
1597 ret = PTR_ERR(req->ki_eventfd); 1604 ret = PTR_ERR(req->ki_eventfd);
1598 goto out_put_req; 1605 goto out_put_req;
1599 } 1606 }
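
In the aio timeout setup, the open-coded init_timer() plus field assignments become setup_timer_on_stack(), and destroy_timer_on_stack() is added on the way out so that the timer object debugging code can track timers living on the stack. A hedged sketch of how the pair is used, with invented names:

#include <linux/timer.h>
#include <linux/jiffies.h>

static void my_timeout(unsigned long data)
{
	/* 'data' carries whatever was passed to setup_timer_on_stack() */
	*(int *)data = 1;
}

static void wait_with_timeout(void)
{
	struct timer_list timer;
	int expired = 0;

	setup_timer_on_stack(&timer, my_timeout, (unsigned long)&expired);
	mod_timer(&timer, jiffies + HZ);	/* fire in one second */

	/* ... sleep or poll until the work is done or 'expired' is set ... */

	del_timer_sync(&timer);
	destroy_timer_on_stack(&timer);	/* must precede leaving the frame */
}
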
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index f42be069e085..977ef208c051 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -57,9 +57,6 @@ static struct dentry_operations anon_inodefs_dentry_operations = {
 57 * anonymous inode, and a dentry that describes the "class" 57 * anonymous inode, and a dentry that describes the "class"
58 * of the file 58 * of the file
59 * 59 *
60 * @pfd: [out] pointer to the file descriptor
61 * @dpinode: [out] pointer to the inode
62 * @pfile: [out] pointer to the file struct
63 * @name: [in] name of the "class" of the new file 60 * @name: [in] name of the "class" of the new file
64 * @fops [in] file operations for the new file 61 * @fops [in] file operations for the new file
65 * @priv [in] private data for the new file (will be file's private_data) 62 * @priv [in] private data for the new file (will be file's private_data)
@@ -68,10 +65,9 @@ static struct dentry_operations anon_inodefs_dentry_operations = {
68 * that do not need to have a full-fledged inode in order to operate correctly. 65 * that do not need to have a full-fledged inode in order to operate correctly.
69 * All the files created with anon_inode_getfd() will share a single inode, 66 * All the files created with anon_inode_getfd() will share a single inode,
70 * hence saving memory and avoiding code duplication for the file/inode/dentry 67 * hence saving memory and avoiding code duplication for the file/inode/dentry
71 * setup. 68 * setup. Returns new descriptor or -error.
72 */ 69 */
73int anon_inode_getfd(int *pfd, struct inode **pinode, struct file **pfile, 70int anon_inode_getfd(const char *name, const struct file_operations *fops,
74 const char *name, const struct file_operations *fops,
75 void *priv) 71 void *priv)
76{ 72{
77 struct qstr this; 73 struct qstr this;
@@ -125,10 +121,7 @@ int anon_inode_getfd(int *pfd, struct inode **pinode, struct file **pfile,
125 121
126 fd_install(fd, file); 122 fd_install(fd, file);
127 123
128 *pfd = fd; 124 return fd;
129 *pinode = anon_inode_inode;
130 *pfile = file;
131 return 0;
132 125
133err_dput: 126err_dput:
134 dput(dentry); 127 dput(dentry);
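
anon_inode_getfd() loses its three output parameters and now simply returns the installed descriptor or a negative errno, so callers collapse to a single call plus an error check. A hypothetical caller under the new signature; my_file_fops, my_create_fd and the "[my-ctx]" name are assumptions:

#include <linux/module.h>
#include <linux/anon_inodes.h>
#include <linux/fs.h>

static const struct file_operations my_file_fops = {
	.owner = THIS_MODULE,
	/* .read/.poll/.release as needed for the "class" */
};

static int my_create_fd(void *ctx)
{
	int fd;

	fd = anon_inode_getfd("[my-ctx]", &my_file_fops, ctx);
	if (fd < 0)
		return fd;	/* e.g. -ENFILE or -ENOMEM */

	/* the descriptor is already installed in the caller's file table */
	return fd;
}
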
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 2d4ae40718d9..c3d352d7fa93 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -35,7 +35,7 @@
35/* #define DEBUG */ 35/* #define DEBUG */
36 36
37#ifdef DEBUG 37#ifdef DEBUG
38#define DPRINTK(fmt,args...) do { printk(KERN_DEBUG "pid %d: %s: " fmt "\n" , current->pid , __FUNCTION__ , ##args); } while(0) 38#define DPRINTK(fmt,args...) do { printk(KERN_DEBUG "pid %d: %s: " fmt "\n" , current->pid , __func__ , ##args); } while(0)
39#else 39#else
40#define DPRINTK(fmt,args...) do {} while(0) 40#define DPRINTK(fmt,args...) do {} while(0)
41#endif 41#endif
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index d96e5c14a9ca..894fee54d4d8 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -73,8 +73,8 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
73 status = 0; 73 status = 0;
74done: 74done:
75 DPRINTK("returning = %d", status); 75 DPRINTK("returning = %d", status);
76 mntput(mnt);
77 dput(dentry); 76 dput(dentry);
77 mntput(mnt);
78 return status; 78 return status;
79} 79}
80 80
@@ -333,7 +333,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
333 /* Can we expire this guy */ 333 /* Can we expire this guy */
334 if (autofs4_can_expire(dentry, timeout, do_now)) { 334 if (autofs4_can_expire(dentry, timeout, do_now)) {
335 expired = dentry; 335 expired = dentry;
336 break; 336 goto found;
337 } 337 }
338 goto next; 338 goto next;
339 } 339 }
@@ -352,7 +352,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
352 inf->flags |= AUTOFS_INF_EXPIRING; 352 inf->flags |= AUTOFS_INF_EXPIRING;
353 spin_unlock(&sbi->fs_lock); 353 spin_unlock(&sbi->fs_lock);
354 expired = dentry; 354 expired = dentry;
355 break; 355 goto found;
356 } 356 }
357 spin_unlock(&sbi->fs_lock); 357 spin_unlock(&sbi->fs_lock);
358 /* 358 /*
@@ -363,7 +363,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
363 expired = autofs4_check_leaves(mnt, dentry, timeout, do_now); 363 expired = autofs4_check_leaves(mnt, dentry, timeout, do_now);
364 if (expired) { 364 if (expired) {
365 dput(dentry); 365 dput(dentry);
366 break; 366 goto found;
367 } 367 }
368 } 368 }
369next: 369next:
@@ -371,18 +371,16 @@ next:
371 spin_lock(&dcache_lock); 371 spin_lock(&dcache_lock);
372 next = next->next; 372 next = next->next;
373 } 373 }
374
375 if (expired) {
376 DPRINTK("returning %p %.*s",
377 expired, (int)expired->d_name.len, expired->d_name.name);
378 spin_lock(&dcache_lock);
379 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
380 spin_unlock(&dcache_lock);
381 return expired;
382 }
383 spin_unlock(&dcache_lock); 374 spin_unlock(&dcache_lock);
384
385 return NULL; 375 return NULL;
376
377found:
378 DPRINTK("returning %p %.*s",
379 expired, (int)expired->d_name.len, expired->d_name.name);
380 spin_lock(&dcache_lock);
381 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
382 spin_unlock(&dcache_lock);
383 return expired;
386} 384}
387 385
388/* Perform an expiry operation */ 386/* Perform an expiry operation */
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index a54a946a50ae..edf5b6bddb52 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -146,17 +146,17 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
146 146
147 if (d_mountpoint(dentry)) { 147 if (d_mountpoint(dentry)) {
148 struct file *fp = NULL; 148 struct file *fp = NULL;
149 struct vfsmount *fp_mnt = mntget(mnt); 149 struct path fp_path = { .dentry = dentry, .mnt = mnt };
150 struct dentry *fp_dentry = dget(dentry);
151 150
152 if (!autofs4_follow_mount(&fp_mnt, &fp_dentry)) { 151 path_get(&fp_path);
153 dput(fp_dentry); 152
154 mntput(fp_mnt); 153 if (!autofs4_follow_mount(&fp_path.mnt, &fp_path.dentry)) {
154 path_put(&fp_path);
155 dcache_dir_close(inode, file); 155 dcache_dir_close(inode, file);
156 goto out; 156 goto out;
157 } 157 }
158 158
159 fp = dentry_open(fp_dentry, fp_mnt, file->f_flags); 159 fp = dentry_open(fp_path.dentry, fp_path.mnt, file->f_flags);
160 status = PTR_ERR(fp); 160 status = PTR_ERR(fp);
161 if (IS_ERR(fp)) { 161 if (IS_ERR(fp)) {
162 dcache_dir_close(inode, file); 162 dcache_dir_close(inode, file);
@@ -242,7 +242,8 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
242{ 242{
243 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 243 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
244 struct autofs_info *ino = autofs4_dentry_ino(dentry); 244 struct autofs_info *ino = autofs4_dentry_ino(dentry);
245 int status = 0; 245 struct dentry *new;
246 int status;
246 247
247 /* Block on any pending expiry here; invalidate the dentry 248 /* Block on any pending expiry here; invalidate the dentry
248 when expiration is done to trigger mount request with a new 249 when expiration is done to trigger mount request with a new
@@ -318,7 +319,28 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
318 spin_lock(&dentry->d_lock); 319 spin_lock(&dentry->d_lock);
319 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; 320 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
320 spin_unlock(&dentry->d_lock); 321 spin_unlock(&dentry->d_lock);
321 return status; 322
323 /*
324 * The dentry that is passed in from lookup may not be the one
325 * we end up using, as mkdir can create a new one. If this
326 * happens, and another process tries the lookup at the same time,
327 * it will set the PENDING flag on this new dentry, but add itself
328 * to our waitq. Then, if after the lookup succeeds, the first
329 * process that requested the mount performs another lookup of the
330 * same directory, it will show up as still pending! So, we need
331 * to redo the lookup here and clear pending on that dentry.
332 */
333 if (d_unhashed(dentry)) {
334 new = d_lookup(dentry->d_parent, &dentry->d_name);
335 if (new) {
336 spin_lock(&new->d_lock);
337 new->d_flags &= ~DCACHE_AUTOFS_PENDING;
338 spin_unlock(&new->d_lock);
339 dput(new);
340 }
341 }
342
343 return 0;
322} 344}
323 345
324/* For autofs direct mounts the follow link triggers the mount */ 346/* For autofs direct mounts the follow link triggers the mount */
@@ -533,9 +555,9 @@ static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct
533 goto next; 555 goto next;
534 556
535 if (d_unhashed(dentry)) { 557 if (d_unhashed(dentry)) {
536 struct autofs_info *ino = autofs4_dentry_ino(dentry);
537 struct inode *inode = dentry->d_inode; 558 struct inode *inode = dentry->d_inode;
538 559
560 ino = autofs4_dentry_ino(dentry);
539 list_del_init(&ino->rehash); 561 list_del_init(&ino->rehash);
540 dget(dentry); 562 dget(dentry);
541 /* 563 /*
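
The autofs4_dir_open() hunk (and the dput()/mntput() reordering in expire.c above) moves to the struct path helpers: path_get() and path_put() treat the {vfsmount, dentry} pair as one object and release the dentry before the mount. A small sketch, assuming the caller already holds valid mnt and dentry references:

#include <linux/path.h>
#include <linux/namei.h>
#include <linux/dcache.h>
#include <linux/mount.h>

static void use_path(struct vfsmount *mnt, struct dentry *dentry)
{
	struct path p = { .mnt = mnt, .dentry = dentry };

	path_get(&p);		/* takes a reference on both mnt and dentry */

	/* ... follow mounts, open the dentry, etc. ... */

	path_put(&p);		/* dput() first, then mntput() */
}
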
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 1fe28e4754c2..75e5955c3f6d 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -171,7 +171,7 @@ static int autofs4_getpath(struct autofs_sb_info *sbi,
171 for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent) 171 for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent)
172 len += tmp->d_name.len + 1; 172 len += tmp->d_name.len + 1;
173 173
174 if (--len > NAME_MAX) { 174 if (!len || --len > NAME_MAX) {
175 spin_unlock(&dcache_lock); 175 spin_unlock(&dcache_lock);
176 return 0; 176 return 0;
177 } 177 }
diff --git a/fs/befs/endian.h b/fs/befs/endian.h
index e254a20869f4..6cb84d896d05 100644
--- a/fs/befs/endian.h
+++ b/fs/befs/endian.h
@@ -9,7 +9,7 @@
9#ifndef LINUX_BEFS_ENDIAN 9#ifndef LINUX_BEFS_ENDIAN
10#define LINUX_BEFS_ENDIAN 10#define LINUX_BEFS_ENDIAN
11 11
12#include <linux/byteorder/generic.h> 12#include <asm/byteorder.h>
13 13
14static inline u64 14static inline u64
15fs64_to_cpu(const struct super_block *sb, fs64 n) 15fs64_to_cpu(const struct super_block *sb, fs64 n)
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 82123ff3e1dd..e8717de3bab3 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -489,9 +489,9 @@ static void befs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
489{ 489{
490 befs_inode_info *befs_ino = BEFS_I(dentry->d_inode); 490 befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
491 if (befs_ino->i_flags & BEFS_LONG_SYMLINK) { 491 if (befs_ino->i_flags & BEFS_LONG_SYMLINK) {
492 char *p = nd_get_link(nd); 492 char *link = nd_get_link(nd);
493 if (!IS_ERR(p)) 493 if (!IS_ERR(link))
494 kfree(p); 494 kfree(link);
495 } 495 }
496} 496}
497 497
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index 71faf4d23908..70f5d3a8eede 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -42,7 +42,7 @@ static inline struct bfs_inode_info *BFS_I(struct inode *inode)
42 42
43 43
44#define printf(format, args...) \ 44#define printf(format, args...) \
45 printk(KERN_ERR "BFS-fs: %s(): " format, __FUNCTION__, ## args) 45 printk(KERN_ERR "BFS-fs: %s(): " format, __func__, ## args)
46 46
47/* inode.c */ 47/* inode.c */
48extern struct inode *bfs_iget(struct super_block *sb, unsigned long ino); 48extern struct inode *bfs_iget(struct super_block *sb, unsigned long ino);
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index a1bb2244cac7..ba4cddb92f1d 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -372,21 +372,17 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
372 372
373 flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data); 373 flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data);
374 } else { 374 } else {
375 static unsigned long error_time, error_time2;
376 if ((ex.a_text & 0xfff || ex.a_data & 0xfff) && 375 if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
377 (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ) 376 (N_MAGIC(ex) != NMAGIC) && printk_ratelimit())
378 { 377 {
379 printk(KERN_NOTICE "executable not page aligned\n"); 378 printk(KERN_NOTICE "executable not page aligned\n");
380 error_time2 = jiffies;
381 } 379 }
382 380
383 if ((fd_offset & ~PAGE_MASK) != 0 && 381 if ((fd_offset & ~PAGE_MASK) != 0 && printk_ratelimit())
384 (jiffies-error_time) > 5*HZ)
385 { 382 {
386 printk(KERN_WARNING 383 printk(KERN_WARNING
387 "fd_offset is not page aligned. Please convert program: %s\n", 384 "fd_offset is not page aligned. Please convert program: %s\n",
388 bprm->file->f_path.dentry->d_name.name); 385 bprm->file->f_path.dentry->d_name.name);
389 error_time = jiffies;
390 } 386 }
391 387
392 if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) { 388 if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
@@ -495,15 +491,13 @@ static int load_aout_library(struct file *file)
495 start_addr = ex.a_entry & 0xfffff000; 491 start_addr = ex.a_entry & 0xfffff000;
496 492
497 if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) { 493 if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) {
498 static unsigned long error_time;
499 loff_t pos = N_TXTOFF(ex); 494 loff_t pos = N_TXTOFF(ex);
500 495
501 if ((jiffies-error_time) > 5*HZ) 496 if (printk_ratelimit())
502 { 497 {
503 printk(KERN_WARNING 498 printk(KERN_WARNING
504 "N_TXTOFF is not page aligned. Please convert library: %s\n", 499 "N_TXTOFF is not page aligned. Please convert library: %s\n",
505 file->f_path.dentry->d_name.name); 500 file->f_path.dentry->d_name.name);
506 error_time = jiffies;
507 } 501 }
508 down_write(&current->mm->mmap_sem); 502 down_write(&current->mm->mmap_sem);
509 do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); 503 do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
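
The a.out loader drops its per-site "static unsigned long error_time; if (jiffies - error_time > 5*HZ)" throttling in favour of printk_ratelimit(), which applies the kernel-wide printk rate limit. In sketch form (warn_misaligned() is a made-up helper):

#include <linux/kernel.h>

static void warn_misaligned(const char *name)
{
	/* at most a burst of messages per ratelimit interval, globally */
	if (printk_ratelimit())
		printk(KERN_WARNING "%s is not page aligned\n", name);
}
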
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 9924581df6f6..b25707fee2cc 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1255,26 +1255,23 @@ static int writenote(struct memelfnote *men, struct file *file,
1255static void fill_elf_header(struct elfhdr *elf, int segs, 1255static void fill_elf_header(struct elfhdr *elf, int segs,
1256 u16 machine, u32 flags, u8 osabi) 1256 u16 machine, u32 flags, u8 osabi)
1257{ 1257{
1258 memset(elf, 0, sizeof(*elf));
1259
1258 memcpy(elf->e_ident, ELFMAG, SELFMAG); 1260 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1259 elf->e_ident[EI_CLASS] = ELF_CLASS; 1261 elf->e_ident[EI_CLASS] = ELF_CLASS;
1260 elf->e_ident[EI_DATA] = ELF_DATA; 1262 elf->e_ident[EI_DATA] = ELF_DATA;
1261 elf->e_ident[EI_VERSION] = EV_CURRENT; 1263 elf->e_ident[EI_VERSION] = EV_CURRENT;
1262 elf->e_ident[EI_OSABI] = ELF_OSABI; 1264 elf->e_ident[EI_OSABI] = ELF_OSABI;
1263 memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
1264 1265
1265 elf->e_type = ET_CORE; 1266 elf->e_type = ET_CORE;
1266 elf->e_machine = machine; 1267 elf->e_machine = machine;
1267 elf->e_version = EV_CURRENT; 1268 elf->e_version = EV_CURRENT;
1268 elf->e_entry = 0;
1269 elf->e_phoff = sizeof(struct elfhdr); 1269 elf->e_phoff = sizeof(struct elfhdr);
1270 elf->e_shoff = 0;
1271 elf->e_flags = flags; 1270 elf->e_flags = flags;
1272 elf->e_ehsize = sizeof(struct elfhdr); 1271 elf->e_ehsize = sizeof(struct elfhdr);
1273 elf->e_phentsize = sizeof(struct elf_phdr); 1272 elf->e_phentsize = sizeof(struct elf_phdr);
1274 elf->e_phnum = segs; 1273 elf->e_phnum = segs;
1275 elf->e_shentsize = 0; 1274
1276 elf->e_shnum = 0;
1277 elf->e_shstrndx = 0;
1278 return; 1275 return;
1279} 1276}
1280 1277
@@ -1725,26 +1722,25 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1725 1722
1726 info->thread_status_size = 0; 1723 info->thread_status_size = 0;
1727 if (signr) { 1724 if (signr) {
1728 struct elf_thread_status *tmp; 1725 struct elf_thread_status *ets;
1729 rcu_read_lock(); 1726 rcu_read_lock();
1730 do_each_thread(g, p) 1727 do_each_thread(g, p)
1731 if (current->mm == p->mm && current != p) { 1728 if (current->mm == p->mm && current != p) {
1732 tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); 1729 ets = kzalloc(sizeof(*ets), GFP_ATOMIC);
1733 if (!tmp) { 1730 if (!ets) {
1734 rcu_read_unlock(); 1731 rcu_read_unlock();
1735 return 0; 1732 return 0;
1736 } 1733 }
1737 tmp->thread = p; 1734 ets->thread = p;
1738 list_add(&tmp->list, &info->thread_list); 1735 list_add(&ets->list, &info->thread_list);
1739 } 1736 }
1740 while_each_thread(g, p); 1737 while_each_thread(g, p);
1741 rcu_read_unlock(); 1738 rcu_read_unlock();
1742 list_for_each(t, &info->thread_list) { 1739 list_for_each(t, &info->thread_list) {
1743 struct elf_thread_status *tmp;
1744 int sz; 1740 int sz;
1745 1741
1746 tmp = list_entry(t, struct elf_thread_status, list); 1742 ets = list_entry(t, struct elf_thread_status, list);
1747 sz = elf_dump_thread_status(signr, tmp); 1743 sz = elf_dump_thread_status(signr, ets);
1748 info->thread_status_size += sz; 1744 info->thread_status_size += sz;
1749 } 1745 }
1750 } 1746 }
@@ -2000,10 +1996,10 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
2000 1996
2001 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) { 1997 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2002 struct page *page; 1998 struct page *page;
2003 struct vm_area_struct *vma; 1999 struct vm_area_struct *tmp_vma;
2004 2000
2005 if (get_user_pages(current, current->mm, addr, 1, 0, 1, 2001 if (get_user_pages(current, current->mm, addr, 1, 0, 1,
2006 &page, &vma) <= 0) { 2002 &page, &tmp_vma) <= 0) {
2007 DUMP_SEEK(PAGE_SIZE); 2003 DUMP_SEEK(PAGE_SIZE);
2008 } else { 2004 } else {
2009 if (page == ZERO_PAGE(0)) { 2005 if (page == ZERO_PAGE(0)) {
@@ -2013,7 +2009,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
2013 } 2009 }
2014 } else { 2010 } else {
2015 void *kaddr; 2011 void *kaddr;
2016 flush_cache_page(vma, addr, 2012 flush_cache_page(tmp_vma, addr,
2017 page_to_pfn(page)); 2013 page_to_pfn(page));
2018 kaddr = kmap(page); 2014 kaddr = kmap(page);
2019 if ((size += PAGE_SIZE) > limit || 2015 if ((size += PAGE_SIZE) > limit ||
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 32649f2a1654..ddd35d873391 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -136,8 +136,8 @@ static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params,
136 136
137 retval = kernel_read(file, params->hdr.e_phoff, 137 retval = kernel_read(file, params->hdr.e_phoff,
138 (char *) params->phdrs, size); 138 (char *) params->phdrs, size);
139 if (retval < 0) 139 if (unlikely(retval != size))
140 return retval; 140 return retval < 0 ? retval : -ENOEXEC;
141 141
142 /* determine stack size for this binary */ 142 /* determine stack size for this binary */
143 phdr = params->phdrs; 143 phdr = params->phdrs;
@@ -218,8 +218,11 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
218 phdr->p_offset, 218 phdr->p_offset,
219 interpreter_name, 219 interpreter_name,
220 phdr->p_filesz); 220 phdr->p_filesz);
221 if (retval < 0) 221 if (unlikely(retval != phdr->p_filesz)) {
222 if (retval >= 0)
223 retval = -ENOEXEC;
222 goto error; 224 goto error;
225 }
223 226
224 retval = -ENOENT; 227 retval = -ENOENT;
225 if (interpreter_name[phdr->p_filesz - 1] != '\0') 228 if (interpreter_name[phdr->p_filesz - 1] != '\0')
@@ -245,8 +248,11 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
245 248
246 retval = kernel_read(interpreter, 0, bprm->buf, 249 retval = kernel_read(interpreter, 0, bprm->buf,
247 BINPRM_BUF_SIZE); 250 BINPRM_BUF_SIZE);
248 if (retval < 0) 251 if (unlikely(retval != BINPRM_BUF_SIZE)) {
252 if (retval >= 0)
253 retval = -ENOEXEC;
249 goto error; 254 goto error;
255 }
250 256
251 interp_params.hdr = *((struct elfhdr *) bprm->buf); 257 interp_params.hdr = *((struct elfhdr *) bprm->buf);
252 break; 258 break;
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
index f95ae9789c91..f9c88d0c8ced 100644
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -43,7 +43,7 @@ static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs)
43 return -ENOEXEC; 43 return -ENOEXEC;
44 } 44 }
45 45
46 bprm->sh_bang++; /* Well, the bang-shell is implicit... */ 46 bprm->sh_bang = 1; /* Well, the bang-shell is implicit... */
47 allow_write_access(bprm->file); 47 allow_write_access(bprm->file);
48 fput(bprm->file); 48 fput(bprm->file);
49 bprm->file = NULL; 49 bprm->file = NULL;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 0498b181dd52..3b40d45a3a16 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -531,7 +531,8 @@ static int load_flat_file(struct linux_binprm * bprm,
531 DBG_FLT("BINFMT_FLAT: ROM mapping of file (we hope)\n"); 531 DBG_FLT("BINFMT_FLAT: ROM mapping of file (we hope)\n");
532 532
533 down_write(&current->mm->mmap_sem); 533 down_write(&current->mm->mmap_sem);
534 textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, MAP_PRIVATE, 0); 534 textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC,
535 MAP_PRIVATE|MAP_EXECUTABLE, 0);
535 up_write(&current->mm->mmap_sem); 536 up_write(&current->mm->mmap_sem);
536 if (!textpos || textpos >= (unsigned long) -4096) { 537 if (!textpos || textpos >= (unsigned long) -4096) {
537 if (!textpos) 538 if (!textpos)
@@ -932,14 +933,8 @@ static int __init init_flat_binfmt(void)
932 return register_binfmt(&flat_format); 933 return register_binfmt(&flat_format);
933} 934}
934 935
935static void __exit exit_flat_binfmt(void)
936{
937 unregister_binfmt(&flat_format);
938}
939
940/****************************************************************************/ 936/****************************************************************************/
941 937
942core_initcall(init_flat_binfmt); 938core_initcall(init_flat_binfmt);
943module_exit(exit_flat_binfmt);
944 939
945/****************************************************************************/ 940/****************************************************************************/
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index dbf0ac0523de..7191306367c5 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -115,6 +115,12 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
115 if (!enabled) 115 if (!enabled)
116 goto _ret; 116 goto _ret;
117 117
118 retval = -ENOEXEC;
119 if (bprm->misc_bang)
120 goto _ret;
121
122 bprm->misc_bang = 1;
123
118 /* to keep locking time low, we copy the interpreter string */ 124 /* to keep locking time low, we copy the interpreter string */
119 read_lock(&entries_lock); 125 read_lock(&entries_lock);
120 fmt = check_file(bprm); 126 fmt = check_file(bprm);
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index ab33939b12a7..9e3963f7ebf1 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -29,7 +29,7 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs)
29 * Sorta complicated, but hopefully it will work. -TYT 29 * Sorta complicated, but hopefully it will work. -TYT
30 */ 30 */
31 31
32 bprm->sh_bang++; 32 bprm->sh_bang = 1;
33 allow_write_access(bprm->file); 33 allow_write_access(bprm->file);
34 fput(bprm->file); 34 fput(bprm->file);
35 bprm->file = NULL; 35 bprm->file = NULL;
diff --git a/fs/bio.c b/fs/bio.c
index 6e0b6f66df03..78562574cb52 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -158,7 +158,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
158 158
159 bio_init(bio); 159 bio_init(bio);
160 if (likely(nr_iovecs)) { 160 if (likely(nr_iovecs)) {
161 unsigned long idx = 0; /* shut up gcc */ 161 unsigned long uninitialized_var(idx);
162 162
163 bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); 163 bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
164 if (unlikely(!bvl)) { 164 if (unlikely(!bvl)) {
@@ -937,6 +937,96 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
937 return ERR_PTR(-EINVAL); 937 return ERR_PTR(-EINVAL);
938} 938}
939 939
940static void bio_copy_kern_endio(struct bio *bio, int err)
941{
942 struct bio_vec *bvec;
943 const int read = bio_data_dir(bio) == READ;
944 char *p = bio->bi_private;
945 int i;
946
947 __bio_for_each_segment(bvec, bio, i, 0) {
948 char *addr = page_address(bvec->bv_page);
949
950 if (read && !err)
951 memcpy(p, addr, bvec->bv_len);
952
953 __free_page(bvec->bv_page);
954 p += bvec->bv_len;
955 }
956
957 bio_put(bio);
958}
959
960/**
961 * bio_copy_kern - copy kernel address into bio
962 * @q: the struct request_queue for the bio
963 * @data: pointer to buffer to copy
964 * @len: length in bytes
965 * @gfp_mask: allocation flags for bio and page allocation
966 * @reading: data direction is READ
967 *
968 * copy the kernel address into a bio suitable for io to a block
969 * device. Returns an error pointer in case of error.
970 */
971struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
972 gfp_t gfp_mask, int reading)
973{
974 unsigned long kaddr = (unsigned long)data;
975 unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
976 unsigned long start = kaddr >> PAGE_SHIFT;
977 const int nr_pages = end - start;
978 struct bio *bio;
979 struct bio_vec *bvec;
980 int i, ret;
981
982 bio = bio_alloc(gfp_mask, nr_pages);
983 if (!bio)
984 return ERR_PTR(-ENOMEM);
985
986 while (len) {
987 struct page *page;
988 unsigned int bytes = PAGE_SIZE;
989
990 if (bytes > len)
991 bytes = len;
992
993 page = alloc_page(q->bounce_gfp | gfp_mask);
994 if (!page) {
995 ret = -ENOMEM;
996 goto cleanup;
997 }
998
999 if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) {
1000 ret = -EINVAL;
1001 goto cleanup;
1002 }
1003
1004 len -= bytes;
1005 }
1006
1007 if (!reading) {
1008 void *p = data;
1009
1010 bio_for_each_segment(bvec, bio, i) {
1011 char *addr = page_address(bvec->bv_page);
1012
1013 memcpy(addr, p, bvec->bv_len);
1014 p += bvec->bv_len;
1015 }
1016 }
1017
1018 bio->bi_private = data;
1019 bio->bi_end_io = bio_copy_kern_endio;
1020 return bio;
1021cleanup:
1022 bio_for_each_segment(bvec, bio, i)
1023 __free_page(bvec->bv_page);
1024
1025 bio_put(bio);
1026
1027 return ERR_PTR(ret);
1028}
1029
940/* 1030/*
941 * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions 1031 * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
942 * for performing direct-IO in BIOs. 1032 * for performing direct-IO in BIOs.
@@ -1273,6 +1363,7 @@ EXPORT_SYMBOL(bio_get_nr_vecs);
1273EXPORT_SYMBOL(bio_map_user); 1363EXPORT_SYMBOL(bio_map_user);
1274EXPORT_SYMBOL(bio_unmap_user); 1364EXPORT_SYMBOL(bio_unmap_user);
1275EXPORT_SYMBOL(bio_map_kern); 1365EXPORT_SYMBOL(bio_map_kern);
1366EXPORT_SYMBOL(bio_copy_kern);
1276EXPORT_SYMBOL(bio_pair_release); 1367EXPORT_SYMBOL(bio_pair_release);
1277EXPORT_SYMBOL(bio_split); 1368EXPORT_SYMBOL(bio_split);
1278EXPORT_SYMBOL(bio_split_pool); 1369EXPORT_SYMBOL(bio_split_pool);
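
The new bio_copy_kern() is the bounce-buffer counterpart to bio_map_kern(): the data is copied into freshly allocated pages, and bio_copy_kern_endio() copies it back on a read and frees the pages. A hypothetical caller; 'q', 'buf' and make_kernel_bio() are assumptions, not existing block-layer code:

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/err.h>

static struct bio *make_kernel_bio(struct request_queue *q, void *buf,
				   unsigned int len, int reading)
{
	struct bio *bio;

	/* 'reading' selects the copy-back direction in the end_io handler */
	bio = bio_copy_kern(q, buf, len, GFP_KERNEL, reading);
	if (IS_ERR(bio))
		return bio;		/* ERR_PTR(-ENOMEM) or ERR_PTR(-EINVAL) */

	if (!reading)
		bio->bi_rw |= (1 << BIO_RW);	/* mark it as a write */
	return bio;
}
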
diff --git a/fs/buffer.c b/fs/buffer.c
index 3db4a26adc44..a073f3f4f013 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1101,7 +1101,7 @@ grow_buffers(struct block_device *bdev, sector_t block, int size)
1101 1101
1102 printk(KERN_ERR "%s: requested out-of-range block %llu for " 1102 printk(KERN_ERR "%s: requested out-of-range block %llu for "
1103 "device %s\n", 1103 "device %s\n",
1104 __FUNCTION__, (unsigned long long)block, 1104 __func__, (unsigned long long)block,
1105 bdevname(bdev, b)); 1105 bdevname(bdev, b));
1106 return -EIO; 1106 return -EIO;
1107 } 1107 }
@@ -2211,8 +2211,8 @@ out:
2211 return err; 2211 return err;
2212} 2212}
2213 2213
2214int cont_expand_zero(struct file *file, struct address_space *mapping, 2214static int cont_expand_zero(struct file *file, struct address_space *mapping,
2215 loff_t pos, loff_t *bytes) 2215 loff_t pos, loff_t *bytes)
2216{ 2216{
2217 struct inode *inode = mapping->host; 2217 struct inode *inode = mapping->host;
2218 unsigned blocksize = 1 << inode->i_blkbits; 2218 unsigned blocksize = 1 << inode->i_blkbits;
@@ -2328,23 +2328,6 @@ int block_commit_write(struct page *page, unsigned from, unsigned to)
2328 return 0; 2328 return 0;
2329} 2329}
2330 2330
2331int generic_commit_write(struct file *file, struct page *page,
2332 unsigned from, unsigned to)
2333{
2334 struct inode *inode = page->mapping->host;
2335 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
2336 __block_commit_write(inode,page,from,to);
2337 /*
2338 * No need to use i_size_read() here, the i_size
2339 * cannot change under us because we hold i_mutex.
2340 */
2341 if (pos > inode->i_size) {
2342 i_size_write(inode, pos);
2343 mark_inode_dirty(inode);
2344 }
2345 return 0;
2346}
2347
2348/* 2331/*
2349 * block_page_mkwrite() is not allowed to change the file size as it gets 2332 * block_page_mkwrite() is not allowed to change the file size as it gets
2350 * called from a page fault handler when a page is first dirtied. Hence we must 2333 * called from a page fault handler when a page is first dirtied. Hence we must
@@ -3315,7 +3298,6 @@ EXPORT_SYMBOL(end_buffer_write_sync);
3315EXPORT_SYMBOL(file_fsync); 3298EXPORT_SYMBOL(file_fsync);
3316EXPORT_SYMBOL(fsync_bdev); 3299EXPORT_SYMBOL(fsync_bdev);
3317EXPORT_SYMBOL(generic_block_bmap); 3300EXPORT_SYMBOL(generic_block_bmap);
3318EXPORT_SYMBOL(generic_commit_write);
3319EXPORT_SYMBOL(generic_cont_expand_simple); 3301EXPORT_SYMBOL(generic_cont_expand_simple);
3320EXPORT_SYMBOL(init_buffer); 3302EXPORT_SYMBOL(init_buffer);
3321EXPORT_SYMBOL(invalidate_bdev); 3303EXPORT_SYMBOL(invalidate_bdev);
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 038674aa88a7..68e510b88457 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -55,7 +55,6 @@ static struct char_device_struct {
55 unsigned int baseminor; 55 unsigned int baseminor;
56 int minorct; 56 int minorct;
57 char name[64]; 57 char name[64];
58 struct file_operations *fops;
59 struct cdev *cdev; /* will die */ 58 struct cdev *cdev; /* will die */
60} *chrdevs[CHRDEV_MAJOR_HASH_SIZE]; 59} *chrdevs[CHRDEV_MAJOR_HASH_SIZE];
61 60
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 05c9da6181c3..8355e918fddf 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,6 @@
1Version 1.53
2------------
3
1Version 1.52 4Version 1.52
2------------ 5------------
3Fix oops on second mount to server when null auth is used. 6Fix oops on second mount to server when null auth is used.
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index bcda2c6b6a04..cb52cbbe45ff 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -460,8 +460,8 @@ decode_negTokenInit(unsigned char *security_blob, int length,
460 unsigned char *sequence_end; 460 unsigned char *sequence_end;
461 unsigned long *oid = NULL; 461 unsigned long *oid = NULL;
462 unsigned int cls, con, tag, oidlen, rc; 462 unsigned int cls, con, tag, oidlen, rc;
463 int use_ntlmssp = FALSE; 463 bool use_ntlmssp = false;
464 int use_kerberos = FALSE; 464 bool use_kerberos = false;
465 465
466 *secType = NTLM; /* BB eventually make Kerberos or NLTMSSP the default*/ 466 *secType = NTLM; /* BB eventually make Kerberos or NLTMSSP the default*/
467 467
@@ -561,15 +561,15 @@ decode_negTokenInit(unsigned char *security_blob, int length,
561 if (compare_oid(oid, oidlen, 561 if (compare_oid(oid, oidlen,
562 MSKRB5_OID, 562 MSKRB5_OID,
563 MSKRB5_OID_LEN)) 563 MSKRB5_OID_LEN))
564 use_kerberos = TRUE; 564 use_kerberos = true;
565 else if (compare_oid(oid, oidlen, 565 else if (compare_oid(oid, oidlen,
566 KRB5_OID, 566 KRB5_OID,
567 KRB5_OID_LEN)) 567 KRB5_OID_LEN))
568 use_kerberos = TRUE; 568 use_kerberos = true;
569 else if (compare_oid(oid, oidlen, 569 else if (compare_oid(oid, oidlen,
570 NTLMSSP_OID, 570 NTLMSSP_OID,
571 NTLMSSP_OID_LEN)) 571 NTLMSSP_OID_LEN))
572 use_ntlmssp = TRUE; 572 use_ntlmssp = true;
573 573
574 kfree(oid); 574 kfree(oid);
575 } 575 }
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 0228ed06069e..cc950f69e51e 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -468,7 +468,7 @@ cifs_proc_init(void)
468{ 468{
469 struct proc_dir_entry *pde; 469 struct proc_dir_entry *pde;
470 470
471 proc_fs_cifs = proc_mkdir("cifs", proc_root_fs); 471 proc_fs_cifs = proc_mkdir("fs/cifs", NULL);
472 if (proc_fs_cifs == NULL) 472 if (proc_fs_cifs == NULL)
473 return; 473 return;
474 474
@@ -559,7 +559,7 @@ cifs_proc_clean(void)
559 remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs); 559 remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs);
560 remove_proc_entry("Experimental", proc_fs_cifs); 560 remove_proc_entry("Experimental", proc_fs_cifs);
561 remove_proc_entry("LookupCacheEnabled", proc_fs_cifs); 561 remove_proc_entry("LookupCacheEnabled", proc_fs_cifs);
562 remove_proc_entry("cifs", proc_root_fs); 562 remove_proc_entry("fs/cifs", NULL);
563} 563}
564 564
565static int 565static int
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 95024c066d89..f6fdecf6598c 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -93,15 +93,11 @@ static char *cifs_get_share_name(const char *node_name)
93 /* find sharename end */ 93 /* find sharename end */
94 pSep++; 94 pSep++;
95 pSep = memchr(UNC+(pSep-UNC), '\\', len-(pSep-UNC)); 95 pSep = memchr(UNC+(pSep-UNC), '\\', len-(pSep-UNC));
96 if (!pSep) { 96 if (pSep) {
97 cERROR(1, ("%s:2 cant find share name in node name: %s", 97 /* trim path up to sharename end
98 __func__, node_name)); 98 * now we have share name in UNC */
99 kfree(UNC); 99 *pSep = 0;
100 return NULL;
101 } 100 }
102 /* trim path up to sharename end
103 * * now we have share name in UNC */
104 *pSep = 0;
105 101
106 return UNC; 102 return UNC;
107} 103}
@@ -188,7 +184,7 @@ static char *compose_mount_options(const char *sb_mountdata,
188 tkn_e = strchr(tkn_e+1, '\\'); 184 tkn_e = strchr(tkn_e+1, '\\');
189 if (tkn_e) { 185 if (tkn_e) {
190 strcat(mountdata, ",prefixpath="); 186 strcat(mountdata, ",prefixpath=");
191 strcat(mountdata, tkn_e); 187 strcat(mountdata, tkn_e+1);
192 } 188 }
193 } 189 }
194 190
@@ -244,7 +240,8 @@ static char *build_full_dfs_path_from_dentry(struct dentry *dentry)
244 return NULL; 240 return NULL;
245 241
246 if (cifs_sb->tcon->Flags & SMB_SHARE_IS_IN_DFS) { 242 if (cifs_sb->tcon->Flags & SMB_SHARE_IS_IN_DFS) {
247 /* we should use full path name to correct working with DFS */ 243 int i;
244 /* we should use full path name for correct working with DFS */
248 l_max_len = strnlen(cifs_sb->tcon->treeName, MAX_TREE_SIZE+1) + 245 l_max_len = strnlen(cifs_sb->tcon->treeName, MAX_TREE_SIZE+1) +
249 strnlen(search_path, MAX_PATHCONF) + 1; 246 strnlen(search_path, MAX_PATHCONF) + 1;
250 tmp_path = kmalloc(l_max_len, GFP_KERNEL); 247 tmp_path = kmalloc(l_max_len, GFP_KERNEL);
@@ -253,8 +250,14 @@ static char *build_full_dfs_path_from_dentry(struct dentry *dentry)
253 return NULL; 250 return NULL;
254 } 251 }
255 strncpy(tmp_path, cifs_sb->tcon->treeName, l_max_len); 252 strncpy(tmp_path, cifs_sb->tcon->treeName, l_max_len);
256 strcat(tmp_path, search_path);
257 tmp_path[l_max_len-1] = 0; 253 tmp_path[l_max_len-1] = 0;
254 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
255 for (i = 0; i < l_max_len; i++) {
256 if (tmp_path[i] == '\\')
257 tmp_path[i] = '/';
258 }
259 strncat(tmp_path, search_path, l_max_len - strlen(tmp_path));
260
258 full_path = tmp_path; 261 full_path = tmp_path;
259 kfree(search_path); 262 kfree(search_path);
260 } else { 263 } else {
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index e99d4faf5f02..34902cff5400 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -559,7 +559,7 @@ static struct cifs_ntsd *get_cifs_acl(u32 *pacllen, struct inode *inode,
559 const char *path, const __u16 *pfid) 559 const char *path, const __u16 *pfid)
560{ 560{
561 struct cifsFileInfo *open_file = NULL; 561 struct cifsFileInfo *open_file = NULL;
562 int unlock_file = FALSE; 562 bool unlock_file = false;
563 int xid; 563 int xid;
564 int rc = -EIO; 564 int rc = -EIO;
565 __u16 fid; 565 __u16 fid;
@@ -586,10 +586,10 @@ static struct cifs_ntsd *get_cifs_acl(u32 *pacllen, struct inode *inode,
586 cifs_sb = CIFS_SB(sb); 586 cifs_sb = CIFS_SB(sb);
587 587
588 if (open_file) { 588 if (open_file) {
589 unlock_file = TRUE; 589 unlock_file = true;
590 fid = open_file->netfid; 590 fid = open_file->netfid;
591 } else if (pfid == NULL) { 591 } else if (pfid == NULL) {
592 int oplock = FALSE; 592 int oplock = 0;
593 /* open file */ 593 /* open file */
594 rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN, 594 rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN,
595 READ_CONTROL, 0, &fid, &oplock, NULL, 595 READ_CONTROL, 0, &fid, &oplock, NULL,
@@ -604,7 +604,7 @@ static struct cifs_ntsd *get_cifs_acl(u32 *pacllen, struct inode *inode,
604 604
605 rc = CIFSSMBGetCIFSACL(xid, cifs_sb->tcon, fid, &pntsd, pacllen); 605 rc = CIFSSMBGetCIFSACL(xid, cifs_sb->tcon, fid, &pntsd, pacllen);
606 cFYI(1, ("GetCIFSACL rc = %d ACL len %d", rc, *pacllen)); 606 cFYI(1, ("GetCIFSACL rc = %d ACL len %d", rc, *pacllen));
607 if (unlock_file == TRUE) /* find_readable_file increments ref count */ 607 if (unlock_file == true) /* find_readable_file increments ref count */
608 atomic_dec(&open_file->wrtPending); 608 atomic_dec(&open_file->wrtPending);
609 else if (pfid == NULL) /* if opened above we have to close the handle */ 609 else if (pfid == NULL) /* if opened above we have to close the handle */
610 CIFSSMBClose(xid, cifs_sb->tcon, fid); 610 CIFSSMBClose(xid, cifs_sb->tcon, fid);
@@ -619,7 +619,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
619 struct inode *inode, const char *path) 619 struct inode *inode, const char *path)
620{ 620{
621 struct cifsFileInfo *open_file; 621 struct cifsFileInfo *open_file;
622 int unlock_file = FALSE; 622 bool unlock_file = false;
623 int xid; 623 int xid;
624 int rc = -EIO; 624 int rc = -EIO;
625 __u16 fid; 625 __u16 fid;
@@ -640,10 +640,10 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
640 640
641 open_file = find_readable_file(CIFS_I(inode)); 641 open_file = find_readable_file(CIFS_I(inode));
642 if (open_file) { 642 if (open_file) {
643 unlock_file = TRUE; 643 unlock_file = true;
644 fid = open_file->netfid; 644 fid = open_file->netfid;
645 } else { 645 } else {
646 int oplock = FALSE; 646 int oplock = 0;
647 /* open file */ 647 /* open file */
648 rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN, 648 rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN,
649 WRITE_DAC, 0, &fid, &oplock, NULL, 649 WRITE_DAC, 0, &fid, &oplock, NULL,
@@ -658,7 +658,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
658 658
659 rc = CIFSSMBSetCIFSACL(xid, cifs_sb->tcon, fid, pnntsd, acllen); 659 rc = CIFSSMBSetCIFSACL(xid, cifs_sb->tcon, fid, pnntsd, acllen);
660 cFYI(DBG2, ("SetCIFSACL rc = %d", rc)); 660 cFYI(DBG2, ("SetCIFSACL rc = %d", rc));
661 if (unlock_file == TRUE) 661 if (unlock_file)
662 atomic_dec(&open_file->wrtPending); 662 atomic_dec(&open_file->wrtPending);
663 else 663 else
664 CIFSSMBClose(xid, cifs_sb->tcon, fid); 664 CIFSSMBClose(xid, cifs_sb->tcon, fid);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 39c2cbdface7..427a7c695896 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -222,50 +222,50 @@ static int
222cifs_statfs(struct dentry *dentry, struct kstatfs *buf) 222cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
223{ 223{
224 struct super_block *sb = dentry->d_sb; 224 struct super_block *sb = dentry->d_sb;
225 int xid; 225 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
226 struct cifsTconInfo *tcon = cifs_sb->tcon;
226 int rc = -EOPNOTSUPP; 227 int rc = -EOPNOTSUPP;
227 struct cifs_sb_info *cifs_sb; 228 int xid;
228 struct cifsTconInfo *pTcon;
229 229
230 xid = GetXid(); 230 xid = GetXid();
231 231
232 cifs_sb = CIFS_SB(sb);
233 pTcon = cifs_sb->tcon;
234
235 buf->f_type = CIFS_MAGIC_NUMBER; 232 buf->f_type = CIFS_MAGIC_NUMBER;
236 233
237 /* instead could get the real value via SMB_QUERY_FS_ATTRIBUTE_INFO */ 234 /*
238 buf->f_namelen = PATH_MAX; /* PATH_MAX may be too long - it would 235 * PATH_MAX may be too long - it would presumably be total path,
 239 presumably be total path, but note 236 * but note that some servers (including Samba 3) have a shorter
 240 that some servers (including Samba 3) 237 * maximum path.
241 have a shorter maximum path */ 238 *
239 * Instead could get the real value via SMB_QUERY_FS_ATTRIBUTE_INFO.
240 */
241 buf->f_namelen = PATH_MAX;
242 buf->f_files = 0; /* undefined */ 242 buf->f_files = 0; /* undefined */
243 buf->f_ffree = 0; /* unlimited */ 243 buf->f_ffree = 0; /* unlimited */
244 244
245/* BB we could add a second check for a QFS Unix capability bit */ 245 /*
246/* BB FIXME check CIFS_POSIX_EXTENSIONS Unix cap first FIXME BB */ 246 * We could add a second check for a QFS Unix capability bit
247 if ((pTcon->ses->capabilities & CAP_UNIX) && (CIFS_POSIX_EXTENSIONS & 247 */
248 le64_to_cpu(pTcon->fsUnixInfo.Capability))) 248 if ((tcon->ses->capabilities & CAP_UNIX) &&
249 rc = CIFSSMBQFSPosixInfo(xid, pTcon, buf); 249 (CIFS_POSIX_EXTENSIONS & le64_to_cpu(tcon->fsUnixInfo.Capability)))
250 250 rc = CIFSSMBQFSPosixInfo(xid, tcon, buf);
251 /* Only need to call the old QFSInfo if failed 251
252 on newer one */ 252 /*
253 if (rc) 253 * Only need to call the old QFSInfo if failed on newer one,
254 if (pTcon->ses->capabilities & CAP_NT_SMBS) 254 * e.g. by OS/2.
255 rc = CIFSSMBQFSInfo(xid, pTcon, buf); /* not supported by OS2 */ 255 **/
256 256 if (rc && (tcon->ses->capabilities & CAP_NT_SMBS))
257 /* Some old Windows servers also do not support level 103, retry with 257 rc = CIFSSMBQFSInfo(xid, tcon, buf);
258 older level one if old server failed the previous call or we 258
259 bypassed it because we detected that this was an older LANMAN sess */ 259 /*
260 * Some old Windows servers also do not support level 103, retry with
261 * older level one if old server failed the previous call or we
262 * bypassed it because we detected that this was an older LANMAN sess
263 */
260 if (rc) 264 if (rc)
261 rc = SMBOldQFSInfo(xid, pTcon, buf); 265 rc = SMBOldQFSInfo(xid, tcon, buf);
262 /* int f_type; 266
263 __fsid_t f_fsid;
264 int f_namelen; */
265 /* BB get from info in tcon struct at mount time call to QFSAttrInfo */
266 FreeXid(xid); 267 FreeXid(xid);
267 return 0; /* always return success? what if volume is no 268 return 0;
268 longer available? */
269} 269}
270 270
271static int cifs_permission(struct inode *inode, int mask, struct nameidata *nd) 271static int cifs_permission(struct inode *inode, int mask, struct nameidata *nd)
@@ -306,8 +306,8 @@ cifs_alloc_inode(struct super_block *sb)
306 /* Until the file is open and we have gotten oplock 306 /* Until the file is open and we have gotten oplock
307 info back from the server, can not assume caching of 307 info back from the server, can not assume caching of
308 file data or metadata */ 308 file data or metadata */
309 cifs_inode->clientCanCacheRead = FALSE; 309 cifs_inode->clientCanCacheRead = false;
310 cifs_inode->clientCanCacheAll = FALSE; 310 cifs_inode->clientCanCacheAll = false;
311 cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ 311 cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */
312 312
313 /* Can not set i_flags here - they get immediately overwritten 313 /* Can not set i_flags here - they get immediately overwritten
@@ -940,7 +940,7 @@ static int cifs_oplock_thread(void *dummyarg)
940 rc = CIFSSMBLock(0, pTcon, netfid, 940 rc = CIFSSMBLock(0, pTcon, netfid,
941 0 /* len */ , 0 /* offset */, 0, 941 0 /* len */ , 0 /* offset */, 0,
942 0, LOCKING_ANDX_OPLOCK_RELEASE, 942 0, LOCKING_ANDX_OPLOCK_RELEASE,
943 0 /* wait flag */); 943 false /* wait flag */);
944 cFYI(1, ("Oplock release rc = %d", rc)); 944 cFYI(1, ("Oplock release rc = %d", rc));
945 } 945 }
946 } else 946 } else
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index e1dd9f32e1d7..cd1301a09b3b 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -24,14 +24,6 @@
24 24
25#define ROOT_I 2 25#define ROOT_I 2
26 26
27#ifndef FALSE
28#define FALSE 0
29#endif
30
31#ifndef TRUE
32#define TRUE 1
33#endif
34
35extern struct file_system_type cifs_fs_type; 27extern struct file_system_type cifs_fs_type;
36extern const struct address_space_operations cifs_addr_ops; 28extern const struct address_space_operations cifs_addr_ops;
37extern const struct address_space_operations cifs_addr_ops_smallbuf; 29extern const struct address_space_operations cifs_addr_ops_smallbuf;
@@ -110,5 +102,5 @@ extern int cifs_ioctl(struct inode *inode, struct file *filep,
110extern const struct export_operations cifs_export_ops; 102extern const struct export_operations cifs_export_ops;
111#endif /* EXPERIMENTAL */ 103#endif /* EXPERIMENTAL */
112 104
113#define CIFS_VERSION "1.52" 105#define CIFS_VERSION "1.53"
114#endif /* _CIFSFS_H */ 106#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 69a2e1942542..b7d9f698e63e 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -57,14 +57,6 @@
57 57
58#include "cifspdu.h" 58#include "cifspdu.h"
59 59
60#ifndef FALSE
61#define FALSE 0
62#endif
63
64#ifndef TRUE
65#define TRUE 1
66#endif
67
68#ifndef XATTR_DOS_ATTRIB 60#ifndef XATTR_DOS_ATTRIB
69#define XATTR_DOS_ATTRIB "user.DOSATTRIB" 61#define XATTR_DOS_ATTRIB "user.DOSATTRIB"
70#endif 62#endif
@@ -147,7 +139,7 @@ struct TCP_Server_Info {
147 enum protocolEnum protocolType; 139 enum protocolEnum protocolType;
148 char versionMajor; 140 char versionMajor;
149 char versionMinor; 141 char versionMinor;
150 unsigned svlocal:1; /* local server or remote */ 142 bool svlocal:1; /* local server or remote */
151 atomic_t socketUseCount; /* number of open cifs sessions on socket */ 143 atomic_t socketUseCount; /* number of open cifs sessions on socket */
152 atomic_t inFlight; /* number of requests on the wire to server */ 144 atomic_t inFlight; /* number of requests on the wire to server */
153#ifdef CONFIG_CIFS_STATS2 145#ifdef CONFIG_CIFS_STATS2
@@ -286,10 +278,10 @@ struct cifsTconInfo {
286 FILE_SYSTEM_DEVICE_INFO fsDevInfo; 278 FILE_SYSTEM_DEVICE_INFO fsDevInfo;
287 FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo; /* ok if fs name truncated */ 279 FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo; /* ok if fs name truncated */
288 FILE_SYSTEM_UNIX_INFO fsUnixInfo; 280 FILE_SYSTEM_UNIX_INFO fsUnixInfo;
289 unsigned ipc:1; /* set if connection to IPC$ eg for RPC/PIPES */ 281 bool ipc:1; /* set if connection to IPC$ eg for RPC/PIPES */
290 unsigned retry:1; 282 bool retry:1;
291 unsigned nocase:1; 283 bool nocase:1;
292 unsigned unix_ext:1; /* if off disable Linux extensions to CIFS protocol 284 bool unix_ext:1; /* if false disable Linux extensions to CIFS protocol
293 for this mount even if server would support */ 285 for this mount even if server would support */
294 /* BB add field for back pointer to sb struct(s)? */ 286 /* BB add field for back pointer to sb struct(s)? */
295}; 287};
@@ -317,10 +309,10 @@ struct cifs_search_info {
317 char *srch_entries_start; 309 char *srch_entries_start;
318 char *presume_name; 310 char *presume_name;
319 unsigned int resume_name_len; 311 unsigned int resume_name_len;
320 unsigned endOfSearch:1; 312 bool endOfSearch:1;
321 unsigned emptyDir:1; 313 bool emptyDir:1;
322 unsigned unicode:1; 314 bool unicode:1;
323 unsigned smallBuf:1; /* so we know which buf_release function to call */ 315 bool smallBuf:1; /* so we know which buf_release function to call */
324}; 316};
325 317
326struct cifsFileInfo { 318struct cifsFileInfo {
@@ -335,9 +327,9 @@ struct cifsFileInfo {
335 struct inode *pInode; /* needed for oplock break */ 327 struct inode *pInode; /* needed for oplock break */
336 struct mutex lock_mutex; 328 struct mutex lock_mutex;
337 struct list_head llist; /* list of byte range locks we have. */ 329 struct list_head llist; /* list of byte range locks we have. */
338 unsigned closePend:1; /* file is marked to close */ 330 bool closePend:1; /* file is marked to close */
339 unsigned invalidHandle:1; /* file closed via session abend */ 331 bool invalidHandle:1; /* file closed via session abend */
340 unsigned messageMode:1; /* for pipes: message vs byte mode */ 332 bool messageMode:1; /* for pipes: message vs byte mode */
341 atomic_t wrtPending; /* handle in use - defer close */ 333 atomic_t wrtPending; /* handle in use - defer close */
342 struct semaphore fh_sem; /* prevents reopen race after dead ses*/ 334 struct semaphore fh_sem; /* prevents reopen race after dead ses*/
343 char *search_resume_name; /* BB removeme BB */ 335 char *search_resume_name; /* BB removeme BB */
@@ -356,9 +348,9 @@ struct cifsInodeInfo {
356 __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ 348 __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */
357 atomic_t inUse; /* num concurrent users (local openers cifs) of file*/ 349 atomic_t inUse; /* num concurrent users (local openers cifs) of file*/
358 unsigned long time; /* jiffies of last update/check of inode */ 350 unsigned long time; /* jiffies of last update/check of inode */
359 unsigned clientCanCacheRead:1; /* read oplock */ 351 bool clientCanCacheRead:1; /* read oplock */
360 unsigned clientCanCacheAll:1; /* read and writebehind oplock */ 352 bool clientCanCacheAll:1; /* read and writebehind oplock */
361 unsigned oplockPending:1; 353 bool oplockPending:1;
362 struct inode vfs_inode; 354 struct inode vfs_inode;
363}; 355};
364 356
@@ -426,9 +418,9 @@ struct mid_q_entry {
426 struct smb_hdr *resp_buf; /* response buffer */ 418 struct smb_hdr *resp_buf; /* response buffer */
427 int midState; /* wish this were enum but can not pass to wait_event */ 419 int midState; /* wish this were enum but can not pass to wait_event */
428 __u8 command; /* smb command code */ 420 __u8 command; /* smb command code */
429 unsigned largeBuf:1; /* if valid response, is pointer to large buf */ 421 bool largeBuf:1; /* if valid response, is pointer to large buf */
430 unsigned multiRsp:1; /* multiple trans2 responses for one request */ 422 bool multiRsp:1; /* multiple trans2 responses for one request */
431 unsigned multiEnd:1; /* both received */ 423 bool multiEnd:1; /* both received */
432}; 424};
433 425
434struct oplock_q_entry { 426struct oplock_q_entry {
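The cifsglob.h hunks above switch the one-bit flag members from "unsigned name:1" to "bool name:1". A minimal standalone sketch (plain C99, using an invented layout rather than the real cifs structures) showing that bool bit-fields still pack into a single word while giving the flags honest true/false semantics:

#include <stdbool.h>
#include <stdio.h>

/* illustrative layout only -- not the actual cifsFileInfo */
struct file_flags {
        bool closePend:1;       /* file is marked to close */
        bool invalidHandle:1;   /* file closed via session abend */
        bool messageMode:1;     /* for pipes: message vs byte mode */
};

int main(void)
{
        struct file_flags f = { .invalidHandle = true };

        if (f.invalidHandle && !f.closePend)
                printf("handle invalid, close not pending\n");
        printf("flags struct occupies %zu byte(s)\n", sizeof(f));
        return 0;
}

Any non-zero value stored in a bool:1 member reads back as true, which is what lets the later hunks drop the explicit "== TRUE" / "== FALSE" comparisons at the call sites.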
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 9f49c2f3582c..c43bf4b7a556 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -340,6 +340,7 @@
340#define OPEN_NO_RECALL 0x00400000 340#define OPEN_NO_RECALL 0x00400000
341#define OPEN_FREE_SPACE_QUERY 0x00800000 /* should be zero */ 341#define OPEN_FREE_SPACE_QUERY 0x00800000 /* should be zero */
342#define CREATE_OPTIONS_MASK 0x007FFFFF 342#define CREATE_OPTIONS_MASK 0x007FFFFF
343#define CREATE_OPTION_READONLY 0x10000000
343#define CREATE_OPTION_SPECIAL 0x20000000 /* system. NB not sent over wire */ 344#define CREATE_OPTION_SPECIAL 0x20000000 /* system. NB not sent over wire */
344 345
345/* ImpersonationLevel flags */ 346/* ImpersonationLevel flags */
@@ -2050,7 +2051,7 @@ typedef struct {
2050 to 0xFFFF00 */ 2051 to 0xFFFF00 */
2051#define CIFS_UNIX_LARGE_WRITE_CAP 0x00000080 2052#define CIFS_UNIX_LARGE_WRITE_CAP 0x00000080
2052#define CIFS_UNIX_TRANSPORT_ENCRYPTION_CAP 0x00000100 /* can do SPNEGO crypt */ 2053#define CIFS_UNIX_TRANSPORT_ENCRYPTION_CAP 0x00000100 /* can do SPNEGO crypt */
2053#define CIFS_UNIX_TRANPSORT_ENCRYPTION_MANDATORY_CAP 0x00000200 /* must do */ 2054#define CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP 0x00000200 /* must do */
2054#define CIFS_UNIX_PROXY_CAP 0x00000400 /* Proxy cap: 0xACE ioctl and 2055#define CIFS_UNIX_PROXY_CAP 0x00000400 /* Proxy cap: 0xACE ioctl and
2055 QFS PROXY call */ 2056 QFS PROXY call */
2056#ifdef CONFIG_CIFS_POSIX 2057#ifdef CONFIG_CIFS_POSIX
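The new CREATE_OPTION_READONLY bit is defined above CREATE_OPTIONS_MASK (0x007FFFFF), so, like CREATE_OPTION_SPECIAL, it acts as a client-internal hint that is masked off before the create options go on the wire (the cifssmb.c hunks below keep the existing "create_options & CREATE_OPTIONS_MASK" when filling pSMB->CreateOptions). A small standalone sketch of that split between wire bits and local hints; the helper function and the low example bit are hypothetical:

#include <stdint.h>
#include <stdio.h>

#define CREATE_OPTIONS_MASK     0x007FFFFF
#define CREATE_OPTION_READONLY  0x10000000      /* client-side hint only */
#define CREATE_OPTION_SPECIAL   0x20000000      /* not sent over wire */

/* hypothetical helper: keep only the bits that may be sent to the server */
static uint32_t wire_create_options(uint32_t opts)
{
        return opts & CREATE_OPTIONS_MASK;      /* high "hint" bits dropped */
}

int main(void)
{
        uint32_t opts = 0x00000001 /* some low wire option bit */ |
                        CREATE_OPTION_READONLY;

        printf("requested 0x%08x, sent 0x%08x, readonly hint: %d\n",
               (unsigned)opts, (unsigned)wire_create_options(opts),
               !!(opts & CREATE_OPTION_READONLY));
        return 0;
}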
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 50f9fdae19b3..d481f6c5a2be 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -59,8 +59,9 @@ extern int SendReceiveBlockingLock(const unsigned int xid,
59 struct smb_hdr *out_buf, 59 struct smb_hdr *out_buf,
60 int *bytes_returned); 60 int *bytes_returned);
61extern int checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length); 61extern int checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length);
62extern int is_valid_oplock_break(struct smb_hdr *smb, struct TCP_Server_Info *); 62extern bool is_valid_oplock_break(struct smb_hdr *smb,
63extern int is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof); 63 struct TCP_Server_Info *);
64extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof);
64extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *); 65extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *);
65#ifdef CONFIG_CIFS_EXPERIMENTAL 66#ifdef CONFIG_CIFS_EXPERIMENTAL
66extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *); 67extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *);
@@ -69,7 +70,7 @@ extern unsigned int smbCalcSize(struct smb_hdr *ptr);
69extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr); 70extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
70extern int decode_negTokenInit(unsigned char *security_blob, int length, 71extern int decode_negTokenInit(unsigned char *security_blob, int length,
71 enum securityEnum *secType); 72 enum securityEnum *secType);
72extern int cifs_inet_pton(int, char *source, void *dst); 73extern int cifs_inet_pton(const int, const char *source, void *dst);
73extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr); 74extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr);
74extern void header_assemble(struct smb_hdr *, char /* command */ , 75extern void header_assemble(struct smb_hdr *, char /* command */ ,
75 const struct cifsTconInfo *, int /* length of 76 const struct cifsTconInfo *, int /* length of
@@ -187,12 +188,12 @@ extern int CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon,
187#endif /* possibly unneeded function */ 188#endif /* possibly unneeded function */
188extern int CIFSSMBSetEOF(const int xid, struct cifsTconInfo *tcon, 189extern int CIFSSMBSetEOF(const int xid, struct cifsTconInfo *tcon,
189 const char *fileName, __u64 size, 190 const char *fileName, __u64 size,
190 int setAllocationSizeFlag, 191 bool setAllocationSizeFlag,
191 const struct nls_table *nls_codepage, 192 const struct nls_table *nls_codepage,
192 int remap_special_chars); 193 int remap_special_chars);
193extern int CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, 194extern int CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon,
194 __u64 size, __u16 fileHandle, __u32 opener_pid, 195 __u64 size, __u16 fileHandle, __u32 opener_pid,
195 int AllocSizeFlag); 196 bool AllocSizeFlag);
196extern int CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *pTcon, 197extern int CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *pTcon,
197 char *full_path, __u64 mode, __u64 uid, 198 char *full_path, __u64 mode, __u64 uid,
198 __u64 gid, dev_t dev, 199 __u64 gid, dev_t dev,
@@ -291,11 +292,11 @@ extern int CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
291 const __u16 netfid, const __u64 len, 292 const __u16 netfid, const __u64 len,
292 const __u64 offset, const __u32 numUnlock, 293 const __u64 offset, const __u32 numUnlock,
293 const __u32 numLock, const __u8 lockType, 294 const __u32 numLock, const __u8 lockType,
294 const int waitFlag); 295 const bool waitFlag);
295extern int CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, 296extern int CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
296 const __u16 smb_file_id, const int get_flag, 297 const __u16 smb_file_id, const int get_flag,
297 const __u64 len, struct file_lock *, 298 const __u64 len, struct file_lock *,
298 const __u16 lock_type, const int waitFlag); 299 const __u16 lock_type, const bool waitFlag);
299extern int CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon); 300extern int CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon);
300extern int CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses); 301extern int CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses);
301 302
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 4728fa982a4e..95fbba4ea7d4 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -95,7 +95,7 @@ static void mark_open_files_invalid(struct cifsTconInfo *pTcon)
95 list_for_each_safe(tmp, tmp1, &pTcon->openFileList) { 95 list_for_each_safe(tmp, tmp1, &pTcon->openFileList) {
96 open_file = list_entry(tmp, struct cifsFileInfo, tlist); 96 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
97 if (open_file) 97 if (open_file)
98 open_file->invalidHandle = TRUE; 98 open_file->invalidHandle = true;
99 } 99 }
100 write_unlock(&GlobalSMBSeslock); 100 write_unlock(&GlobalSMBSeslock);
101 /* BB Add call to invalidate_inodes(sb) for all superblocks mounted 101 /* BB Add call to invalidate_inodes(sb) for all superblocks mounted
@@ -141,7 +141,7 @@ small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
141 if (tcon->ses->server->tcpStatus == 141 if (tcon->ses->server->tcpStatus ==
142 CifsNeedReconnect) { 142 CifsNeedReconnect) {
143 /* on "soft" mounts we wait once */ 143 /* on "soft" mounts we wait once */
144 if ((tcon->retry == FALSE) || 144 if (!tcon->retry ||
145 (tcon->ses->status == CifsExiting)) { 145 (tcon->ses->status == CifsExiting)) {
146 cFYI(1, ("gave up waiting on " 146 cFYI(1, ("gave up waiting on "
147 "reconnect in smb_init")); 147 "reconnect in smb_init"));
@@ -289,7 +289,7 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
289 if (tcon->ses->server->tcpStatus == 289 if (tcon->ses->server->tcpStatus ==
290 CifsNeedReconnect) { 290 CifsNeedReconnect) {
291 /* on "soft" mounts we wait once */ 291 /* on "soft" mounts we wait once */
292 if ((tcon->retry == FALSE) || 292 if (!tcon->retry ||
293 (tcon->ses->status == CifsExiting)) { 293 (tcon->ses->status == CifsExiting)) {
294 cFYI(1, ("gave up waiting on " 294 cFYI(1, ("gave up waiting on "
295 "reconnect in smb_init")); 295 "reconnect in smb_init"));
@@ -1224,11 +1224,8 @@ OldOpenRetry:
1224 else /* BB FIXME BB */ 1224 else /* BB FIXME BB */
1225 pSMB->FileAttributes = cpu_to_le16(0/*ATTR_NORMAL*/); 1225 pSMB->FileAttributes = cpu_to_le16(0/*ATTR_NORMAL*/);
1226 1226
1227 /* if ((omode & S_IWUGO) == 0) 1227 if (create_options & CREATE_OPTION_READONLY)
1228 pSMB->FileAttributes |= cpu_to_le32(ATTR_READONLY);*/ 1228 pSMB->FileAttributes |= cpu_to_le16(ATTR_READONLY);
1229 /* Above line causes problems due to vfs splitting create into two
1230 pieces - need to set mode after file created not while it is
1231 being created */
1232 1229
1233 /* BB FIXME BB */ 1230 /* BB FIXME BB */
1234/* pSMB->CreateOptions = cpu_to_le32(create_options & 1231/* pSMB->CreateOptions = cpu_to_le32(create_options &
@@ -1331,17 +1328,16 @@ openRetry:
1331 pSMB->FileAttributes = cpu_to_le32(ATTR_SYSTEM); 1328 pSMB->FileAttributes = cpu_to_le32(ATTR_SYSTEM);
1332 else 1329 else
1333 pSMB->FileAttributes = cpu_to_le32(ATTR_NORMAL); 1330 pSMB->FileAttributes = cpu_to_le32(ATTR_NORMAL);
1331
1334 /* XP does not handle ATTR_POSIX_SEMANTICS */ 1332 /* XP does not handle ATTR_POSIX_SEMANTICS */
1335 /* but it helps speed up case sensitive checks for other 1333 /* but it helps speed up case sensitive checks for other
1336 servers such as Samba */ 1334 servers such as Samba */
1337 if (tcon->ses->capabilities & CAP_UNIX) 1335 if (tcon->ses->capabilities & CAP_UNIX)
1338 pSMB->FileAttributes |= cpu_to_le32(ATTR_POSIX_SEMANTICS); 1336 pSMB->FileAttributes |= cpu_to_le32(ATTR_POSIX_SEMANTICS);
1339 1337
1340 /* if ((omode & S_IWUGO) == 0) 1338 if (create_options & CREATE_OPTION_READONLY)
1341 pSMB->FileAttributes |= cpu_to_le32(ATTR_READONLY);*/ 1339 pSMB->FileAttributes |= cpu_to_le32(ATTR_READONLY);
1342 /* Above line causes problems due to vfs splitting create into two 1340
1343 pieces - need to set mode after file created not while it is
1344 being created */
1345 pSMB->ShareAccess = cpu_to_le32(FILE_SHARE_ALL); 1341 pSMB->ShareAccess = cpu_to_le32(FILE_SHARE_ALL);
1346 pSMB->CreateDisposition = cpu_to_le32(openDisposition); 1342 pSMB->CreateDisposition = cpu_to_le32(openDisposition);
1347 pSMB->CreateOptions = cpu_to_le32(create_options & CREATE_OPTIONS_MASK); 1343 pSMB->CreateOptions = cpu_to_le32(create_options & CREATE_OPTIONS_MASK);
@@ -1686,7 +1682,7 @@ int
1686CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, 1682CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
1687 const __u16 smb_file_id, const __u64 len, 1683 const __u16 smb_file_id, const __u64 len,
1688 const __u64 offset, const __u32 numUnlock, 1684 const __u64 offset, const __u32 numUnlock,
1689 const __u32 numLock, const __u8 lockType, const int waitFlag) 1685 const __u32 numLock, const __u8 lockType, const bool waitFlag)
1690{ 1686{
1691 int rc = 0; 1687 int rc = 0;
1692 LOCK_REQ *pSMB = NULL; 1688 LOCK_REQ *pSMB = NULL;
@@ -1695,7 +1691,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
1695 int timeout = 0; 1691 int timeout = 0;
1696 __u16 count; 1692 __u16 count;
1697 1693
1698 cFYI(1, ("CIFSSMBLock timeout %d numLock %d", waitFlag, numLock)); 1694 cFYI(1, ("CIFSSMBLock timeout %d numLock %d", (int)waitFlag, numLock));
1699 rc = small_smb_init(SMB_COM_LOCKING_ANDX, 8, tcon, (void **) &pSMB); 1695 rc = small_smb_init(SMB_COM_LOCKING_ANDX, 8, tcon, (void **) &pSMB);
1700 1696
1701 if (rc) 1697 if (rc)
@@ -1706,7 +1702,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
1706 if (lockType == LOCKING_ANDX_OPLOCK_RELEASE) { 1702 if (lockType == LOCKING_ANDX_OPLOCK_RELEASE) {
1707 timeout = CIFS_ASYNC_OP; /* no response expected */ 1703 timeout = CIFS_ASYNC_OP; /* no response expected */
1708 pSMB->Timeout = 0; 1704 pSMB->Timeout = 0;
1709 } else if (waitFlag == TRUE) { 1705 } else if (waitFlag) {
1710 timeout = CIFS_BLOCKING_OP; /* blocking operation, no timeout */ 1706 timeout = CIFS_BLOCKING_OP; /* blocking operation, no timeout */
1711 pSMB->Timeout = cpu_to_le32(-1);/* blocking - do not time out */ 1707 pSMB->Timeout = cpu_to_le32(-1);/* blocking - do not time out */
1712 } else { 1708 } else {
@@ -1756,7 +1752,7 @@ int
1756CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, 1752CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
1757 const __u16 smb_file_id, const int get_flag, const __u64 len, 1753 const __u16 smb_file_id, const int get_flag, const __u64 len,
1758 struct file_lock *pLockData, const __u16 lock_type, 1754 struct file_lock *pLockData, const __u16 lock_type,
1759 const int waitFlag) 1755 const bool waitFlag)
1760{ 1756{
1761 struct smb_com_transaction2_sfi_req *pSMB = NULL; 1757 struct smb_com_transaction2_sfi_req *pSMB = NULL;
1762 struct smb_com_transaction2_sfi_rsp *pSMBr = NULL; 1758 struct smb_com_transaction2_sfi_rsp *pSMBr = NULL;
@@ -3581,9 +3577,9 @@ findFirstRetry:
3581 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 3577 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
3582 if (rc == 0) { 3578 if (rc == 0) {
3583 if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) 3579 if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE)
3584 psrch_inf->unicode = TRUE; 3580 psrch_inf->unicode = true;
3585 else 3581 else
3586 psrch_inf->unicode = FALSE; 3582 psrch_inf->unicode = false;
3587 3583
3588 psrch_inf->ntwrk_buf_start = (char *)pSMBr; 3584 psrch_inf->ntwrk_buf_start = (char *)pSMBr;
3589 psrch_inf->smallBuf = 0; 3585 psrch_inf->smallBuf = 0;
@@ -3594,9 +3590,9 @@ findFirstRetry:
3594 le16_to_cpu(pSMBr->t2.ParameterOffset)); 3590 le16_to_cpu(pSMBr->t2.ParameterOffset));
3595 3591
3596 if (parms->EndofSearch) 3592 if (parms->EndofSearch)
3597 psrch_inf->endOfSearch = TRUE; 3593 psrch_inf->endOfSearch = true;
3598 else 3594 else
3599 psrch_inf->endOfSearch = FALSE; 3595 psrch_inf->endOfSearch = false;
3600 3596
3601 psrch_inf->entries_in_buffer = 3597 psrch_inf->entries_in_buffer =
3602 le16_to_cpu(parms->SearchCount); 3598 le16_to_cpu(parms->SearchCount);
@@ -3624,7 +3620,7 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
3624 3620
3625 cFYI(1, ("In FindNext")); 3621 cFYI(1, ("In FindNext"));
3626 3622
3627 if (psrch_inf->endOfSearch == TRUE) 3623 if (psrch_inf->endOfSearch)
3628 return -ENOENT; 3624 return -ENOENT;
3629 3625
3630 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 3626 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
@@ -3682,7 +3678,7 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
3682 cifs_stats_inc(&tcon->num_fnext); 3678 cifs_stats_inc(&tcon->num_fnext);
3683 if (rc) { 3679 if (rc) {
3684 if (rc == -EBADF) { 3680 if (rc == -EBADF) {
3685 psrch_inf->endOfSearch = TRUE; 3681 psrch_inf->endOfSearch = true;
3686 rc = 0; /* search probably was closed at end of search*/ 3682 rc = 0; /* search probably was closed at end of search*/
3687 } else 3683 } else
3688 cFYI(1, ("FindNext returned = %d", rc)); 3684 cFYI(1, ("FindNext returned = %d", rc));
@@ -3692,9 +3688,9 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
3692 if (rc == 0) { 3688 if (rc == 0) {
3693 /* BB fixme add lock for file (srch_info) struct here */ 3689 /* BB fixme add lock for file (srch_info) struct here */
3694 if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) 3690 if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE)
3695 psrch_inf->unicode = TRUE; 3691 psrch_inf->unicode = true;
3696 else 3692 else
3697 psrch_inf->unicode = FALSE; 3693 psrch_inf->unicode = false;
3698 response_data = (char *) &pSMBr->hdr.Protocol + 3694 response_data = (char *) &pSMBr->hdr.Protocol +
3699 le16_to_cpu(pSMBr->t2.ParameterOffset); 3695 le16_to_cpu(pSMBr->t2.ParameterOffset);
3700 parms = (T2_FNEXT_RSP_PARMS *)response_data; 3696 parms = (T2_FNEXT_RSP_PARMS *)response_data;
@@ -3709,9 +3705,9 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
3709 psrch_inf->ntwrk_buf_start = (char *)pSMB; 3705 psrch_inf->ntwrk_buf_start = (char *)pSMB;
3710 psrch_inf->smallBuf = 0; 3706 psrch_inf->smallBuf = 0;
3711 if (parms->EndofSearch) 3707 if (parms->EndofSearch)
3712 psrch_inf->endOfSearch = TRUE; 3708 psrch_inf->endOfSearch = true;
3713 else 3709 else
3714 psrch_inf->endOfSearch = FALSE; 3710 psrch_inf->endOfSearch = false;
3715 psrch_inf->entries_in_buffer = 3711 psrch_inf->entries_in_buffer =
3716 le16_to_cpu(parms->SearchCount); 3712 le16_to_cpu(parms->SearchCount);
3717 psrch_inf->index_of_last_entry += 3713 psrch_inf->index_of_last_entry +=
@@ -4586,7 +4582,7 @@ QFSPosixRetry:
4586 4582
4587int 4583int
4588CIFSSMBSetEOF(const int xid, struct cifsTconInfo *tcon, const char *fileName, 4584CIFSSMBSetEOF(const int xid, struct cifsTconInfo *tcon, const char *fileName,
4589 __u64 size, int SetAllocation, 4585 __u64 size, bool SetAllocation,
4590 const struct nls_table *nls_codepage, int remap) 4586 const struct nls_table *nls_codepage, int remap)
4591{ 4587{
4592 struct smb_com_transaction2_spi_req *pSMB = NULL; 4588 struct smb_com_transaction2_spi_req *pSMB = NULL;
@@ -4675,7 +4671,7 @@ SetEOFRetry:
4675 4671
4676int 4672int
4677CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size, 4673CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size,
4678 __u16 fid, __u32 pid_of_opener, int SetAllocation) 4674 __u16 fid, __u32 pid_of_opener, bool SetAllocation)
4679{ 4675{
4680 struct smb_com_transaction2_sfi_req *pSMB = NULL; 4676 struct smb_com_transaction2_sfi_req *pSMB = NULL;
4681 char *data_offset; 4677 char *data_offset;
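In both open paths above, the old commented-out "omode & S_IWUGO" test is replaced by checking the new CREATE_OPTION_READONLY bit and OR-ing ATTR_READONLY into FileAttributes, with cpu_to_le16() in the legacy open and cpu_to_le32() in the NT create because the two request formats carry the attribute field at different widths. A hedged userspace sketch of the same pattern; htole16()/htole32() from <endian.h> stand in for the kernel's cpu_to_le16()/cpu_to_le32(), and the ATTR_READONLY value is assumed to be the DOS read-only bit:

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

#define CREATE_OPTION_READONLY 0x10000000
#define ATTR_READONLY          0x0001   /* assumed DOS read-only attribute */

int main(void)
{
        uint32_t create_options = CREATE_OPTION_READONLY;
        uint16_t attrs_le16 = 0;        /* legacy open: 16-bit attribute field */
        uint32_t attrs_le32 = 0;        /* NT create: 32-bit attribute field */

        if (create_options & CREATE_OPTION_READONLY) {
                attrs_le16 |= htole16(ATTR_READONLY);
                attrs_le32 |= htole32(ATTR_READONLY);
        }
        printf("le16 attrs 0x%04x, le32 attrs 0x%08x\n",
               (unsigned)attrs_le16, (unsigned)attrs_le32);
        return 0;
}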
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index e17106730168..f428bf3bf1a9 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -49,8 +49,6 @@
49#define CIFS_PORT 445 49#define CIFS_PORT 445
50#define RFC1001_PORT 139 50#define RFC1001_PORT 139
51 51
52static DECLARE_COMPLETION(cifsd_complete);
53
54extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8, 52extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8,
55 unsigned char *p24); 53 unsigned char *p24);
56 54
@@ -71,23 +69,23 @@ struct smb_vol {
71 mode_t file_mode; 69 mode_t file_mode;
72 mode_t dir_mode; 70 mode_t dir_mode;
73 unsigned secFlg; 71 unsigned secFlg;
74 unsigned rw:1; 72 bool rw:1;
75 unsigned retry:1; 73 bool retry:1;
76 unsigned intr:1; 74 bool intr:1;
77 unsigned setuids:1; 75 bool setuids:1;
78 unsigned override_uid:1; 76 bool override_uid:1;
79 unsigned override_gid:1; 77 bool override_gid:1;
80 unsigned noperm:1; 78 bool noperm:1;
81 unsigned no_psx_acl:1; /* set if posix acl support should be disabled */ 79 bool no_psx_acl:1; /* set if posix acl support should be disabled */
82 unsigned cifs_acl:1; 80 bool cifs_acl:1;
83 unsigned no_xattr:1; /* set if xattr (EA) support should be disabled*/ 81 bool no_xattr:1; /* set if xattr (EA) support should be disabled*/
84 unsigned server_ino:1; /* use inode numbers from server ie UniqueId */ 82 bool server_ino:1; /* use inode numbers from server ie UniqueId */
85 unsigned direct_io:1; 83 bool direct_io:1;
86 unsigned remap:1; /* set to remap seven reserved chars in filenames */ 84 bool remap:1; /* set to remap seven reserved chars in filenames */
87 unsigned posix_paths:1; /* unset to not ask for posix pathnames. */ 85 bool posix_paths:1; /* unset to not ask for posix pathnames. */
88 unsigned no_linux_ext:1; 86 bool no_linux_ext:1;
89 unsigned sfu_emul:1; 87 bool sfu_emul:1;
90 unsigned nullauth:1; /* attempt to authenticate with null user */ 88 bool nullauth:1; /* attempt to authenticate with null user */
91 unsigned nocase; /* request case insensitive filenames */ 89 unsigned nocase; /* request case insensitive filenames */
92 unsigned nobrl; /* disable sending byte range locks to srv */ 90 unsigned nobrl; /* disable sending byte range locks to srv */
93 unsigned int rsize; 91 unsigned int rsize;
@@ -345,18 +343,16 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
345 struct task_struct *task_to_wake = NULL; 343 struct task_struct *task_to_wake = NULL;
346 struct mid_q_entry *mid_entry; 344 struct mid_q_entry *mid_entry;
347 char temp; 345 char temp;
348 int isLargeBuf = FALSE; 346 bool isLargeBuf = false;
349 int isMultiRsp; 347 bool isMultiRsp;
350 int reconnect; 348 int reconnect;
351 349
352 current->flags |= PF_MEMALLOC; 350 current->flags |= PF_MEMALLOC;
353 server->tsk = current; /* save process info to wake at shutdown */
354 cFYI(1, ("Demultiplex PID: %d", task_pid_nr(current))); 351 cFYI(1, ("Demultiplex PID: %d", task_pid_nr(current)));
355 write_lock(&GlobalSMBSeslock); 352 write_lock(&GlobalSMBSeslock);
356 atomic_inc(&tcpSesAllocCount); 353 atomic_inc(&tcpSesAllocCount);
357 length = tcpSesAllocCount.counter; 354 length = tcpSesAllocCount.counter;
358 write_unlock(&GlobalSMBSeslock); 355 write_unlock(&GlobalSMBSeslock);
359 complete(&cifsd_complete);
360 if (length > 1) 356 if (length > 1)
361 mempool_resize(cifs_req_poolp, length + cifs_min_rcv, 357 mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
362 GFP_KERNEL); 358 GFP_KERNEL);
@@ -390,8 +386,8 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
390 } else /* if existing small buf clear beginning */ 386 } else /* if existing small buf clear beginning */
391 memset(smallbuf, 0, sizeof(struct smb_hdr)); 387 memset(smallbuf, 0, sizeof(struct smb_hdr));
392 388
393 isLargeBuf = FALSE; 389 isLargeBuf = false;
394 isMultiRsp = FALSE; 390 isMultiRsp = false;
395 smb_buffer = smallbuf; 391 smb_buffer = smallbuf;
396 iov.iov_base = smb_buffer; 392 iov.iov_base = smb_buffer;
397 iov.iov_len = 4; 393 iov.iov_len = 4;
@@ -517,7 +513,7 @@ incomplete_rcv:
517 reconnect = 0; 513 reconnect = 0;
518 514
519 if (pdu_length > MAX_CIFS_SMALL_BUFFER_SIZE - 4) { 515 if (pdu_length > MAX_CIFS_SMALL_BUFFER_SIZE - 4) {
520 isLargeBuf = TRUE; 516 isLargeBuf = true;
521 memcpy(bigbuf, smallbuf, 4); 517 memcpy(bigbuf, smallbuf, 4);
522 smb_buffer = bigbuf; 518 smb_buffer = bigbuf;
523 } 519 }
@@ -582,16 +578,18 @@ incomplete_rcv:
582 (mid_entry->command == smb_buffer->Command)) { 578 (mid_entry->command == smb_buffer->Command)) {
583 if (check2ndT2(smb_buffer,server->maxBuf) > 0) { 579 if (check2ndT2(smb_buffer,server->maxBuf) > 0) {
584 /* We have a multipart transact2 resp */ 580 /* We have a multipart transact2 resp */
585 isMultiRsp = TRUE; 581 isMultiRsp = true;
586 if (mid_entry->resp_buf) { 582 if (mid_entry->resp_buf) {
587 /* merge response - fix up 1st*/ 583 /* merge response - fix up 1st*/
588 if (coalesce_t2(smb_buffer, 584 if (coalesce_t2(smb_buffer,
589 mid_entry->resp_buf)) { 585 mid_entry->resp_buf)) {
590 mid_entry->multiRsp = 1; 586 mid_entry->multiRsp =
587 true;
591 break; 588 break;
592 } else { 589 } else {
593 /* all parts received */ 590 /* all parts received */
594 mid_entry->multiEnd = 1; 591 mid_entry->multiEnd =
592 true;
595 goto multi_t2_fnd; 593 goto multi_t2_fnd;
596 } 594 }
597 } else { 595 } else {
@@ -603,17 +601,15 @@ incomplete_rcv:
603 /* Have first buffer */ 601 /* Have first buffer */
604 mid_entry->resp_buf = 602 mid_entry->resp_buf =
605 smb_buffer; 603 smb_buffer;
606 mid_entry->largeBuf = 1; 604 mid_entry->largeBuf =
605 true;
607 bigbuf = NULL; 606 bigbuf = NULL;
608 } 607 }
609 } 608 }
610 break; 609 break;
611 } 610 }
612 mid_entry->resp_buf = smb_buffer; 611 mid_entry->resp_buf = smb_buffer;
613 if (isLargeBuf) 612 mid_entry->largeBuf = isLargeBuf;
614 mid_entry->largeBuf = 1;
615 else
616 mid_entry->largeBuf = 0;
617multi_t2_fnd: 613multi_t2_fnd:
618 task_to_wake = mid_entry->tsk; 614 task_to_wake = mid_entry->tsk;
619 mid_entry->midState = MID_RESPONSE_RECEIVED; 615 mid_entry->midState = MID_RESPONSE_RECEIVED;
@@ -638,8 +634,8 @@ multi_t2_fnd:
638 smallbuf = NULL; 634 smallbuf = NULL;
639 } 635 }
640 wake_up_process(task_to_wake); 636 wake_up_process(task_to_wake);
641 } else if ((is_valid_oplock_break(smb_buffer, server) == FALSE) 637 } else if (!is_valid_oplock_break(smb_buffer, server) &&
642 && (isMultiRsp == FALSE)) { 638 !isMultiRsp) {
643 cERROR(1, ("No task to wake, unknown frame received! " 639 cERROR(1, ("No task to wake, unknown frame received! "
644 "NumMids %d", midCount.counter)); 640 "NumMids %d", midCount.counter));
645 cifs_dump_mem("Received Data is: ", (char *)smb_buffer, 641 cifs_dump_mem("Received Data is: ", (char *)smb_buffer,
@@ -654,10 +650,20 @@ multi_t2_fnd:
654 650
655 spin_lock(&GlobalMid_Lock); 651 spin_lock(&GlobalMid_Lock);
656 server->tcpStatus = CifsExiting; 652 server->tcpStatus = CifsExiting;
657 server->tsk = NULL; 653 spin_unlock(&GlobalMid_Lock);
654
655 /* don't exit until kthread_stop is called */
656 set_current_state(TASK_UNINTERRUPTIBLE);
657 while (!kthread_should_stop()) {
658 schedule();
659 set_current_state(TASK_UNINTERRUPTIBLE);
660 }
661 set_current_state(TASK_RUNNING);
662
658 /* check if we have blocked requests that need to free */ 663 /* check if we have blocked requests that need to free */
659 /* Note that cifs_max_pending is normally 50, but 664 /* Note that cifs_max_pending is normally 50, but
660 can be set at module install time to as little as two */ 665 can be set at module install time to as little as two */
666 spin_lock(&GlobalMid_Lock);
661 if (atomic_read(&server->inFlight) >= cifs_max_pending) 667 if (atomic_read(&server->inFlight) >= cifs_max_pending)
662 atomic_set(&server->inFlight, cifs_max_pending - 1); 668 atomic_set(&server->inFlight, cifs_max_pending - 1);
663 /* We do not want to set the max_pending too low or we 669 /* We do not want to set the max_pending too low or we
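With the cifsd_complete completion removed, cifs_demultiplex_thread now parks itself once tcpStatus reaches CifsExiting and only returns after kthread_stop() has been called, so the stopping side can no longer race with the thread's exit. A sketch of that park-until-stopped idiom as kernel-thread code (an invented worker function, not the cifs thread itself, assuming the standard kthread API):

#include <linux/kthread.h>
#include <linux/sched.h>

static int worker_fn(void *data)
{
        /* ... main service loop runs until shutdown is detected ... */

        /* don't exit until kthread_stop() is called */
        set_current_state(TASK_UNINTERRUPTIBLE);
        while (!kthread_should_stop()) {
                schedule();
                set_current_state(TASK_UNINTERRUPTIBLE);
        }
        set_current_state(TASK_RUNNING);
        return 0;
}

Because the thread is guaranteed to still exist when kthread_stop() runs, the later cifs_mount() hunks can drop the "tsk = srvTcp->tsk; if (tsk)" copy-then-test dance and call kthread_stop(srvTcp->tsk) directly.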
@@ -825,7 +831,7 @@ cifs_parse_mount_options(char *options, const char *devname,
825 vol->file_mode = (S_IRWXUGO | S_ISGID) & (~S_IXGRP); 831 vol->file_mode = (S_IRWXUGO | S_ISGID) & (~S_IXGRP);
826 832
827 /* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */ 833 /* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */
828 vol->rw = TRUE; 834 vol->rw = true;
829 /* default is always to request posix paths. */ 835 /* default is always to request posix paths. */
830 vol->posix_paths = 1; 836 vol->posix_paths = 1;
831 837
@@ -1181,7 +1187,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1181 } else if (strnicmp(data, "guest", 5) == 0) { 1187 } else if (strnicmp(data, "guest", 5) == 0) {
1182 /* ignore */ 1188 /* ignore */
1183 } else if (strnicmp(data, "rw", 2) == 0) { 1189 } else if (strnicmp(data, "rw", 2) == 0) {
1184 vol->rw = TRUE; 1190 vol->rw = true;
1185 } else if ((strnicmp(data, "suid", 4) == 0) || 1191 } else if ((strnicmp(data, "suid", 4) == 0) ||
1186 (strnicmp(data, "nosuid", 6) == 0) || 1192 (strnicmp(data, "nosuid", 6) == 0) ||
1187 (strnicmp(data, "exec", 4) == 0) || 1193 (strnicmp(data, "exec", 4) == 0) ||
@@ -1197,7 +1203,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1197 is ok to just ignore them */ 1203 is ok to just ignore them */
1198 continue; 1204 continue;
1199 } else if (strnicmp(data, "ro", 2) == 0) { 1205 } else if (strnicmp(data, "ro", 2) == 0) {
1200 vol->rw = FALSE; 1206 vol->rw = false;
1201 } else if (strnicmp(data, "hard", 4) == 0) { 1207 } else if (strnicmp(data, "hard", 4) == 0) {
1202 vol->retry = 1; 1208 vol->retry = 1;
1203 } else if (strnicmp(data, "soft", 4) == 0) { 1209 } else if (strnicmp(data, "soft", 4) == 0) {
@@ -1305,6 +1311,9 @@ cifs_parse_mount_options(char *options, const char *devname,
1305 "begin with // or \\\\ \n"); 1311 "begin with // or \\\\ \n");
1306 return 1; 1312 return 1;
1307 } 1313 }
1314 value = strpbrk(vol->UNC+2, "/\\");
1315 if (value)
1316 *value = '\\';
1308 } else { 1317 } else {
1309 printk(KERN_WARNING "CIFS: UNC name too long\n"); 1318 printk(KERN_WARNING "CIFS: UNC name too long\n");
1310 return 1; 1319 return 1;
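The added strpbrk() call flips the first '/' or '\' after the leading "//" of the UNC name to a backslash, presumably so code that later splits the UNC (such as the DNS upcall path) can count on a backslash between host and share regardless of which separator the user typed. A standalone illustration with a made-up name:

#include <stdio.h>
#include <string.h>

int main(void)
{
        char unc[] = "//server/share";  /* user typed forward slashes */
        char *sep;

        /* skip the leading "//", then flip the next separator to '\\' */
        sep = strpbrk(unc + 2, "/\\");
        if (sep)
                *sep = '\\';

        printf("%s\n", unc);    /* prints "//server\share" */
        return 0;
}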
@@ -1318,42 +1327,43 @@ cifs_parse_mount_options(char *options, const char *devname,
1318 1327
1319static struct cifsSesInfo * 1328static struct cifsSesInfo *
1320cifs_find_tcp_session(struct in_addr *target_ip_addr, 1329cifs_find_tcp_session(struct in_addr *target_ip_addr,
1321 struct in6_addr *target_ip6_addr, 1330 struct in6_addr *target_ip6_addr,
1322 char *userName, struct TCP_Server_Info **psrvTcp) 1331 char *userName, struct TCP_Server_Info **psrvTcp)
1323{ 1332{
1324 struct list_head *tmp; 1333 struct list_head *tmp;
1325 struct cifsSesInfo *ses; 1334 struct cifsSesInfo *ses;
1335
1326 *psrvTcp = NULL; 1336 *psrvTcp = NULL;
1327 read_lock(&GlobalSMBSeslock);
1328 1337
1338 read_lock(&GlobalSMBSeslock);
1329 list_for_each(tmp, &GlobalSMBSessionList) { 1339 list_for_each(tmp, &GlobalSMBSessionList) {
1330 ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList); 1340 ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList);
1331 if (ses->server) { 1341 if (!ses->server)
1332 if ((target_ip_addr && 1342 continue;
1333 (ses->server->addr.sockAddr.sin_addr.s_addr 1343
1334 == target_ip_addr->s_addr)) || (target_ip6_addr 1344 if (target_ip_addr &&
1335 && memcmp(&ses->server->addr.sockAddr6.sin6_addr, 1345 ses->server->addr.sockAddr.sin_addr.s_addr != target_ip_addr->s_addr)
1336 target_ip6_addr, sizeof(*target_ip6_addr)))) { 1346 continue;
1337 /* BB lock server and tcp session and increment 1347 else if (target_ip6_addr &&
1338 use count here?? */ 1348 memcmp(&ses->server->addr.sockAddr6.sin6_addr,
1339 1349 target_ip6_addr, sizeof(*target_ip6_addr)))
1340 /* found a match on the TCP session */ 1350 continue;
1341 *psrvTcp = ses->server; 1351 /* BB lock server and tcp session; increment use count here?? */
1342 1352
1343 /* BB check if reconnection needed */ 1353 /* found a match on the TCP session */
1344 if (strncmp 1354 *psrvTcp = ses->server;
1345 (ses->userName, userName, 1355
1346 MAX_USERNAME_SIZE) == 0){ 1356 /* BB check if reconnection needed */
1347 read_unlock(&GlobalSMBSeslock); 1357 if (strncmp(ses->userName, userName, MAX_USERNAME_SIZE) == 0) {
1348 /* Found exact match on both TCP and 1358 read_unlock(&GlobalSMBSeslock);
1349 SMB sessions */ 1359 /* Found exact match on both TCP and
1350 return ses; 1360 SMB sessions */
1351 } 1361 return ses;
1352 }
1353 } 1362 }
1354 /* else tcp and smb sessions need reconnection */ 1363 /* else tcp and smb sessions need reconnection */
1355 } 1364 }
1356 read_unlock(&GlobalSMBSeslock); 1365 read_unlock(&GlobalSMBSeslock);
1366
1357 return NULL; 1367 return NULL;
1358} 1368}
1359 1369
@@ -1362,45 +1372,43 @@ find_unc(__be32 new_target_ip_addr, char *uncName, char *userName)
1362{ 1372{
1363 struct list_head *tmp; 1373 struct list_head *tmp;
1364 struct cifsTconInfo *tcon; 1374 struct cifsTconInfo *tcon;
1375 __be32 old_ip;
1365 1376
1366 read_lock(&GlobalSMBSeslock); 1377 read_lock(&GlobalSMBSeslock);
1378
1367 list_for_each(tmp, &GlobalTreeConnectionList) { 1379 list_for_each(tmp, &GlobalTreeConnectionList) {
1368 cFYI(1, ("Next tcon")); 1380 cFYI(1, ("Next tcon"));
1369 tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); 1381 tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList);
1370 if (tcon->ses) { 1382 if (!tcon->ses || !tcon->ses->server)
1371 if (tcon->ses->server) { 1383 continue;
1372 cFYI(1, 1384
1373 ("old ip addr: %x == new ip %x ?", 1385 old_ip = tcon->ses->server->addr.sockAddr.sin_addr.s_addr;
1374 tcon->ses->server->addr.sockAddr.sin_addr. 1386 cFYI(1, ("old ip addr: %x == new ip %x ?",
1375 s_addr, new_target_ip_addr)); 1387 old_ip, new_target_ip_addr));
1376 if (tcon->ses->server->addr.sockAddr.sin_addr. 1388
1377 s_addr == new_target_ip_addr) { 1389 if (old_ip != new_target_ip_addr)
1378 /* BB lock tcon, server and tcp session and increment use count here? */ 1390 continue;
1379 /* found a match on the TCP session */ 1391
1380 /* BB check if reconnection needed */ 1392 /* BB lock tcon, server, tcp session and increment use count? */
1381 cFYI(1, 1393 /* found a match on the TCP session */
1382 ("IP match, old UNC: %s new: %s", 1394 /* BB check if reconnection needed */
1383 tcon->treeName, uncName)); 1395 cFYI(1, ("IP match, old UNC: %s new: %s",
1384 if (strncmp 1396 tcon->treeName, uncName));
1385 (tcon->treeName, uncName, 1397
1386 MAX_TREE_SIZE) == 0) { 1398 if (strncmp(tcon->treeName, uncName, MAX_TREE_SIZE))
1387 cFYI(1, 1399 continue;
1388 ("and old usr: %s new: %s", 1400
1389 tcon->treeName, uncName)); 1401 cFYI(1, ("and old usr: %s new: %s",
1390 if (strncmp 1402 tcon->treeName, uncName));
1391 (tcon->ses->userName, 1403
1392 userName, 1404 if (strncmp(tcon->ses->userName, userName, MAX_USERNAME_SIZE))
1393 MAX_USERNAME_SIZE) == 0) { 1405 continue;
1394 read_unlock(&GlobalSMBSeslock); 1406
1395 /* matched smb session 1407 /* matched smb session (user name) */
1396 (user name */ 1408 read_unlock(&GlobalSMBSeslock);
1397 return tcon; 1409 return tcon;
1398 }
1399 }
1400 }
1401 }
1402 }
1403 } 1410 }
1411
1404 read_unlock(&GlobalSMBSeslock); 1412 read_unlock(&GlobalSMBSeslock);
1405 return NULL; 1413 return NULL;
1406} 1414}
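Both lookup loops (cifs_find_tcp_session and find_unc) are rewritten from deeply nested if-blocks into a flat "skip and continue" style: each mismatch bails out of the current iteration early, so the match path reads straight down and the unlock/return happens in one obvious place. A generic sketch of the same refactor; the struct and match criteria are invented for illustration:

#include <stdio.h>
#include <string.h>

struct conn {
        unsigned int ip;
        const char *user;
        const char *tree;
};

/* return the first entry matching ip, user and tree, or NULL */
static struct conn *find_conn(struct conn *tbl, int n, unsigned int ip,
                              const char *user, const char *tree)
{
        int i;

        for (i = 0; i < n; i++) {
                if (tbl[i].ip != ip)
                        continue;
                if (strcmp(tbl[i].user, user) != 0)
                        continue;
                if (strcmp(tbl[i].tree, tree) != 0)
                        continue;
                return &tbl[i]; /* all criteria matched */
        }
        return NULL;
}

int main(void)
{
        struct conn tbl[] = {
                { 1, "alice", "\\\\srv\\a" },
                { 2, "bob",   "\\\\srv\\b" },
        };

        printf("%s\n", find_conn(tbl, 2, 2, "bob", "\\\\srv\\b") ?
               "found" : "missing");
        return 0;
}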
@@ -1982,7 +1990,6 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1982 kfree(srvTcp->hostname); 1990 kfree(srvTcp->hostname);
1983 goto out; 1991 goto out;
1984 } 1992 }
1985 wait_for_completion(&cifsd_complete);
1986 rc = 0; 1993 rc = 0;
1987 memcpy(srvTcp->workstation_RFC1001_name, 1994 memcpy(srvTcp->workstation_RFC1001_name,
1988 volume_info.source_rfc1001_name, 16); 1995 volume_info.source_rfc1001_name, 16);
@@ -2189,15 +2196,12 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2189 srvTcp->tcpStatus = CifsExiting; 2196 srvTcp->tcpStatus = CifsExiting;
2190 spin_unlock(&GlobalMid_Lock); 2197 spin_unlock(&GlobalMid_Lock);
2191 if (srvTcp->tsk) { 2198 if (srvTcp->tsk) {
2192 struct task_struct *tsk;
2193 /* If we could verify that kthread_stop would 2199 /* If we could verify that kthread_stop would
2194 always wake up processes blocked in 2200 always wake up processes blocked in
2195 tcp in recv_mesg then we could remove the 2201 tcp in recv_mesg then we could remove the
2196 send_sig call */ 2202 send_sig call */
2197 force_sig(SIGKILL, srvTcp->tsk); 2203 force_sig(SIGKILL, srvTcp->tsk);
2198 tsk = srvTcp->tsk; 2204 kthread_stop(srvTcp->tsk);
2199 if (tsk)
2200 kthread_stop(tsk);
2201 } 2205 }
2202 } 2206 }
2203 /* If find_unc succeeded then rc == 0 so we can not end */ 2207 /* If find_unc succeeded then rc == 0 so we can not end */
@@ -2213,23 +2217,17 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2213 if ((temp_rc == -ESHUTDOWN) && 2217 if ((temp_rc == -ESHUTDOWN) &&
2214 (pSesInfo->server) && 2218 (pSesInfo->server) &&
2215 (pSesInfo->server->tsk)) { 2219 (pSesInfo->server->tsk)) {
2216 struct task_struct *tsk;
2217 force_sig(SIGKILL, 2220 force_sig(SIGKILL,
2218 pSesInfo->server->tsk); 2221 pSesInfo->server->tsk);
2219 tsk = pSesInfo->server->tsk; 2222 kthread_stop(pSesInfo->server->tsk);
2220 if (tsk)
2221 kthread_stop(tsk);
2222 } 2223 }
2223 } else { 2224 } else {
2224 cFYI(1, ("No session or bad tcon")); 2225 cFYI(1, ("No session or bad tcon"));
2225 if ((pSesInfo->server) && 2226 if ((pSesInfo->server) &&
2226 (pSesInfo->server->tsk)) { 2227 (pSesInfo->server->tsk)) {
2227 struct task_struct *tsk;
2228 force_sig(SIGKILL, 2228 force_sig(SIGKILL,
2229 pSesInfo->server->tsk); 2229 pSesInfo->server->tsk);
2230 tsk = pSesInfo->server->tsk; 2230 kthread_stop(pSesInfo->server->tsk);
2231 if (tsk)
2232 kthread_stop(tsk);
2233 } 2231 }
2234 } 2232 }
2235 sesInfoFree(pSesInfo); 2233 sesInfoFree(pSesInfo);
@@ -2602,7 +2600,7 @@ sesssetup_nomem: /* do not return an error on nomem for the info strings,
2602 2600
2603static int 2601static int
2604CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, 2602CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2605 struct cifsSesInfo *ses, int *pNTLMv2_flag, 2603 struct cifsSesInfo *ses, bool *pNTLMv2_flag,
2606 const struct nls_table *nls_codepage) 2604 const struct nls_table *nls_codepage)
2607{ 2605{
2608 struct smb_hdr *smb_buffer; 2606 struct smb_hdr *smb_buffer;
@@ -2625,7 +2623,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2625 if (ses == NULL) 2623 if (ses == NULL)
2626 return -EINVAL; 2624 return -EINVAL;
2627 domain = ses->domainName; 2625 domain = ses->domainName;
2628 *pNTLMv2_flag = FALSE; 2626 *pNTLMv2_flag = false;
2629 smb_buffer = cifs_buf_get(); 2627 smb_buffer = cifs_buf_get();
2630 if (smb_buffer == NULL) { 2628 if (smb_buffer == NULL) {
2631 return -ENOMEM; 2629 return -ENOMEM;
@@ -2778,7 +2776,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2778 CIFS_CRYPTO_KEY_SIZE); 2776 CIFS_CRYPTO_KEY_SIZE);
2779 if (SecurityBlob2->NegotiateFlags & 2777 if (SecurityBlob2->NegotiateFlags &
2780 cpu_to_le32(NTLMSSP_NEGOTIATE_NTLMV2)) 2778 cpu_to_le32(NTLMSSP_NEGOTIATE_NTLMV2))
2781 *pNTLMv2_flag = TRUE; 2779 *pNTLMv2_flag = true;
2782 2780
2783 if ((SecurityBlob2->NegotiateFlags & 2781 if ((SecurityBlob2->NegotiateFlags &
2784 cpu_to_le32(NTLMSSP_NEGOTIATE_ALWAYS_SIGN)) 2782 cpu_to_le32(NTLMSSP_NEGOTIATE_ALWAYS_SIGN))
@@ -2939,7 +2937,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2939} 2937}
2940static int 2938static int
2941CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, 2939CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2942 char *ntlm_session_key, int ntlmv2_flag, 2940 char *ntlm_session_key, bool ntlmv2_flag,
2943 const struct nls_table *nls_codepage) 2941 const struct nls_table *nls_codepage)
2944{ 2942{
2945 struct smb_hdr *smb_buffer; 2943 struct smb_hdr *smb_buffer;
@@ -3556,8 +3554,6 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
3556 cifs_sb->prepath = NULL; 3554 cifs_sb->prepath = NULL;
3557 kfree(tmp); 3555 kfree(tmp);
3558 if (ses) 3556 if (ses)
3559 schedule_timeout_interruptible(msecs_to_jiffies(500));
3560 if (ses)
3561 sesInfoFree(ses); 3557 sesInfoFree(ses);
3562 3558
3563 FreeXid(xid); 3559 FreeXid(xid);
@@ -3569,7 +3565,7 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
3569{ 3565{
3570 int rc = 0; 3566 int rc = 0;
3571 char ntlm_session_key[CIFS_SESS_KEY_SIZE]; 3567 char ntlm_session_key[CIFS_SESS_KEY_SIZE];
3572 int ntlmv2_flag = FALSE; 3568 bool ntlmv2_flag = false;
3573 int first_time = 0; 3569 int first_time = 0;
3574 3570
3575 /* what if server changes its buffer size after dropping the session? */ 3571 /* what if server changes its buffer size after dropping the session? */
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 0f5c62ba4038..e4e0078a0526 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -119,6 +119,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
119{ 119{
120 int rc = -ENOENT; 120 int rc = -ENOENT;
121 int xid; 121 int xid;
122 int create_options = CREATE_NOT_DIR;
122 int oplock = 0; 123 int oplock = 0;
123 int desiredAccess = GENERIC_READ | GENERIC_WRITE; 124 int desiredAccess = GENERIC_READ | GENERIC_WRITE;
124 __u16 fileHandle; 125 __u16 fileHandle;
@@ -130,7 +131,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
130 struct cifsFileInfo *pCifsFile = NULL; 131 struct cifsFileInfo *pCifsFile = NULL;
131 struct cifsInodeInfo *pCifsInode; 132 struct cifsInodeInfo *pCifsInode;
132 int disposition = FILE_OVERWRITE_IF; 133 int disposition = FILE_OVERWRITE_IF;
133 int write_only = FALSE; 134 bool write_only = false;
134 135
135 xid = GetXid(); 136 xid = GetXid();
136 137
@@ -152,7 +153,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
152 if (oflags & FMODE_WRITE) { 153 if (oflags & FMODE_WRITE) {
153 desiredAccess |= GENERIC_WRITE; 154 desiredAccess |= GENERIC_WRITE;
154 if (!(oflags & FMODE_READ)) 155 if (!(oflags & FMODE_READ))
155 write_only = TRUE; 156 write_only = true;
156 } 157 }
157 158
158 if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) 159 if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
@@ -176,9 +177,19 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
176 FreeXid(xid); 177 FreeXid(xid);
177 return -ENOMEM; 178 return -ENOMEM;
178 } 179 }
180
181 mode &= ~current->fs->umask;
182
183 /*
184 * if we're not using unix extensions, see if we need to set
185 * ATTR_READONLY on the create call
186 */
187 if (!pTcon->unix_ext && (mode & S_IWUGO) == 0)
188 create_options |= CREATE_OPTION_READONLY;
189
179 if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS) 190 if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS)
180 rc = CIFSSMBOpen(xid, pTcon, full_path, disposition, 191 rc = CIFSSMBOpen(xid, pTcon, full_path, disposition,
181 desiredAccess, CREATE_NOT_DIR, 192 desiredAccess, create_options,
182 &fileHandle, &oplock, buf, cifs_sb->local_nls, 193 &fileHandle, &oplock, buf, cifs_sb->local_nls,
183 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 194 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
184 else 195 else
@@ -187,7 +198,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
187 if (rc == -EIO) { 198 if (rc == -EIO) {
188 /* old server, retry the open legacy style */ 199 /* old server, retry the open legacy style */
189 rc = SMBLegacyOpen(xid, pTcon, full_path, disposition, 200 rc = SMBLegacyOpen(xid, pTcon, full_path, disposition,
190 desiredAccess, CREATE_NOT_DIR, 201 desiredAccess, create_options,
191 &fileHandle, &oplock, buf, cifs_sb->local_nls, 202 &fileHandle, &oplock, buf, cifs_sb->local_nls,
192 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 203 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
193 } 204 }
@@ -197,7 +208,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
197 /* If Open reported that we actually created a file 208 /* If Open reported that we actually created a file
198 then we now have to set the mode if possible */ 209 then we now have to set the mode if possible */
199 if ((pTcon->unix_ext) && (oplock & CIFS_CREATE_ACTION)) { 210 if ((pTcon->unix_ext) && (oplock & CIFS_CREATE_ACTION)) {
200 mode &= ~current->fs->umask;
201 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 211 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
202 CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, 212 CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode,
203 (__u64)current->fsuid, 213 (__u64)current->fsuid,
@@ -254,7 +264,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
254 d_instantiate(direntry, newinode); 264 d_instantiate(direntry, newinode);
255 } 265 }
256 if ((nd == NULL /* nfsd case - nfs srv does not set nd */) || 266 if ((nd == NULL /* nfsd case - nfs srv does not set nd */) ||
257 ((nd->flags & LOOKUP_OPEN) == FALSE)) { 267 (!(nd->flags & LOOKUP_OPEN))) {
258 /* mknod case - do not leave file open */ 268 /* mknod case - do not leave file open */
259 CIFSSMBClose(xid, pTcon, fileHandle); 269 CIFSSMBClose(xid, pTcon, fileHandle);
260 } else if (newinode) { 270 } else if (newinode) {
@@ -266,8 +276,8 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
266 pCifsFile->netfid = fileHandle; 276 pCifsFile->netfid = fileHandle;
267 pCifsFile->pid = current->tgid; 277 pCifsFile->pid = current->tgid;
268 pCifsFile->pInode = newinode; 278 pCifsFile->pInode = newinode;
269 pCifsFile->invalidHandle = FALSE; 279 pCifsFile->invalidHandle = false;
270 pCifsFile->closePend = FALSE; 280 pCifsFile->closePend = false;
271 init_MUTEX(&pCifsFile->fh_sem); 281 init_MUTEX(&pCifsFile->fh_sem);
272 mutex_init(&pCifsFile->lock_mutex); 282 mutex_init(&pCifsFile->lock_mutex);
273 INIT_LIST_HEAD(&pCifsFile->llist); 283 INIT_LIST_HEAD(&pCifsFile->llist);
@@ -280,7 +290,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
280 pCifsInode = CIFS_I(newinode); 290 pCifsInode = CIFS_I(newinode);
281 if (pCifsInode) { 291 if (pCifsInode) {
282 /* if readable file instance put first in list*/ 292 /* if readable file instance put first in list*/
283 if (write_only == TRUE) { 293 if (write_only) {
284 list_add_tail(&pCifsFile->flist, 294 list_add_tail(&pCifsFile->flist,
285 &pCifsInode->openFileList); 295 &pCifsInode->openFileList);
286 } else { 296 } else {
@@ -288,12 +298,12 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
288 &pCifsInode->openFileList); 298 &pCifsInode->openFileList);
289 } 299 }
290 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { 300 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
291 pCifsInode->clientCanCacheAll = TRUE; 301 pCifsInode->clientCanCacheAll = true;
292 pCifsInode->clientCanCacheRead = TRUE; 302 pCifsInode->clientCanCacheRead = true;
293 cFYI(1, ("Exclusive Oplock inode %p", 303 cFYI(1, ("Exclusive Oplock inode %p",
294 newinode)); 304 newinode));
295 } else if ((oplock & 0xF) == OPLOCK_READ) 305 } else if ((oplock & 0xF) == OPLOCK_READ)
296 pCifsInode->clientCanCacheRead = TRUE; 306 pCifsInode->clientCanCacheRead = true;
297 } 307 }
298 write_unlock(&GlobalSMBSeslock); 308 write_unlock(&GlobalSMBSeslock);
299 } 309 }
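cifs_create() now masks the requested mode with the process umask up front, and when unix extensions are off and the masked mode grants no write permission it asks the server for a read-only file via CREATE_OPTION_READONLY. A userspace-flavoured sketch of that decision; S_IWUGO is spelled out so the example builds outside the kernel, and the CREATE_NOT_DIR value is assumed for illustration:

#include <stdio.h>
#include <sys/stat.h>

#define S_IWUGO (S_IWUSR | S_IWGRP | S_IWOTH)  /* kernel shorthand */
#define CREATE_NOT_DIR         0x00000040      /* value assumed for example */
#define CREATE_OPTION_READONLY 0x10000000

/* decide create options for a new file, mirroring the dir.c logic */
static unsigned int create_opts(mode_t mode, mode_t umask_bits, int unix_ext)
{
        unsigned int opts = CREATE_NOT_DIR;

        mode &= ~umask_bits;
        if (!unix_ext && (mode & S_IWUGO) == 0)
                opts |= CREATE_OPTION_READONLY;
        return opts;
}

int main(void)
{
        printf("0444, no unix ext -> 0x%08x\n", create_opts(0444, 022, 0));
        printf("0644, no unix ext -> 0x%08x\n", create_opts(0644, 022, 0));
        return 0;
}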
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index 7cc86c418182..939e256f8497 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -55,6 +55,32 @@ struct key_type key_type_dns_resolver = {
55 .match = user_match, 55 .match = user_match,
56}; 56};
57 57
58/* Checks if supplied name is IP address
59 * returns:
60 * 1 - name is IP
61 * 0 - name is not IP
62 */
63static int is_ip(const char *name)
64{
65 int rc;
66 struct sockaddr_in sin_server;
67 struct sockaddr_in6 sin_server6;
68
69 rc = cifs_inet_pton(AF_INET, name,
70 &sin_server.sin_addr.s_addr);
71
72 if (rc <= 0) {
73 /* not ipv4 address, try ipv6 */
74 rc = cifs_inet_pton(AF_INET6, name,
75 &sin_server6.sin6_addr.in6_u);
76 if (rc > 0)
77 return 1;
78 } else {
79 return 1;
80 }
81 /* we failed translating address */
82 return 0;
83}
58 84
59/* Resolves server name to ip address. 85/* Resolves server name to ip address.
60 * input: 86 * input:
@@ -67,8 +93,9 @@ int
67dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) 93dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
68{ 94{
69 int rc = -EAGAIN; 95 int rc = -EAGAIN;
70 struct key *rkey; 96 struct key *rkey = ERR_PTR(-EAGAIN);
71 char *name; 97 char *name;
98 char *data = NULL;
72 int len; 99 int len;
73 100
74 if (!ip_addr || !unc) 101 if (!ip_addr || !unc)
@@ -97,26 +124,41 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
97 memcpy(name, unc+2, len); 124 memcpy(name, unc+2, len);
98 name[len] = 0; 125 name[len] = 0;
99 126
127 if (is_ip(name)) {
128 cFYI(1, ("%s: it is IP, skipping dns upcall: %s",
129 __func__, name));
130 data = name;
131 goto skip_upcall;
132 }
133
100 rkey = request_key(&key_type_dns_resolver, name, ""); 134 rkey = request_key(&key_type_dns_resolver, name, "");
101 if (!IS_ERR(rkey)) { 135 if (!IS_ERR(rkey)) {
102 len = strlen(rkey->payload.data); 136 data = rkey->payload.data;
103 *ip_addr = kmalloc(len+1, GFP_KERNEL); 137 cFYI(1, ("%s: resolved: %s to %s", __func__,
104 if (*ip_addr) {
105 memcpy(*ip_addr, rkey->payload.data, len);
106 (*ip_addr)[len] = '\0';
107 cFYI(1, ("%s: resolved: %s to %s", __func__,
108 rkey->description, 138 rkey->description,
109 *ip_addr 139 *ip_addr
110 )); 140 ));
141 } else {
142 cERROR(1, ("%s: unable to resolve: %s", __func__, name));
143 goto out;
144 }
145
146skip_upcall:
147 if (data) {
148 len = strlen(data);
149 *ip_addr = kmalloc(len+1, GFP_KERNEL);
150 if (*ip_addr) {
151 memcpy(*ip_addr, data, len);
152 (*ip_addr)[len] = '\0';
111 rc = 0; 153 rc = 0;
112 } else { 154 } else {
113 rc = -ENOMEM; 155 rc = -ENOMEM;
114 } 156 }
115 key_put(rkey); 157 if (!IS_ERR(rkey))
116 } else { 158 key_put(rkey);
117 cERROR(1, ("%s: unable to resolve: %s", __func__, name));
118 } 159 }
119 160
161out:
120 kfree(name); 162 kfree(name);
121 return rc; 163 return rc;
122} 164}
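The new is_ip() helper short-circuits the dns_resolver upcall when the share name already holds a literal address: if the string parses as IPv4 or IPv6 it is copied straight into *ip_addr. A standalone sketch of the same check using libc inet_pton(), which shares the "> 0 means parsed" convention the kernel code relies on with cifs_inet_pton():

#include <arpa/inet.h>
#include <stdio.h>

/* 1 if name is a literal IPv4/IPv6 address, 0 otherwise */
static int is_ip(const char *name)
{
        struct in_addr v4;
        struct in6_addr v6;

        if (inet_pton(AF_INET, name, &v4) > 0)
                return 1;
        if (inet_pton(AF_INET6, name, &v6) > 0)
                return 1;
        return 0;
}

int main(void)
{
        printf("%d %d %d\n", is_ip("192.168.1.10"), is_ip("fe80::1"),
               is_ip("fileserver.example.com"));    /* prints "1 1 0" */
        return 0;
}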
diff --git a/fs/cifs/fcntl.c b/fs/cifs/fcntl.c
index 7d1d5aa4c430..5a57581eb4b2 100644
--- a/fs/cifs/fcntl.c
+++ b/fs/cifs/fcntl.c
@@ -68,7 +68,7 @@ int cifs_dir_notify(struct file *file, unsigned long arg)
68{ 68{
69 int xid; 69 int xid;
70 int rc = -EINVAL; 70 int rc = -EINVAL;
71 int oplock = FALSE; 71 int oplock = 0;
72 struct cifs_sb_info *cifs_sb; 72 struct cifs_sb_info *cifs_sb;
73 struct cifsTconInfo *pTcon; 73 struct cifsTconInfo *pTcon;
74 char *full_path = NULL; 74 char *full_path = NULL;
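Note that oplock keeps its int type and is now initialized to 0 rather than FALSE: it is not a boolean but a request/level word that the open call fills in and the caller then masks, for example "oplock & 0xF" against OPLOCK_EXCLUSIVE or OPLOCK_READ in the file.c hunks below. A compact sketch of that masking convention; the numeric values here are illustrative, not the real SMB constants:

#include <stdio.h>

#define REQ_OPLOCK        2     /* illustrative request value */
#define OPLOCK_EXCLUSIVE  8     /* illustrative level in low nibble */
#define OPLOCK_READ       2

static void report(int oplock)
{
        if ((oplock & 0xF) == OPLOCK_EXCLUSIVE)
                printf("can cache reads and writes\n");
        else if ((oplock & 0xF) == OPLOCK_READ)
                printf("can cache reads only\n");
        else
                printf("no caching\n");
}

int main(void)
{
        report(0);                 /* oplocks disabled: plain 0, not "false" */
        report(OPLOCK_EXCLUSIVE);
        return 0;
}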
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 40b690073fc1..31a0a33b9d95 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -51,8 +51,8 @@ static inline struct cifsFileInfo *cifs_init_private(
51 INIT_LIST_HEAD(&private_data->llist); 51 INIT_LIST_HEAD(&private_data->llist);
52 private_data->pfile = file; /* needed for writepage */ 52 private_data->pfile = file; /* needed for writepage */
53 private_data->pInode = inode; 53 private_data->pInode = inode;
54 private_data->invalidHandle = FALSE; 54 private_data->invalidHandle = false;
55 private_data->closePend = FALSE; 55 private_data->closePend = false;
56 /* we have to track num writers to the inode, since writepages 56 /* we have to track num writers to the inode, since writepages
57 does not tell us which handle the write is for so there can 57 does not tell us which handle the write is for so there can
58 be a close (overlapping with write) of the filehandle that 58 be a close (overlapping with write) of the filehandle that
@@ -148,12 +148,12 @@ client_can_cache:
148 full_path, buf, inode->i_sb, xid, NULL); 148 full_path, buf, inode->i_sb, xid, NULL);
149 149
150 if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) { 150 if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) {
151 pCifsInode->clientCanCacheAll = TRUE; 151 pCifsInode->clientCanCacheAll = true;
152 pCifsInode->clientCanCacheRead = TRUE; 152 pCifsInode->clientCanCacheRead = true;
153 cFYI(1, ("Exclusive Oplock granted on inode %p", 153 cFYI(1, ("Exclusive Oplock granted on inode %p",
154 file->f_path.dentry->d_inode)); 154 file->f_path.dentry->d_inode));
155 } else if ((*oplock & 0xF) == OPLOCK_READ) 155 } else if ((*oplock & 0xF) == OPLOCK_READ)
156 pCifsInode->clientCanCacheRead = TRUE; 156 pCifsInode->clientCanCacheRead = true;
157 157
158 return rc; 158 return rc;
159} 159}
@@ -247,7 +247,7 @@ int cifs_open(struct inode *inode, struct file *file)
247 if (oplockEnabled) 247 if (oplockEnabled)
248 oplock = REQ_OPLOCK; 248 oplock = REQ_OPLOCK;
249 else 249 else
250 oplock = FALSE; 250 oplock = 0;
251 251
252 /* BB pass O_SYNC flag through on file attributes .. BB */ 252 /* BB pass O_SYNC flag through on file attributes .. BB */
253 253
@@ -339,7 +339,7 @@ static int cifs_relock_file(struct cifsFileInfo *cifsFile)
339 return rc; 339 return rc;
340} 340}
341 341
342static int cifs_reopen_file(struct file *file, int can_flush) 342static int cifs_reopen_file(struct file *file, bool can_flush)
343{ 343{
344 int rc = -EACCES; 344 int rc = -EACCES;
345 int xid, oplock; 345 int xid, oplock;
@@ -360,7 +360,7 @@ static int cifs_reopen_file(struct file *file, int can_flush)
360 360
361 xid = GetXid(); 361 xid = GetXid();
362 down(&pCifsFile->fh_sem); 362 down(&pCifsFile->fh_sem);
363 if (pCifsFile->invalidHandle == FALSE) { 363 if (!pCifsFile->invalidHandle) {
364 up(&pCifsFile->fh_sem); 364 up(&pCifsFile->fh_sem);
365 FreeXid(xid); 365 FreeXid(xid);
366 return 0; 366 return 0;
@@ -404,7 +404,7 @@ reopen_error_exit:
404 if (oplockEnabled) 404 if (oplockEnabled)
405 oplock = REQ_OPLOCK; 405 oplock = REQ_OPLOCK;
406 else 406 else
407 oplock = FALSE; 407 oplock = 0;
408 408
409 /* Can not refresh inode by passing in file_info buf to be returned 409 /* Can not refresh inode by passing in file_info buf to be returned
410 by SMBOpen and then calling get_inode_info with returned buf 410 by SMBOpen and then calling get_inode_info with returned buf
@@ -422,7 +422,7 @@ reopen_error_exit:
422 cFYI(1, ("oplock: %d", oplock)); 422 cFYI(1, ("oplock: %d", oplock));
423 } else { 423 } else {
424 pCifsFile->netfid = netfid; 424 pCifsFile->netfid = netfid;
425 pCifsFile->invalidHandle = FALSE; 425 pCifsFile->invalidHandle = false;
426 up(&pCifsFile->fh_sem); 426 up(&pCifsFile->fh_sem);
427 pCifsInode = CIFS_I(inode); 427 pCifsInode = CIFS_I(inode);
428 if (pCifsInode) { 428 if (pCifsInode) {
@@ -432,8 +432,8 @@ reopen_error_exit:
432 CIFS_I(inode)->write_behind_rc = rc; 432 CIFS_I(inode)->write_behind_rc = rc;
433 /* temporarily disable caching while we 433 /* temporarily disable caching while we
434 go to server to get inode info */ 434 go to server to get inode info */
435 pCifsInode->clientCanCacheAll = FALSE; 435 pCifsInode->clientCanCacheAll = false;
436 pCifsInode->clientCanCacheRead = FALSE; 436 pCifsInode->clientCanCacheRead = false;
437 if (pTcon->unix_ext) 437 if (pTcon->unix_ext)
438 rc = cifs_get_inode_info_unix(&inode, 438 rc = cifs_get_inode_info_unix(&inode,
439 full_path, inode->i_sb, xid); 439 full_path, inode->i_sb, xid);
@@ -448,16 +448,16 @@ reopen_error_exit:
448 we can not go to the server to get the new inod 448 we can not go to the server to get the new inod
449 info */ 449 info */
450 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { 450 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
451 pCifsInode->clientCanCacheAll = TRUE; 451 pCifsInode->clientCanCacheAll = true;
452 pCifsInode->clientCanCacheRead = TRUE; 452 pCifsInode->clientCanCacheRead = true;
453 cFYI(1, ("Exclusive Oplock granted on inode %p", 453 cFYI(1, ("Exclusive Oplock granted on inode %p",
454 file->f_path.dentry->d_inode)); 454 file->f_path.dentry->d_inode));
455 } else if ((oplock & 0xF) == OPLOCK_READ) { 455 } else if ((oplock & 0xF) == OPLOCK_READ) {
456 pCifsInode->clientCanCacheRead = TRUE; 456 pCifsInode->clientCanCacheRead = true;
457 pCifsInode->clientCanCacheAll = FALSE; 457 pCifsInode->clientCanCacheAll = false;
458 } else { 458 } else {
459 pCifsInode->clientCanCacheRead = FALSE; 459 pCifsInode->clientCanCacheRead = false;
460 pCifsInode->clientCanCacheAll = FALSE; 460 pCifsInode->clientCanCacheAll = false;
461 } 461 }
462 cifs_relock_file(pCifsFile); 462 cifs_relock_file(pCifsFile);
463 } 463 }
@@ -484,7 +484,7 @@ int cifs_close(struct inode *inode, struct file *file)
484 if (pSMBFile) { 484 if (pSMBFile) {
485 struct cifsLockInfo *li, *tmp; 485 struct cifsLockInfo *li, *tmp;
486 486
487 pSMBFile->closePend = TRUE; 487 pSMBFile->closePend = true;
488 if (pTcon) { 488 if (pTcon) {
489 /* no sense reconnecting to close a file that is 489 /* no sense reconnecting to close a file that is
490 already closed */ 490 already closed */
@@ -553,8 +553,8 @@ int cifs_close(struct inode *inode, struct file *file)
553 cFYI(1, ("closing last open instance for inode %p", inode)); 553 cFYI(1, ("closing last open instance for inode %p", inode));
554 /* if the file is not open we do not know if we can cache info 554 /* if the file is not open we do not know if we can cache info
555 on this inode, much less write behind and read ahead */ 555 on this inode, much less write behind and read ahead */
556 CIFS_I(inode)->clientCanCacheRead = FALSE; 556 CIFS_I(inode)->clientCanCacheRead = false;
557 CIFS_I(inode)->clientCanCacheAll = FALSE; 557 CIFS_I(inode)->clientCanCacheAll = false;
558 } 558 }
559 read_unlock(&GlobalSMBSeslock); 559 read_unlock(&GlobalSMBSeslock);
560 if ((rc == 0) && CIFS_I(inode)->write_behind_rc) 560 if ((rc == 0) && CIFS_I(inode)->write_behind_rc)
@@ -583,9 +583,9 @@ int cifs_closedir(struct inode *inode, struct file *file)
583 pTcon = cifs_sb->tcon; 583 pTcon = cifs_sb->tcon;
584 584
585 cFYI(1, ("Freeing private data in close dir")); 585 cFYI(1, ("Freeing private data in close dir"));
586 if ((pCFileStruct->srch_inf.endOfSearch == FALSE) && 586 if (!pCFileStruct->srch_inf.endOfSearch &&
587 (pCFileStruct->invalidHandle == FALSE)) { 587 !pCFileStruct->invalidHandle) {
588 pCFileStruct->invalidHandle = TRUE; 588 pCFileStruct->invalidHandle = true;
589 rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid); 589 rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid);
590 cFYI(1, ("Closing uncompleted readdir with rc %d", 590 cFYI(1, ("Closing uncompleted readdir with rc %d",
591 rc)); 591 rc));
@@ -637,12 +637,12 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
637 __u32 numLock = 0; 637 __u32 numLock = 0;
638 __u32 numUnlock = 0; 638 __u32 numUnlock = 0;
639 __u64 length; 639 __u64 length;
640 int wait_flag = FALSE; 640 bool wait_flag = false;
641 struct cifs_sb_info *cifs_sb; 641 struct cifs_sb_info *cifs_sb;
642 struct cifsTconInfo *pTcon; 642 struct cifsTconInfo *pTcon;
643 __u16 netfid; 643 __u16 netfid;
644 __u8 lockType = LOCKING_ANDX_LARGE_FILES; 644 __u8 lockType = LOCKING_ANDX_LARGE_FILES;
645 int posix_locking; 645 bool posix_locking;
646 646
647 length = 1 + pfLock->fl_end - pfLock->fl_start; 647 length = 1 + pfLock->fl_end - pfLock->fl_start;
648 rc = -EACCES; 648 rc = -EACCES;
@@ -659,7 +659,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
659 cFYI(1, ("Flock")); 659 cFYI(1, ("Flock"));
660 if (pfLock->fl_flags & FL_SLEEP) { 660 if (pfLock->fl_flags & FL_SLEEP) {
661 cFYI(1, ("Blocking lock")); 661 cFYI(1, ("Blocking lock"));
662 wait_flag = TRUE; 662 wait_flag = true;
663 } 663 }
664 if (pfLock->fl_flags & FL_ACCESS) 664 if (pfLock->fl_flags & FL_ACCESS)
665 cFYI(1, ("Process suspended by mandatory locking - " 665 cFYI(1, ("Process suspended by mandatory locking - "
@@ -794,7 +794,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
794 stored_rc = CIFSSMBLock(xid, pTcon, 794 stored_rc = CIFSSMBLock(xid, pTcon,
795 netfid, 795 netfid,
796 li->length, li->offset, 796 li->length, li->offset,
797 1, 0, li->type, FALSE); 797 1, 0, li->type, false);
798 if (stored_rc) 798 if (stored_rc)
799 rc = stored_rc; 799 rc = stored_rc;
800 800
@@ -866,7 +866,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
866 filemap_fdatawait from here so tell 866 filemap_fdatawait from here so tell
867 reopen_file not to flush data to server 867 reopen_file not to flush data to server
868 now */ 868 now */
869 rc = cifs_reopen_file(file, FALSE); 869 rc = cifs_reopen_file(file, false);
870 if (rc != 0) 870 if (rc != 0)
871 break; 871 break;
872 } 872 }
@@ -966,7 +966,7 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
966 filemap_fdatawait from here so tell 966 filemap_fdatawait from here so tell
967 reopen_file not to flush data to 967 reopen_file not to flush data to
968 server now */ 968 server now */
969 rc = cifs_reopen_file(file, FALSE); 969 rc = cifs_reopen_file(file, false);
970 if (rc != 0) 970 if (rc != 0)
971 break; 971 break;
972 } 972 }
@@ -1093,7 +1093,7 @@ refind_writable:
1093 1093
1094 read_unlock(&GlobalSMBSeslock); 1094 read_unlock(&GlobalSMBSeslock);
1095 /* Had to unlock since following call can block */ 1095 /* Had to unlock since following call can block */
1096 rc = cifs_reopen_file(open_file->pfile, FALSE); 1096 rc = cifs_reopen_file(open_file->pfile, false);
1097 if (!rc) { 1097 if (!rc) {
1098 if (!open_file->closePend) 1098 if (!open_file->closePend)
1099 return open_file; 1099 return open_file;
@@ -1608,7 +1608,7 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
1608 int buf_type = CIFS_NO_BUFFER; 1608 int buf_type = CIFS_NO_BUFFER;
1609 if ((open_file->invalidHandle) && 1609 if ((open_file->invalidHandle) &&
1610 (!open_file->closePend)) { 1610 (!open_file->closePend)) {
1611 rc = cifs_reopen_file(file, TRUE); 1611 rc = cifs_reopen_file(file, true);
1612 if (rc != 0) 1612 if (rc != 0)
1613 break; 1613 break;
1614 } 1614 }
@@ -1693,7 +1693,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
1693 while (rc == -EAGAIN) { 1693 while (rc == -EAGAIN) {
1694 if ((open_file->invalidHandle) && 1694 if ((open_file->invalidHandle) &&
1695 (!open_file->closePend)) { 1695 (!open_file->closePend)) {
1696 rc = cifs_reopen_file(file, TRUE); 1696 rc = cifs_reopen_file(file, true);
1697 if (rc != 0) 1697 if (rc != 0)
1698 break; 1698 break;
1699 } 1699 }
@@ -1850,7 +1850,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
1850 while (rc == -EAGAIN) { 1850 while (rc == -EAGAIN) {
1851 if ((open_file->invalidHandle) && 1851 if ((open_file->invalidHandle) &&
1852 (!open_file->closePend)) { 1852 (!open_file->closePend)) {
1853 rc = cifs_reopen_file(file, TRUE); 1853 rc = cifs_reopen_file(file, true);
1854 if (rc != 0) 1854 if (rc != 0)
1855 break; 1855 break;
1856 } 1856 }
@@ -2009,10 +2009,10 @@ static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
2009 refreshing the inode only on increases in the file size 2009 refreshing the inode only on increases in the file size
2010 but this is tricky to do without racing with writebehind 2010 but this is tricky to do without racing with writebehind
2011 page caching in the current Linux kernel design */ 2011 page caching in the current Linux kernel design */
2012int is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file) 2012bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
2013{ 2013{
2014 if (!cifsInode) 2014 if (!cifsInode)
2015 return 1; 2015 return true;
2016 2016
2017 if (is_inode_writable(cifsInode)) { 2017 if (is_inode_writable(cifsInode)) {
2018 /* This inode is open for write at least once */ 2018 /* This inode is open for write at least once */
@@ -2022,15 +2022,15 @@ int is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
2022 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) { 2022 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
2023 /* since no page cache to corrupt on directio 2023 /* since no page cache to corrupt on directio
2024 we can change size safely */ 2024 we can change size safely */
2025 return 1; 2025 return true;
2026 } 2026 }
2027 2027
2028 if (i_size_read(&cifsInode->vfs_inode) < end_of_file) 2028 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
2029 return 1; 2029 return true;
2030 2030
2031 return 0; 2031 return false;
2032 } else 2032 } else
2033 return 1; 2033 return true;
2034} 2034}
2035 2035
2036static int cifs_prepare_write(struct file *file, struct page *page, 2036static int cifs_prepare_write(struct file *file, struct page *page,
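
The fs/cifs/file.c hunks above are largely a mechanical conversion of the driver-private TRUE/FALSE int flags to C99 bool: comparisons such as x == FALSE become !x, and helpers like cifs_reopen_file() and is_size_safe_to_change() change their flag parameters and return types to bool. A minimal userspace sketch of the same pattern, using a hypothetical file_state struct rather than the real cifsFileInfo:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for a driver struct that used int flags plus
 * private TRUE/FALSE macros; bool documents intent and keeps the flag
 * out of accidental arithmetic. */
struct file_state {
	bool invalidHandle;	/* was: int invalidHandle = FALSE; */
	bool closePend;
};

static bool needs_reopen(const struct file_state *f)
{
	/* was: (f->invalidHandle == TRUE) && (f->closePend == FALSE) */
	return f->invalidHandle && !f->closePend;
}

int main(void)
{
	struct file_state f = { .invalidHandle = true, .closePend = false };

	printf("needs_reopen: %s\n", needs_reopen(&f) ? "yes" : "no");
	return 0;
}

The same shape covers the oplock bookkeeping above: clientCanCacheAll and clientCanCacheRead become plain bool assignments instead of TRUE/FALSE stores.
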
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index e1031b9e2c55..fcbdbb6ad7bf 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -281,7 +281,7 @@ static int decode_sfu_inode(struct inode *inode, __u64 size,
281 struct cifs_sb_info *cifs_sb, int xid) 281 struct cifs_sb_info *cifs_sb, int xid)
282{ 282{
283 int rc; 283 int rc;
284 int oplock = FALSE; 284 int oplock = 0;
285 __u16 netfid; 285 __u16 netfid;
286 struct cifsTconInfo *pTcon = cifs_sb->tcon; 286 struct cifsTconInfo *pTcon = cifs_sb->tcon;
287 char buf[24]; 287 char buf[24];
@@ -389,7 +389,7 @@ int cifs_get_inode_info(struct inode **pinode,
389 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 389 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
390 const unsigned char *full_path = NULL; 390 const unsigned char *full_path = NULL;
391 char *buf = NULL; 391 char *buf = NULL;
392 int adjustTZ = FALSE; 392 bool adjustTZ = false;
393 bool is_dfs_referral = false; 393 bool is_dfs_referral = false;
394 394
395 pTcon = cifs_sb->tcon; 395 pTcon = cifs_sb->tcon;
@@ -425,7 +425,7 @@ try_again_CIFSSMBQPathInfo:
425 pfindData, cifs_sb->local_nls, 425 pfindData, cifs_sb->local_nls,
426 cifs_sb->mnt_cifs_flags & 426 cifs_sb->mnt_cifs_flags &
427 CIFS_MOUNT_MAP_SPECIAL_CHR); 427 CIFS_MOUNT_MAP_SPECIAL_CHR);
428 adjustTZ = TRUE; 428 adjustTZ = true;
429 } 429 }
430 } 430 }
431 /* dump_mem("\nQPathInfo return data",&findData, sizeof(findData)); */ 431 /* dump_mem("\nQPathInfo return data",&findData, sizeof(findData)); */
@@ -703,7 +703,7 @@ psx_del_no_retry:
703 } else if (rc == -ENOENT) { 703 } else if (rc == -ENOENT) {
704 d_drop(direntry); 704 d_drop(direntry);
705 } else if (rc == -ETXTBSY) { 705 } else if (rc == -ETXTBSY) {
706 int oplock = FALSE; 706 int oplock = 0;
707 __u16 netfid; 707 __u16 netfid;
708 708
709 rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, DELETE, 709 rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, DELETE,
@@ -736,7 +736,7 @@ psx_del_no_retry:
736 rc = -EOPNOTSUPP; 736 rc = -EOPNOTSUPP;
737 737
738 if (rc == -EOPNOTSUPP) { 738 if (rc == -EOPNOTSUPP) {
739 int oplock = FALSE; 739 int oplock = 0;
740 __u16 netfid; 740 __u16 netfid;
741 /* rc = CIFSSMBSetAttrLegacy(xid, pTcon, 741 /* rc = CIFSSMBSetAttrLegacy(xid, pTcon,
742 full_path, 742 full_path,
@@ -774,7 +774,7 @@ psx_del_no_retry:
774 if (direntry->d_inode) 774 if (direntry->d_inode)
775 drop_nlink(direntry->d_inode); 775 drop_nlink(direntry->d_inode);
776 } else if (rc == -ETXTBSY) { 776 } else if (rc == -ETXTBSY) {
777 int oplock = FALSE; 777 int oplock = 0;
778 __u16 netfid; 778 __u16 netfid;
779 779
780 rc = CIFSSMBOpen(xid, pTcon, full_path, 780 rc = CIFSSMBOpen(xid, pTcon, full_path,
@@ -974,8 +974,8 @@ mkdir_get_info:
974 * failed to get it from the server or was set bogus */ 974 * failed to get it from the server or was set bogus */
975 if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2)) 975 if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2))
976 direntry->d_inode->i_nlink = 2; 976 direntry->d_inode->i_nlink = 2;
977 mode &= ~current->fs->umask;
977 if (pTcon->unix_ext) { 978 if (pTcon->unix_ext) {
978 mode &= ~current->fs->umask;
979 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 979 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
980 CIFSSMBUnixSetPerms(xid, pTcon, full_path, 980 CIFSSMBUnixSetPerms(xid, pTcon, full_path,
981 mode, 981 mode,
@@ -994,9 +994,16 @@ mkdir_get_info:
994 CIFS_MOUNT_MAP_SPECIAL_CHR); 994 CIFS_MOUNT_MAP_SPECIAL_CHR);
995 } 995 }
996 } else { 996 } else {
997 /* BB to be implemented via Windows secrty descriptors 997 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) &&
998 eg CIFSSMBWinSetPerms(xid, pTcon, full_path, mode, 998 (mode & S_IWUGO) == 0) {
999 -1, -1, local_nls); */ 999 FILE_BASIC_INFO pInfo;
1000 memset(&pInfo, 0, sizeof(pInfo));
1001 pInfo.Attributes = cpu_to_le32(ATTR_READONLY);
1002 CIFSSMBSetTimes(xid, pTcon, full_path,
1003 &pInfo, cifs_sb->local_nls,
1004 cifs_sb->mnt_cifs_flags &
1005 CIFS_MOUNT_MAP_SPECIAL_CHR);
1006 }
1000 if (direntry->d_inode) { 1007 if (direntry->d_inode) {
1001 direntry->d_inode->i_mode = mode; 1008 direntry->d_inode->i_mode = mode;
1002 direntry->d_inode->i_mode |= S_IFDIR; 1009 direntry->d_inode->i_mode |= S_IFDIR;
@@ -1149,7 +1156,7 @@ int cifs_rename(struct inode *source_inode, struct dentry *source_direntry,
1149 cFYI(1, ("rename rc %d", rc)); 1156 cFYI(1, ("rename rc %d", rc));
1150 1157
1151 if ((rc == -EIO) || (rc == -EEXIST)) { 1158 if ((rc == -EIO) || (rc == -EEXIST)) {
1152 int oplock = FALSE; 1159 int oplock = 0;
1153 __u16 netfid; 1160 __u16 netfid;
1154 1161
1155 /* BB FIXME Is Generic Read correct for rename? */ 1162 /* BB FIXME Is Generic Read correct for rename? */
@@ -1186,7 +1193,7 @@ int cifs_revalidate(struct dentry *direntry)
1186 struct cifsInodeInfo *cifsInode; 1193 struct cifsInodeInfo *cifsInode;
1187 loff_t local_size; 1194 loff_t local_size;
1188 struct timespec local_mtime; 1195 struct timespec local_mtime;
1189 int invalidate_inode = FALSE; 1196 bool invalidate_inode = false;
1190 1197
1191 if (direntry->d_inode == NULL) 1198 if (direntry->d_inode == NULL)
1192 return -ENOENT; 1199 return -ENOENT;
@@ -1268,7 +1275,7 @@ int cifs_revalidate(struct dentry *direntry)
1268 only ones who could have modified the file and the 1275 only ones who could have modified the file and the
1269 server copy is staler than ours */ 1276 server copy is staler than ours */
1270 } else { 1277 } else {
1271 invalidate_inode = TRUE; 1278 invalidate_inode = true;
1272 } 1279 }
1273 } 1280 }
1274 1281
@@ -1402,24 +1409,25 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1402 int rc = -EACCES; 1409 int rc = -EACCES;
1403 struct cifsFileInfo *open_file = NULL; 1410 struct cifsFileInfo *open_file = NULL;
1404 FILE_BASIC_INFO time_buf; 1411 FILE_BASIC_INFO time_buf;
1405 int set_time = FALSE; 1412 bool set_time = false;
1406 int set_dosattr = FALSE; 1413 bool set_dosattr = false;
1407 __u64 mode = 0xFFFFFFFFFFFFFFFFULL; 1414 __u64 mode = 0xFFFFFFFFFFFFFFFFULL;
1408 __u64 uid = 0xFFFFFFFFFFFFFFFFULL; 1415 __u64 uid = 0xFFFFFFFFFFFFFFFFULL;
1409 __u64 gid = 0xFFFFFFFFFFFFFFFFULL; 1416 __u64 gid = 0xFFFFFFFFFFFFFFFFULL;
1410 struct cifsInodeInfo *cifsInode; 1417 struct cifsInodeInfo *cifsInode;
1418 struct inode *inode = direntry->d_inode;
1411 1419
1412 xid = GetXid(); 1420 xid = GetXid();
1413 1421
1414 cFYI(1, ("setattr on file %s attrs->iavalid 0x%x", 1422 cFYI(1, ("setattr on file %s attrs->iavalid 0x%x",
1415 direntry->d_name.name, attrs->ia_valid)); 1423 direntry->d_name.name, attrs->ia_valid));
1416 1424
1417 cifs_sb = CIFS_SB(direntry->d_inode->i_sb); 1425 cifs_sb = CIFS_SB(inode->i_sb);
1418 pTcon = cifs_sb->tcon; 1426 pTcon = cifs_sb->tcon;
1419 1427
1420 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) { 1428 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) {
1421 /* check if we have permission to change attrs */ 1429 /* check if we have permission to change attrs */
1422 rc = inode_change_ok(direntry->d_inode, attrs); 1430 rc = inode_change_ok(inode, attrs);
1423 if (rc < 0) { 1431 if (rc < 0) {
1424 FreeXid(xid); 1432 FreeXid(xid);
1425 return rc; 1433 return rc;
@@ -1432,7 +1440,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1432 FreeXid(xid); 1440 FreeXid(xid);
1433 return -ENOMEM; 1441 return -ENOMEM;
1434 } 1442 }
1435 cifsInode = CIFS_I(direntry->d_inode); 1443 cifsInode = CIFS_I(inode);
1436 1444
1437 if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) { 1445 if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) {
1438 /* 1446 /*
@@ -1443,9 +1451,9 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1443 will be truncated anyway? Also, should we error out here if 1451 will be truncated anyway? Also, should we error out here if
1444 the flush returns error? 1452 the flush returns error?
1445 */ 1453 */
1446 rc = filemap_write_and_wait(direntry->d_inode->i_mapping); 1454 rc = filemap_write_and_wait(inode->i_mapping);
1447 if (rc != 0) { 1455 if (rc != 0) {
1448 CIFS_I(direntry->d_inode)->write_behind_rc = rc; 1456 cifsInode->write_behind_rc = rc;
1449 rc = 0; 1457 rc = 0;
1450 } 1458 }
1451 } 1459 }
@@ -1464,7 +1472,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1464 __u16 nfid = open_file->netfid; 1472 __u16 nfid = open_file->netfid;
1465 __u32 npid = open_file->pid; 1473 __u32 npid = open_file->pid;
1466 rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size, 1474 rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size,
1467 nfid, npid, FALSE); 1475 nfid, npid, false);
1468 atomic_dec(&open_file->wrtPending); 1476 atomic_dec(&open_file->wrtPending);
1469 cFYI(1, ("SetFSize for attrs rc = %d", rc)); 1477 cFYI(1, ("SetFSize for attrs rc = %d", rc));
1470 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { 1478 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
@@ -1484,14 +1492,14 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1484 it was found or because there was an error setting 1492 it was found or because there was an error setting
1485 it by handle */ 1493 it by handle */
1486 rc = CIFSSMBSetEOF(xid, pTcon, full_path, 1494 rc = CIFSSMBSetEOF(xid, pTcon, full_path,
1487 attrs->ia_size, FALSE, 1495 attrs->ia_size, false,
1488 cifs_sb->local_nls, 1496 cifs_sb->local_nls,
1489 cifs_sb->mnt_cifs_flags & 1497 cifs_sb->mnt_cifs_flags &
1490 CIFS_MOUNT_MAP_SPECIAL_CHR); 1498 CIFS_MOUNT_MAP_SPECIAL_CHR);
1491 cFYI(1, ("SetEOF by path (setattrs) rc = %d", rc)); 1499 cFYI(1, ("SetEOF by path (setattrs) rc = %d", rc));
1492 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { 1500 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
1493 __u16 netfid; 1501 __u16 netfid;
1494 int oplock = FALSE; 1502 int oplock = 0;
1495 1503
1496 rc = SMBLegacyOpen(xid, pTcon, full_path, 1504 rc = SMBLegacyOpen(xid, pTcon, full_path,
1497 FILE_OPEN, 1505 FILE_OPEN,
@@ -1516,14 +1524,13 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1516 1524
1517 /* Server is ok setting allocation size implicitly - no need 1525 /* Server is ok setting allocation size implicitly - no need
1518 to call: 1526 to call:
1519 CIFSSMBSetEOF(xid, pTcon, full_path, attrs->ia_size, TRUE, 1527 CIFSSMBSetEOF(xid, pTcon, full_path, attrs->ia_size, true,
1520 cifs_sb->local_nls); 1528 cifs_sb->local_nls);
1521 */ 1529 */
1522 1530
1523 if (rc == 0) { 1531 if (rc == 0) {
1524 rc = cifs_vmtruncate(direntry->d_inode, attrs->ia_size); 1532 rc = cifs_vmtruncate(inode, attrs->ia_size);
1525 cifs_truncate_page(direntry->d_inode->i_mapping, 1533 cifs_truncate_page(inode->i_mapping, inode->i_size);
1526 direntry->d_inode->i_size);
1527 } else 1534 } else
1528 goto cifs_setattr_exit; 1535 goto cifs_setattr_exit;
1529 } 1536 }
@@ -1557,14 +1564,14 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1557 rc = 0; 1564 rc = 0;
1558#ifdef CONFIG_CIFS_EXPERIMENTAL 1565#ifdef CONFIG_CIFS_EXPERIMENTAL
1559 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) 1566 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL)
1560 rc = mode_to_acl(direntry->d_inode, full_path, mode); 1567 rc = mode_to_acl(inode, full_path, mode);
1561 else if ((mode & S_IWUGO) == 0) { 1568 else if ((mode & S_IWUGO) == 0) {
1562#else 1569#else
1563 if ((mode & S_IWUGO) == 0) { 1570 if ((mode & S_IWUGO) == 0) {
1564#endif 1571#endif
1565 /* not writeable */ 1572 /* not writeable */
1566 if ((cifsInode->cifsAttrs & ATTR_READONLY) == 0) { 1573 if ((cifsInode->cifsAttrs & ATTR_READONLY) == 0) {
1567 set_dosattr = TRUE; 1574 set_dosattr = true;
1568 time_buf.Attributes = 1575 time_buf.Attributes =
1569 cpu_to_le32(cifsInode->cifsAttrs | 1576 cpu_to_le32(cifsInode->cifsAttrs |
1570 ATTR_READONLY); 1577 ATTR_READONLY);
@@ -1574,28 +1581,24 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1574 not be able to write to it - so if any write 1581 not be able to write to it - so if any write
1575 bit is enabled for user or group or other we 1582 bit is enabled for user or group or other we
1576 need to at least try to remove r/o dos attr */ 1583 need to at least try to remove r/o dos attr */
1577 set_dosattr = TRUE; 1584 set_dosattr = true;
1578 time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs & 1585 time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs &
1579 (~ATTR_READONLY)); 1586 (~ATTR_READONLY));
1580 /* Windows ignores set to zero */ 1587 /* Windows ignores set to zero */
1581 if (time_buf.Attributes == 0) 1588 if (time_buf.Attributes == 0)
1582 time_buf.Attributes |= cpu_to_le32(ATTR_NORMAL); 1589 time_buf.Attributes |= cpu_to_le32(ATTR_NORMAL);
1583 } 1590 }
1584#ifdef CONFIG_CIFS_EXPERIMENTAL
1585 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL)
1586 mode_to_acl(direntry->d_inode, full_path, mode);
1587#endif
1588 } 1591 }
1589 1592
1590 if (attrs->ia_valid & ATTR_ATIME) { 1593 if (attrs->ia_valid & ATTR_ATIME) {
1591 set_time = TRUE; 1594 set_time = true;
1592 time_buf.LastAccessTime = 1595 time_buf.LastAccessTime =
1593 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime)); 1596 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime));
1594 } else 1597 } else
1595 time_buf.LastAccessTime = 0; 1598 time_buf.LastAccessTime = 0;
1596 1599
1597 if (attrs->ia_valid & ATTR_MTIME) { 1600 if (attrs->ia_valid & ATTR_MTIME) {
1598 set_time = TRUE; 1601 set_time = true;
1599 time_buf.LastWriteTime = 1602 time_buf.LastWriteTime =
1600 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime)); 1603 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime));
1601 } else 1604 } else
@@ -1606,7 +1609,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1606 server times */ 1609 server times */
1607 1610
1608 if (set_time && (attrs->ia_valid & ATTR_CTIME)) { 1611 if (set_time && (attrs->ia_valid & ATTR_CTIME)) {
1609 set_time = TRUE; 1612 set_time = true;
1610 /* Although Samba throws this field away 1613 /* Although Samba throws this field away
1611 it may be useful to Windows - but we do 1614 it may be useful to Windows - but we do
1612 not want to set ctime unless some other 1615 not want to set ctime unless some other
@@ -1630,7 +1633,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1630 rc = -EOPNOTSUPP; 1633 rc = -EOPNOTSUPP;
1631 1634
1632 if (rc == -EOPNOTSUPP) { 1635 if (rc == -EOPNOTSUPP) {
1633 int oplock = FALSE; 1636 int oplock = 0;
1634 __u16 netfid; 1637 __u16 netfid;
1635 1638
1636 cFYI(1, ("calling SetFileInfo since SetPathInfo for " 1639 cFYI(1, ("calling SetFileInfo since SetPathInfo for "
@@ -1669,7 +1672,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1669 /* do not need local check to inode_check_ok since the server does 1672 /* do not need local check to inode_check_ok since the server does
1670 that */ 1673 that */
1671 if (!rc) 1674 if (!rc)
1672 rc = inode_setattr(direntry->d_inode, attrs); 1675 rc = inode_setattr(inode, attrs);
1673cifs_setattr_exit: 1676cifs_setattr_exit:
1674 kfree(full_path); 1677 kfree(full_path);
1675 FreeXid(xid); 1678 FreeXid(xid);
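
Two behavioural points in the fs/cifs/inode.c mkdir hunk above: the umask is now applied to the requested mode before the unix_ext check, and on servers without Unix extensions (when CIFS ACLs are not in use) a directory whose mode carries no write bit gets the DOS read-only attribute via CIFSSMBSetTimes. A small sketch of just that mode test; the 0x0001 value for the DOS read-only bit is assumed here, and S_IWUGO is spelled out locally because it is a kernel-only macro:

#include <stdio.h>
#include <sys/stat.h>

#define DOS_ATTR_READONLY 0x0001		/* value assumed for this sketch */
#define S_IWUGO (S_IWUSR | S_IWGRP | S_IWOTH)	/* kernel macro, expanded here */

/* Which DOS attribute should a new directory get for a given mode?
 * Mirrors the "umask first, then no-write-bit => read-only" rule above. */
static unsigned int dos_attrs_for_mode(mode_t mode, mode_t umask_bits)
{
	mode &= ~umask_bits;
	if ((mode & S_IWUGO) == 0)
		return DOS_ATTR_READONLY;
	return 0;
}

int main(void)
{
	printf("mode 0755 -> attrs 0x%x\n", dos_attrs_for_mode(0755, 022));
	printf("mode 0555 -> attrs 0x%x\n", dos_attrs_for_mode(0555, 022));
	return 0;
}
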
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index d4e7ec93285f..1c2c3ce5020b 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -230,7 +230,7 @@ cifs_readlink(struct dentry *direntry, char __user *pBuffer, int buflen)
230 struct inode *inode = direntry->d_inode; 230 struct inode *inode = direntry->d_inode;
231 int rc = -EACCES; 231 int rc = -EACCES;
232 int xid; 232 int xid;
233 int oplock = FALSE; 233 int oplock = 0;
234 struct cifs_sb_info *cifs_sb; 234 struct cifs_sb_info *cifs_sb;
235 struct cifsTconInfo *pTcon; 235 struct cifsTconInfo *pTcon;
236 char *full_path = NULL; 236 char *full_path = NULL;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 2a42d9fedbb2..1d69b8014e0b 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -496,7 +496,8 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
496 } 496 }
497 return 0; 497 return 0;
498} 498}
499int 499
500bool
500is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) 501is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
501{ 502{
502 struct smb_com_lock_req *pSMB = (struct smb_com_lock_req *)buf; 503 struct smb_com_lock_req *pSMB = (struct smb_com_lock_req *)buf;
@@ -522,17 +523,17 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
522 pnotify->Action)); /* BB removeme BB */ 523 pnotify->Action)); /* BB removeme BB */
523 /* cifs_dump_mem("Rcvd notify Data: ",buf, 524 /* cifs_dump_mem("Rcvd notify Data: ",buf,
524 sizeof(struct smb_hdr)+60); */ 525 sizeof(struct smb_hdr)+60); */
525 return TRUE; 526 return true;
526 } 527 }
527 if (pSMBr->hdr.Status.CifsError) { 528 if (pSMBr->hdr.Status.CifsError) {
528 cFYI(1, ("notify err 0x%d", 529 cFYI(1, ("notify err 0x%d",
529 pSMBr->hdr.Status.CifsError)); 530 pSMBr->hdr.Status.CifsError));
530 return TRUE; 531 return true;
531 } 532 }
532 return FALSE; 533 return false;
533 } 534 }
534 if (pSMB->hdr.Command != SMB_COM_LOCKING_ANDX) 535 if (pSMB->hdr.Command != SMB_COM_LOCKING_ANDX)
535 return FALSE; 536 return false;
536 if (pSMB->hdr.Flags & SMBFLG_RESPONSE) { 537 if (pSMB->hdr.Flags & SMBFLG_RESPONSE) {
537 /* no sense logging error on invalid handle on oplock 538 /* no sense logging error on invalid handle on oplock
538 break - harmless race between close request and oplock 539 break - harmless race between close request and oplock
@@ -541,21 +542,21 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
541 if ((NT_STATUS_INVALID_HANDLE) == 542 if ((NT_STATUS_INVALID_HANDLE) ==
542 le32_to_cpu(pSMB->hdr.Status.CifsError)) { 543 le32_to_cpu(pSMB->hdr.Status.CifsError)) {
543 cFYI(1, ("invalid handle on oplock break")); 544 cFYI(1, ("invalid handle on oplock break"));
544 return TRUE; 545 return true;
545 } else if (ERRbadfid == 546 } else if (ERRbadfid ==
546 le16_to_cpu(pSMB->hdr.Status.DosError.Error)) { 547 le16_to_cpu(pSMB->hdr.Status.DosError.Error)) {
547 return TRUE; 548 return true;
548 } else { 549 } else {
549 return FALSE; /* on valid oplock brk we get "request" */ 550 return false; /* on valid oplock brk we get "request" */
550 } 551 }
551 } 552 }
552 if (pSMB->hdr.WordCount != 8) 553 if (pSMB->hdr.WordCount != 8)
553 return FALSE; 554 return false;
554 555
555 cFYI(1, ("oplock type 0x%d level 0x%d", 556 cFYI(1, ("oplock type 0x%d level 0x%d",
556 pSMB->LockType, pSMB->OplockLevel)); 557 pSMB->LockType, pSMB->OplockLevel));
557 if (!(pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE)) 558 if (!(pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE))
558 return FALSE; 559 return false;
559 560
560 /* look up tcon based on tid & uid */ 561 /* look up tcon based on tid & uid */
561 read_lock(&GlobalSMBSeslock); 562 read_lock(&GlobalSMBSeslock);
@@ -573,11 +574,11 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
573 ("file id match, oplock break")); 574 ("file id match, oplock break"));
574 pCifsInode = 575 pCifsInode =
575 CIFS_I(netfile->pInode); 576 CIFS_I(netfile->pInode);
576 pCifsInode->clientCanCacheAll = FALSE; 577 pCifsInode->clientCanCacheAll = false;
577 if (pSMB->OplockLevel == 0) 578 if (pSMB->OplockLevel == 0)
578 pCifsInode->clientCanCacheRead 579 pCifsInode->clientCanCacheRead
579 = FALSE; 580 = false;
580 pCifsInode->oplockPending = TRUE; 581 pCifsInode->oplockPending = true;
581 AllocOplockQEntry(netfile->pInode, 582 AllocOplockQEntry(netfile->pInode,
582 netfile->netfid, 583 netfile->netfid,
583 tcon); 584 tcon);
@@ -585,17 +586,17 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
585 ("about to wake up oplock thread")); 586 ("about to wake up oplock thread"));
586 if (oplockThread) 587 if (oplockThread)
587 wake_up_process(oplockThread); 588 wake_up_process(oplockThread);
588 return TRUE; 589 return true;
589 } 590 }
590 } 591 }
591 read_unlock(&GlobalSMBSeslock); 592 read_unlock(&GlobalSMBSeslock);
592 cFYI(1, ("No matching file for oplock break")); 593 cFYI(1, ("No matching file for oplock break"));
593 return TRUE; 594 return true;
594 } 595 }
595 } 596 }
596 read_unlock(&GlobalSMBSeslock); 597 read_unlock(&GlobalSMBSeslock);
597 cFYI(1, ("Can not process oplock break for non-existent connection")); 598 cFYI(1, ("Can not process oplock break for non-existent connection"));
598 return TRUE; 599 return true;
599} 600}
600 601
601void 602void
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 3b5a5ce882b6..00f4cff400b3 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -132,47 +132,17 @@ static const struct smb_to_posix_error mapping_table_ERRHRD[] = {
132 {0, 0} 132 {0, 0}
133}; 133};
134 134
135
136/* if the mount helper is missing we need to reverse the 1st slash
137 from '/' to backslash in order to format the UNC properly for
138 ip address parsing and for tree connect (unless the user
139 remembered to put the UNC name in properly). Fortunately we do
140 not have to call this twice (we check for IPv4 addresses
141 first, so it is already converted by the time we
142 try IPv6 addresses */
143static int canonicalize_unc(char *cp)
144{
145 int i;
146
147 for (i = 0; i <= 46 /* INET6_ADDRSTRLEN */ ; i++) {
148 if (cp[i] == 0)
149 break;
150 if (cp[i] == '\\')
151 break;
152 if (cp[i] == '/') {
153 cFYI(DBG2, ("change slash to \\ in malformed UNC"));
154 cp[i] = '\\';
155 return 1;
156 }
157 }
158 return 0;
159}
160
161/* Convert string containing dotted ip address to binary form */ 135/* Convert string containing dotted ip address to binary form */
162/* returns 0 if invalid address */ 136/* returns 0 if invalid address */
163 137
164int 138int
165cifs_inet_pton(int address_family, char *cp, void *dst) 139cifs_inet_pton(const int address_family, const char *cp, void *dst)
166{ 140{
167 int ret = 0; 141 int ret = 0;
168 142
169 /* calculate length by finding first slash or NULL */ 143 /* calculate length by finding first slash or NULL */
170 if (address_family == AF_INET) { 144 if (address_family == AF_INET) {
171 ret = in4_pton(cp, -1 /* len */, dst, '\\', NULL); 145 ret = in4_pton(cp, -1 /* len */, dst, '\\', NULL);
172 if (ret == 0) {
173 if (canonicalize_unc(cp))
174 ret = in4_pton(cp, -1, dst, '\\', NULL);
175 }
176 } else if (address_family == AF_INET6) { 146 } else if (address_family == AF_INET6) {
177 ret = in6_pton(cp, -1 /* len */, dst , '\\', NULL); 147 ret = in6_pton(cp, -1 /* len */, dst , '\\', NULL);
178 } 148 }
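
The fs/cifs/netmisc.c hunk removes canonicalize_unc() and its slash fix-up for malformed UNCs; cifs_inet_pton() is now a thin, const-correct wrapper that converts the address portion up to the first backslash via in4_pton()/in6_pton(). A rough userspace analogue of that "convert up to a delimiter" behaviour, using inet_pton() on a copied prefix since the libc routine has no delimiter argument:

#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

/* Convert the address prefix of something like "192.168.1.10\share":
 * stop at the delimiter, then hand the prefix to inet_pton(). */
static int pton_until(int af, const char *cp, char delim, void *dst)
{
	char buf[INET6_ADDRSTRLEN];
	const char *end = strchr(cp, delim);
	size_t len = end ? (size_t)(end - cp) : strlen(cp);

	if (len >= sizeof(buf))
		return 0;
	memcpy(buf, cp, len);
	buf[len] = '\0';
	return inet_pton(af, buf, dst) == 1;
}

int main(void)
{
	struct in_addr a4;
	struct in6_addr a6;

	printf("v4: %d\n", pton_until(AF_INET, "192.168.1.10\\share", '\\', &a4));
	printf("v6: %d\n", pton_until(AF_INET6, "fe80::1\\share", '\\', &a6));
	return 0;
}
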
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 32b445edc882..34ec32100c72 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -447,8 +447,8 @@ static int initiate_cifs_search(const int xid, struct file *file)
447 if (file->private_data == NULL) 447 if (file->private_data == NULL)
448 return -ENOMEM; 448 return -ENOMEM;
449 cifsFile = file->private_data; 449 cifsFile = file->private_data;
450 cifsFile->invalidHandle = TRUE; 450 cifsFile->invalidHandle = true;
451 cifsFile->srch_inf.endOfSearch = FALSE; 451 cifsFile->srch_inf.endOfSearch = false;
452 452
453 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 453 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
454 if (cifs_sb == NULL) 454 if (cifs_sb == NULL)
@@ -485,7 +485,7 @@ ffirst_retry:
485 cifs_sb->mnt_cifs_flags & 485 cifs_sb->mnt_cifs_flags &
486 CIFS_MOUNT_MAP_SPECIAL_CHR, CIFS_DIR_SEP(cifs_sb)); 486 CIFS_MOUNT_MAP_SPECIAL_CHR, CIFS_DIR_SEP(cifs_sb));
487 if (rc == 0) 487 if (rc == 0)
488 cifsFile->invalidHandle = FALSE; 488 cifsFile->invalidHandle = false;
489 if ((rc == -EOPNOTSUPP) && 489 if ((rc == -EOPNOTSUPP) &&
490 (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) { 490 (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) {
491 cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM; 491 cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM;
@@ -670,7 +670,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
670 (index_to_find < first_entry_in_buffer)) { 670 (index_to_find < first_entry_in_buffer)) {
671 /* close and restart search */ 671 /* close and restart search */
672 cFYI(1, ("search backing up - close and restart search")); 672 cFYI(1, ("search backing up - close and restart search"));
673 cifsFile->invalidHandle = TRUE; 673 cifsFile->invalidHandle = true;
674 CIFSFindClose(xid, pTcon, cifsFile->netfid); 674 CIFSFindClose(xid, pTcon, cifsFile->netfid);
675 kfree(cifsFile->search_resume_name); 675 kfree(cifsFile->search_resume_name);
676 cifsFile->search_resume_name = NULL; 676 cifsFile->search_resume_name = NULL;
@@ -692,7 +692,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
692 } 692 }
693 693
694 while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) && 694 while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) &&
695 (rc == 0) && (cifsFile->srch_inf.endOfSearch == FALSE)) { 695 (rc == 0) && !cifsFile->srch_inf.endOfSearch) {
696 cFYI(1, ("calling findnext2")); 696 cFYI(1, ("calling findnext2"));
697 rc = CIFSFindNext(xid, pTcon, cifsFile->netfid, 697 rc = CIFSFindNext(xid, pTcon, cifsFile->netfid,
698 &cifsFile->srch_inf); 698 &cifsFile->srch_inf);
@@ -1038,7 +1038,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
1038 break; 1038 break;
1039 } 1039 }
1040 } /* else { 1040 } /* else {
1041 cifsFile->invalidHandle = TRUE; 1041 cifsFile->invalidHandle = true;
1042 CIFSFindClose(xid, pTcon, cifsFile->netfid); 1042 CIFSFindClose(xid, pTcon, cifsFile->netfid);
1043 } 1043 }
1044 kfree(cifsFile->search_resume_name); 1044 kfree(cifsFile->search_resume_name);
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index 58bbfd992cc0..ff3232fa1015 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -35,11 +35,11 @@
35#include "cifs_debug.h" 35#include "cifs_debug.h"
36#include "cifsencrypt.h" 36#include "cifsencrypt.h"
37 37
38#ifndef FALSE 38#ifndef false
39#define FALSE 0 39#define false 0
40#endif 40#endif
41#ifndef TRUE 41#ifndef true
42#define TRUE 1 42#define true 1
43#endif 43#endif
44 44
45/* following came from the other byteorder.h to avoid include conflicts */ 45/* following came from the other byteorder.h to avoid include conflicts */
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 8cd6a445b017..e9527eedc639 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -264,7 +264,7 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
264#ifdef CONFIG_CIFS_EXPERIMENTAL 264#ifdef CONFIG_CIFS_EXPERIMENTAL
265 else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { 265 else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
266 __u16 fid; 266 __u16 fid;
267 int oplock = FALSE; 267 int oplock = 0;
268 struct cifs_ntsd *pacl = NULL; 268 struct cifs_ntsd *pacl = NULL;
269 __u32 buflen = 0; 269 __u32 buflen = 0;
270 if (experimEnabled) 270 if (experimEnabled)
diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index 95a54253c047..e1c854890f94 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -134,7 +134,7 @@ void coda_iattr_to_vattr(struct iattr *iattr, struct coda_vattr *vattr)
134 unsigned int valid; 134 unsigned int valid;
135 135
136 /* clean out */ 136 /* clean out */
137 vattr->va_mode = (umode_t) -1; 137 vattr->va_mode = -1;
138 vattr->va_uid = (vuid_t) -1; 138 vattr->va_uid = (vuid_t) -1;
139 vattr->va_gid = (vgid_t) -1; 139 vattr->va_gid = (vgid_t) -1;
140 vattr->va_size = (off_t) -1; 140 vattr->va_size = (off_t) -1;
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index f89ff083079b..3d2580e00a3e 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -345,7 +345,7 @@ static int coda_symlink(struct inode *dir_inode, struct dentry *de,
345} 345}
346 346
347/* destruction routines: unlink, rmdir */ 347/* destruction routines: unlink, rmdir */
348int coda_unlink(struct inode *dir, struct dentry *de) 348static int coda_unlink(struct inode *dir, struct dentry *de)
349{ 349{
350 int error; 350 int error;
351 const char *name = de->d_name.name; 351 const char *name = de->d_name.name;
@@ -365,7 +365,7 @@ int coda_unlink(struct inode *dir, struct dentry *de)
365 return 0; 365 return 0;
366} 366}
367 367
368int coda_rmdir(struct inode *dir, struct dentry *de) 368static int coda_rmdir(struct inode *dir, struct dentry *de)
369{ 369{
370 const char *name = de->d_name.name; 370 const char *name = de->d_name.name;
371 int len = de->d_name.len; 371 int len = de->d_name.len;
@@ -424,7 +424,7 @@ static int coda_rename(struct inode *old_dir, struct dentry *old_dentry,
424 424
425 425
426/* file operations for directories */ 426/* file operations for directories */
427int coda_readdir(struct file *coda_file, void *buf, filldir_t filldir) 427static int coda_readdir(struct file *coda_file, void *buf, filldir_t filldir)
428{ 428{
429 struct coda_file_info *cfi; 429 struct coda_file_info *cfi;
430 struct file *host_file; 430 struct file *host_file;
diff --git a/fs/compat.c b/fs/compat.c
index 2ce4456aad30..332a869d2c53 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -24,6 +24,7 @@
24#include <linux/fcntl.h> 24#include <linux/fcntl.h>
25#include <linux/namei.h> 25#include <linux/namei.h>
26#include <linux/file.h> 26#include <linux/file.h>
27#include <linux/fdtable.h>
27#include <linux/vfs.h> 28#include <linux/vfs.h>
28#include <linux/ioctl.h> 29#include <linux/ioctl.h>
29#include <linux/init.h> 30#include <linux/init.h>
@@ -1634,7 +1635,7 @@ sticky:
1634 return ret; 1635 return ret;
1635} 1636}
1636 1637
1637#ifdef TIF_RESTORE_SIGMASK 1638#ifdef HAVE_SET_RESTORE_SIGMASK
1638asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp, 1639asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
1639 compat_ulong_t __user *outp, compat_ulong_t __user *exp, 1640 compat_ulong_t __user *outp, compat_ulong_t __user *exp,
1640 struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask, 1641 struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask,
@@ -1720,7 +1721,7 @@ sticky:
1720 if (sigmask) { 1721 if (sigmask) {
1721 memcpy(&current->saved_sigmask, &sigsaved, 1722 memcpy(&current->saved_sigmask, &sigsaved,
1722 sizeof(sigsaved)); 1723 sizeof(sigsaved));
1723 set_thread_flag(TIF_RESTORE_SIGMASK); 1724 set_restore_sigmask();
1724 } 1725 }
1725 } else if (sigmask) 1726 } else if (sigmask)
1726 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 1727 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
@@ -1791,7 +1792,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
1791 if (sigmask) { 1792 if (sigmask) {
1792 memcpy(&current->saved_sigmask, &sigsaved, 1793 memcpy(&current->saved_sigmask, &sigsaved,
1793 sizeof(sigsaved)); 1794 sizeof(sigsaved));
1794 set_thread_flag(TIF_RESTORE_SIGMASK); 1795 set_restore_sigmask();
1795 } 1796 }
1796 ret = -ERESTARTNOHAND; 1797 ret = -ERESTARTNOHAND;
1797 } else if (sigmask) 1798 } else if (sigmask)
@@ -1825,7 +1826,7 @@ sticky:
1825 1826
1826 return ret; 1827 return ret;
1827} 1828}
1828#endif /* TIF_RESTORE_SIGMASK */ 1829#endif /* HAVE_SET_RESTORE_SIGMASK */
1829 1830
1830#if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE) 1831#if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)
1831/* Stuff for NFS server syscalls... */ 1832/* Stuff for NFS server syscalls... */
@@ -2080,7 +2081,7 @@ long asmlinkage compat_sys_nfsservctl(int cmd, void *notused, void *notused2)
2080 2081
2081#ifdef CONFIG_EPOLL 2082#ifdef CONFIG_EPOLL
2082 2083
2083#ifdef TIF_RESTORE_SIGMASK 2084#ifdef HAVE_SET_RESTORE_SIGMASK
2084asmlinkage long compat_sys_epoll_pwait(int epfd, 2085asmlinkage long compat_sys_epoll_pwait(int epfd,
2085 struct compat_epoll_event __user *events, 2086 struct compat_epoll_event __user *events,
2086 int maxevents, int timeout, 2087 int maxevents, int timeout,
@@ -2117,14 +2118,14 @@ asmlinkage long compat_sys_epoll_pwait(int epfd,
2117 if (err == -EINTR) { 2118 if (err == -EINTR) {
2118 memcpy(&current->saved_sigmask, &sigsaved, 2119 memcpy(&current->saved_sigmask, &sigsaved,
2119 sizeof(sigsaved)); 2120 sizeof(sigsaved));
2120 set_thread_flag(TIF_RESTORE_SIGMASK); 2121 set_restore_sigmask();
2121 } else 2122 } else
2122 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 2123 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2123 } 2124 }
2124 2125
2125 return err; 2126 return err;
2126} 2127}
2127#endif /* TIF_RESTORE_SIGMASK */ 2128#endif /* HAVE_SET_RESTORE_SIGMASK */
2128 2129
2129#endif /* CONFIG_EPOLL */ 2130#endif /* CONFIG_EPOLL */
2130 2131
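
The fs/compat.c changes above switch the compat pselect/ppoll/epoll_pwait paths from setting TIF_RESTORE_SIGMASK by hand to the set_restore_sigmask() helper, keyed on HAVE_SET_RESTORE_SIGMASK rather than the raw thread flag. The userspace contract is unchanged: the temporary signal mask is installed atomically for the duration of the wait and the original mask comes back when the syscall returns. A small sketch of that contract from the caller's side, using pselect():

#include <signal.h>
#include <stdio.h>
#include <sys/select.h>
#include <time.h>
#include <unistd.h>

static void on_sigint(int sig) { (void)sig; }	/* just interrupt the wait */

int main(void)
{
	sigset_t blocked, during_wait;
	struct timespec ts = { .tv_sec = 5, .tv_nsec = 0 };
	fd_set rfds;
	int n;

	signal(SIGINT, on_sigint);

	/* SIGINT is blocked everywhere except inside the pselect() wait. */
	sigemptyset(&blocked);
	sigaddset(&blocked, SIGINT);
	sigprocmask(SIG_BLOCK, &blocked, NULL);

	sigemptyset(&during_wait);		/* empty mask while sleeping */

	FD_ZERO(&rfds);
	FD_SET(STDIN_FILENO, &rfds);

	/* The kernel installs during_wait for the wait and restores the old
	 * mask on return; that restore is what the sigmask machinery above
	 * takes care of on the kernel side. */
	n = pselect(STDIN_FILENO + 1, &rfds, NULL, NULL, &ts, &during_wait);
	printf("pselect returned %d; SIGINT is blocked again here\n", n);
	return 0;
}
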
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index c6e72aebd16b..97dba0d92348 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1046,14 +1046,14 @@ static int vt_check(struct file *file)
1046 struct inode *inode = file->f_path.dentry->d_inode; 1046 struct inode *inode = file->f_path.dentry->d_inode;
1047 struct vc_data *vc; 1047 struct vc_data *vc;
1048 1048
1049 if (file->f_op->ioctl != tty_ioctl) 1049 if (file->f_op->unlocked_ioctl != tty_ioctl)
1050 return -EINVAL; 1050 return -EINVAL;
1051 1051
1052 tty = (struct tty_struct *)file->private_data; 1052 tty = (struct tty_struct *)file->private_data;
1053 if (tty_paranoia_check(tty, inode, "tty_ioctl")) 1053 if (tty_paranoia_check(tty, inode, "tty_ioctl"))
1054 return -EINVAL; 1054 return -EINVAL;
1055 1055
1056 if (tty->driver->ioctl != vt_ioctl) 1056 if (tty->ops->ioctl != vt_ioctl)
1057 return -EINVAL; 1057 return -EINVAL;
1058 1058
1059 vc = (struct vc_data *)tty->driver_data; 1059 vc = (struct vc_data *)tty->driver_data;
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index 397cb503a180..2b6cb23dd14e 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -115,7 +115,7 @@ configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *pp
115 goto out; 115 goto out;
116 } 116 }
117 pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", 117 pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n",
118 __FUNCTION__, count, *ppos, buffer->page); 118 __func__, count, *ppos, buffer->page);
119 retval = simple_read_from_buffer(buf, count, ppos, buffer->page, 119 retval = simple_read_from_buffer(buf, count, ppos, buffer->page,
120 buffer->count); 120 buffer->count);
121out: 121out:
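
The configfs, dlm and ecryptfs hunks in this series also sweep __FUNCTION__ over to __func__: the former is a GCC extension kept for compatibility, the latter is the standard C99 predefined identifier. A trivial standalone illustration:

#include <stdio.h>

static void demo_debug(void)
{
	/* __func__ is standard C99; __FUNCTION__ is the older GCC-only
	 * spelling the hunks above migrate away from. */
	printf("%s: could not get root dentry!\n", __func__);
}

int main(void)
{
	demo_debug();
	return 0;
}
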
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 4c1ebff778ee..b9a1d810346d 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -47,7 +47,7 @@ static const struct address_space_operations configfs_aops = {
47 47
48static struct backing_dev_info configfs_backing_dev_info = { 48static struct backing_dev_info configfs_backing_dev_info = {
49 .ra_pages = 0, /* No readahead */ 49 .ra_pages = 0, /* No readahead */
50 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, 50 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
51}; 51};
52 52
53static const struct inode_operations configfs_inode_operations ={ 53static const struct inode_operations configfs_inode_operations ={
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index de3b31d0a37d..8421cea7d8c7 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -92,7 +92,7 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent)
92 92
93 root = d_alloc_root(inode); 93 root = d_alloc_root(inode);
94 if (!root) { 94 if (!root) {
95 pr_debug("%s: could not get root dentry!\n",__FUNCTION__); 95 pr_debug("%s: could not get root dentry!\n",__func__);
96 iput(inode); 96 iput(inode);
97 return -ENOMEM; 97 return -ENOMEM;
98 } 98 }
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 78929ea84ff2..2a731ef5f305 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -210,13 +210,13 @@ static int configfs_get_target_path(struct config_item * item, struct config_ite
210 if (size > PATH_MAX) 210 if (size > PATH_MAX)
211 return -ENAMETOOLONG; 211 return -ENAMETOOLONG;
212 212
213 pr_debug("%s: depth = %d, size = %d\n", __FUNCTION__, depth, size); 213 pr_debug("%s: depth = %d, size = %d\n", __func__, depth, size);
214 214
215 for (s = path; depth--; s += 3) 215 for (s = path; depth--; s += 3)
216 strcpy(s,"../"); 216 strcpy(s,"../");
217 217
218 fill_item_path(target, path, size); 218 fill_item_path(target, path, size);
219 pr_debug("%s: path = '%s'\n", __FUNCTION__, path); 219 pr_debug("%s: path = '%s'\n", __func__, path);
220 220
221 return 0; 221 return 0;
222} 222}
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index fddffe4851f5..159a5efd6a8a 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -9,7 +9,7 @@
9 * 2 as published by the Free Software Foundation. 9 * 2 as published by the Free Software Foundation.
10 * 10 *
11 * debugfs is for people to use instead of /proc or /sys. 11 * debugfs is for people to use instead of /proc or /sys.
12 * See Documentation/DocBook/kernel-api for more details. 12 * See Documentation/DocBook/filesystems for more details.
13 * 13 *
14 */ 14 */
15 15
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index f120e1207874..285b64a8b06e 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -17,6 +17,8 @@
17#include <linux/namei.h> 17#include <linux/namei.h>
18#include <linux/mount.h> 18#include <linux/mount.h>
19#include <linux/tty.h> 19#include <linux/tty.h>
20#include <linux/mutex.h>
21#include <linux/idr.h>
20#include <linux/devpts_fs.h> 22#include <linux/devpts_fs.h>
21#include <linux/parser.h> 23#include <linux/parser.h>
22#include <linux/fsnotify.h> 24#include <linux/fsnotify.h>
@@ -26,6 +28,10 @@
26 28
27#define DEVPTS_DEFAULT_MODE 0600 29#define DEVPTS_DEFAULT_MODE 0600
28 30
31extern int pty_limit; /* Config limit on Unix98 ptys */
32static DEFINE_IDR(allocated_ptys);
33static DEFINE_MUTEX(allocated_ptys_lock);
34
29static struct vfsmount *devpts_mnt; 35static struct vfsmount *devpts_mnt;
30static struct dentry *devpts_root; 36static struct dentry *devpts_root;
31 37
@@ -171,9 +177,44 @@ static struct dentry *get_node(int num)
171 return lookup_one_len(s, root, sprintf(s, "%d", num)); 177 return lookup_one_len(s, root, sprintf(s, "%d", num));
172} 178}
173 179
180int devpts_new_index(void)
181{
182 int index;
183 int idr_ret;
184
185retry:
186 if (!idr_pre_get(&allocated_ptys, GFP_KERNEL)) {
187 return -ENOMEM;
188 }
189
190 mutex_lock(&allocated_ptys_lock);
191 idr_ret = idr_get_new(&allocated_ptys, NULL, &index);
192 if (idr_ret < 0) {
193 mutex_unlock(&allocated_ptys_lock);
194 if (idr_ret == -EAGAIN)
195 goto retry;
196 return -EIO;
197 }
198
199 if (index >= pty_limit) {
200 idr_remove(&allocated_ptys, index);
201 mutex_unlock(&allocated_ptys_lock);
202 return -EIO;
203 }
204 mutex_unlock(&allocated_ptys_lock);
205 return index;
206}
207
208void devpts_kill_index(int idx)
209{
210 mutex_lock(&allocated_ptys_lock);
211 idr_remove(&allocated_ptys, idx);
212 mutex_unlock(&allocated_ptys_lock);
213}
214
174int devpts_pty_new(struct tty_struct *tty) 215int devpts_pty_new(struct tty_struct *tty)
175{ 216{
176 int number = tty->index; 217 int number = tty->index; /* tty layer puts index from devpts_new_index() in here */
177 struct tty_driver *driver = tty->driver; 218 struct tty_driver *driver = tty->driver;
178 dev_t device = MKDEV(driver->major, driver->minor_start+number); 219 dev_t device = MKDEV(driver->major, driver->minor_start+number);
179 struct dentry *dentry; 220 struct dentry *dentry;
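
The devpts hunk moves pty index management into the filesystem: devpts_new_index() hands out the lowest free index from an IDR under a mutex and rejects anything at or above pty_limit, and devpts_kill_index() gives it back. A userspace analogue of the same lowest-free-index-under-a-lock idea, using a plain bitmap and a pthread mutex instead of the kernel IDR (names and the limit are made up for the sketch; build with -pthread):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

#define PTY_LIMIT 4096				/* stands in for the pty_limit sysctl */

static unsigned char in_use[PTY_LIMIT];		/* stands in for the IDR */
static pthread_mutex_t index_lock = PTHREAD_MUTEX_INITIALIZER;

static int new_index(void)
{
	int i, idx = -ENOSPC;

	pthread_mutex_lock(&index_lock);
	for (i = 0; i < PTY_LIMIT; i++) {
		if (!in_use[i]) {
			in_use[i] = 1;
			idx = i;		/* lowest free slot, like idr_get_new() */
			break;
		}
	}
	pthread_mutex_unlock(&index_lock);
	return idx;
}

static void kill_index(int idx)
{
	pthread_mutex_lock(&index_lock);
	in_use[idx] = 0;
	pthread_mutex_unlock(&index_lock);
}

int main(void)
{
	int a = new_index(), b = new_index();

	printf("allocated %d and %d\n", a, b);
	kill_index(a);
	printf("freed %d, next allocation gives %d\n", a, new_index());
	kill_index(b);
	return 0;
}
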
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index b64e55e0515d..499e16759e96 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -200,7 +200,7 @@ int __init dlm_lockspace_init(void)
200 200
201 dlm_kset = kset_create_and_add("dlm", NULL, kernel_kobj); 201 dlm_kset = kset_create_and_add("dlm", NULL, kernel_kobj);
202 if (!dlm_kset) { 202 if (!dlm_kset) {
203 printk(KERN_WARNING "%s: can not create kset\n", __FUNCTION__); 203 printk(KERN_WARNING "%s: can not create kset\n", __func__);
204 return -ENOMEM; 204 return -ENOMEM;
205 } 205 }
206 return 0; 206 return 0;
diff --git a/fs/dnotify.c b/fs/dnotify.c
index 28d01ed66de0..676073b8dda5 100644
--- a/fs/dnotify.c
+++ b/fs/dnotify.c
@@ -20,6 +20,7 @@
20#include <linux/init.h> 20#include <linux/init.h>
21#include <linux/spinlock.h> 21#include <linux/spinlock.h>
22#include <linux/slab.h> 22#include <linux/slab.h>
23#include <linux/fdtable.h>
23 24
24int dir_notify_enable __read_mostly = 1; 25int dir_notify_enable __read_mostly = 1;
25 26
@@ -66,6 +67,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
66 struct dnotify_struct **prev; 67 struct dnotify_struct **prev;
67 struct inode *inode; 68 struct inode *inode;
68 fl_owner_t id = current->files; 69 fl_owner_t id = current->files;
70 struct file *f;
69 int error = 0; 71 int error = 0;
70 72
71 if ((arg & ~DN_MULTISHOT) == 0) { 73 if ((arg & ~DN_MULTISHOT) == 0) {
@@ -92,6 +94,15 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
92 prev = &odn->dn_next; 94 prev = &odn->dn_next;
93 } 95 }
94 96
97 rcu_read_lock();
98 f = fcheck(fd);
99 rcu_read_unlock();
100 /* we'd lost the race with close(), sod off silently */
101 /* note that inode->i_lock prevents reordering problems
102 * between accesses to descriptor table and ->i_dnotify */
103 if (f != filp)
104 goto out_free;
105
95 error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); 106 error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
96 if (error) 107 if (error)
97 goto out_free; 108 goto out_free;
diff --git a/fs/dquot.c b/fs/dquot.c
index dfba1623cccb..5ac77da19959 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -1491,6 +1491,16 @@ int vfs_quota_off(struct super_block *sb, int type, int remount)
1491 1491
1492 /* We need to serialize quota_off() for device */ 1492 /* We need to serialize quota_off() for device */
1493 mutex_lock(&dqopt->dqonoff_mutex); 1493 mutex_lock(&dqopt->dqonoff_mutex);
1494
1495 /*
1496 * Skip everything if there's nothing to do. We have to do this because
1497 * sometimes we are called when fill_super() failed and calling
1498 * sync_fs() in such cases does no good.
1499 */
1500 if (!sb_any_quota_enabled(sb) && !sb_any_quota_suspended(sb)) {
1501 mutex_unlock(&dqopt->dqonoff_mutex);
1502 return 0;
1503 }
1494 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1504 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1495 toputinode[cnt] = NULL; 1505 toputinode[cnt] = NULL;
1496 if (type != -1 && cnt != type) 1506 if (type != -1 && cnt != type)
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 59375efcf39d..3e5637fc3779 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -14,18 +14,26 @@ int sysctl_drop_caches;
14 14
15static void drop_pagecache_sb(struct super_block *sb) 15static void drop_pagecache_sb(struct super_block *sb)
16{ 16{
17 struct inode *inode; 17 struct inode *inode, *toput_inode = NULL;
18 18
19 spin_lock(&inode_lock); 19 spin_lock(&inode_lock);
20 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 20 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
21 if (inode->i_state & (I_FREEING|I_WILL_FREE)) 21 if (inode->i_state & (I_FREEING|I_WILL_FREE))
22 continue; 22 continue;
23 if (inode->i_mapping->nrpages == 0)
24 continue;
25 __iget(inode);
26 spin_unlock(&inode_lock);
23 __invalidate_mapping_pages(inode->i_mapping, 0, -1, true); 27 __invalidate_mapping_pages(inode->i_mapping, 0, -1, true);
28 iput(toput_inode);
29 toput_inode = inode;
30 spin_lock(&inode_lock);
24 } 31 }
25 spin_unlock(&inode_lock); 32 spin_unlock(&inode_lock);
33 iput(toput_inode);
26} 34}
27 35
28void drop_pagecache(void) 36static void drop_pagecache(void)
29{ 37{
30 struct super_block *sb; 38 struct super_block *sb;
31 39
@@ -45,7 +53,7 @@ restart:
45 spin_unlock(&sb_lock); 53 spin_unlock(&sb_lock);
46} 54}
47 55
48void drop_slab(void) 56static void drop_slab(void)
49{ 57{
50 int nr_objects; 58 int nr_objects;
51 59
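
The drop_caches change above fixes the iteration in drop_pagecache_sb(): each inode is pinned with __iget() before inode_lock is dropped, the invalidation runs unlocked, and the previous inode's reference is released with iput() only once it is no longer the iteration cursor; drop_pagecache() and drop_slab() also become static. A deliberately simplified userspace sketch of that walk-a-locked-list-with-a-deferred-release shape (single-threaded, nodes are never freed, and the concurrent-removal handling the kernel gets from its I_FREEING checks is out of scope; build with -pthread):

#include <pthread.h>
#include <stdio.h>

struct node {
	struct node *next;
	int pins;		/* reference count, protected by list_lock */
	int id;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

static void unpin(struct node *n)	/* analogue of iput(); called unlocked */
{
	if (!n)
		return;
	pthread_mutex_lock(&list_lock);
	n->pins--;
	pthread_mutex_unlock(&list_lock);
}

static void slow_work(struct node *n)	/* may block; must not hold list_lock */
{
	printf("invalidating node %d\n", n->id);
}

static void walk(struct node *head)
{
	struct node *n, *to_unpin = NULL;

	pthread_mutex_lock(&list_lock);
	for (n = head; n; n = n->next) {
		n->pins++;			/* like __iget(): pin the cursor */
		pthread_mutex_unlock(&list_lock);

		slow_work(n);
		unpin(to_unpin);		/* previous cursor released here */
		to_unpin = n;

		pthread_mutex_lock(&list_lock);
	}
	pthread_mutex_unlock(&list_lock);
	unpin(to_unpin);
}

int main(void)
{
	struct node c = { NULL, 0, 3 }, b = { &c, 0, 2 }, a = { &b, 0, 1 };

	walk(&a);
	return 0;
}
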
diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile
index 768857015516..1e34a7fd4884 100644
--- a/fs/ecryptfs/Makefile
+++ b/fs/ecryptfs/Makefile
@@ -4,4 +4,4 @@
4 4
5obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o 5obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o
6 6
7ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o debug.o 7ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o miscdev.o debug.o
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index a066e109ad9c..cd62d75b2cc0 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -119,21 +119,21 @@ static int ecryptfs_calculate_md5(char *dst,
119 if (rc) { 119 if (rc) {
120 printk(KERN_ERR 120 printk(KERN_ERR
121 "%s: Error initializing crypto hash; rc = [%d]\n", 121 "%s: Error initializing crypto hash; rc = [%d]\n",
122 __FUNCTION__, rc); 122 __func__, rc);
123 goto out; 123 goto out;
124 } 124 }
125 rc = crypto_hash_update(&desc, &sg, len); 125 rc = crypto_hash_update(&desc, &sg, len);
126 if (rc) { 126 if (rc) {
127 printk(KERN_ERR 127 printk(KERN_ERR
128 "%s: Error updating crypto hash; rc = [%d]\n", 128 "%s: Error updating crypto hash; rc = [%d]\n",
129 __FUNCTION__, rc); 129 __func__, rc);
130 goto out; 130 goto out;
131 } 131 }
132 rc = crypto_hash_final(&desc, dst); 132 rc = crypto_hash_final(&desc, dst);
133 if (rc) { 133 if (rc) {
134 printk(KERN_ERR 134 printk(KERN_ERR
135 "%s: Error finalizing crypto hash; rc = [%d]\n", 135 "%s: Error finalizing crypto hash; rc = [%d]\n",
136 __FUNCTION__, rc); 136 __func__, rc);
137 goto out; 137 goto out;
138 } 138 }
139out: 139out:
@@ -437,7 +437,7 @@ static int ecryptfs_encrypt_extent(struct page *enc_extent_page,
437 if (rc < 0) { 437 if (rc < 0) {
438 printk(KERN_ERR "%s: Error attempting to encrypt page with " 438 printk(KERN_ERR "%s: Error attempting to encrypt page with "
439 "page->index = [%ld], extent_offset = [%ld]; " 439 "page->index = [%ld], extent_offset = [%ld]; "
440 "rc = [%d]\n", __FUNCTION__, page->index, extent_offset, 440 "rc = [%d]\n", __func__, page->index, extent_offset,
441 rc); 441 rc);
442 goto out; 442 goto out;
443 } 443 }
@@ -487,7 +487,7 @@ int ecryptfs_encrypt_page(struct page *page)
487 0, PAGE_CACHE_SIZE); 487 0, PAGE_CACHE_SIZE);
488 if (rc) 488 if (rc)
489 printk(KERN_ERR "%s: Error attempting to copy " 489 printk(KERN_ERR "%s: Error attempting to copy "
490 "page at index [%ld]\n", __FUNCTION__, 490 "page at index [%ld]\n", __func__,
491 page->index); 491 page->index);
492 goto out; 492 goto out;
493 } 493 }
@@ -508,7 +508,7 @@ int ecryptfs_encrypt_page(struct page *page)
508 extent_offset); 508 extent_offset);
509 if (rc) { 509 if (rc) {
510 printk(KERN_ERR "%s: Error encrypting extent; " 510 printk(KERN_ERR "%s: Error encrypting extent; "
511 "rc = [%d]\n", __FUNCTION__, rc); 511 "rc = [%d]\n", __func__, rc);
512 goto out; 512 goto out;
513 } 513 }
514 ecryptfs_lower_offset_for_extent( 514 ecryptfs_lower_offset_for_extent(
@@ -569,7 +569,7 @@ static int ecryptfs_decrypt_extent(struct page *page,
569 if (rc < 0) { 569 if (rc < 0) {
570 printk(KERN_ERR "%s: Error attempting to decrypt to page with " 570 printk(KERN_ERR "%s: Error attempting to decrypt to page with "
571 "page->index = [%ld], extent_offset = [%ld]; " 571 "page->index = [%ld], extent_offset = [%ld]; "
572 "rc = [%d]\n", __FUNCTION__, page->index, extent_offset, 572 "rc = [%d]\n", __func__, page->index, extent_offset,
573 rc); 573 rc);
574 goto out; 574 goto out;
575 } 575 }
@@ -622,7 +622,7 @@ int ecryptfs_decrypt_page(struct page *page)
622 ecryptfs_inode); 622 ecryptfs_inode);
623 if (rc) 623 if (rc)
624 printk(KERN_ERR "%s: Error attempting to copy " 624 printk(KERN_ERR "%s: Error attempting to copy "
625 "page at index [%ld]\n", __FUNCTION__, 625 "page at index [%ld]\n", __func__,
626 page->index); 626 page->index);
627 goto out; 627 goto out;
628 } 628 }
@@ -656,7 +656,7 @@ int ecryptfs_decrypt_page(struct page *page)
656 extent_offset); 656 extent_offset);
657 if (rc) { 657 if (rc) {
658 printk(KERN_ERR "%s: Error encrypting extent; " 658 printk(KERN_ERR "%s: Error encrypting extent; "
659 "rc = [%d]\n", __FUNCTION__, rc); 659 "rc = [%d]\n", __func__, rc);
660 goto out; 660 goto out;
661 } 661 }
662 } 662 }
@@ -1215,7 +1215,7 @@ int ecryptfs_read_and_validate_header_region(char *data,
1215 ecryptfs_inode); 1215 ecryptfs_inode);
1216 if (rc) { 1216 if (rc) {
1217 printk(KERN_ERR "%s: Error reading header region; rc = [%d]\n", 1217 printk(KERN_ERR "%s: Error reading header region; rc = [%d]\n",
1218 __FUNCTION__, rc); 1218 __func__, rc);
1219 goto out; 1219 goto out;
1220 } 1220 }
1221 if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES)) { 1221 if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES)) {
@@ -1246,7 +1246,6 @@ ecryptfs_write_header_metadata(char *virt,
1246 (*written) = 6; 1246 (*written) = 6;
1247} 1247}
1248 1248
1249struct kmem_cache *ecryptfs_header_cache_0;
1250struct kmem_cache *ecryptfs_header_cache_1; 1249struct kmem_cache *ecryptfs_header_cache_1;
1251struct kmem_cache *ecryptfs_header_cache_2; 1250struct kmem_cache *ecryptfs_header_cache_2;
1252 1251
@@ -1320,7 +1319,7 @@ ecryptfs_write_metadata_to_contents(struct ecryptfs_crypt_stat *crypt_stat,
1320 0, crypt_stat->num_header_bytes_at_front); 1319 0, crypt_stat->num_header_bytes_at_front);
1321 if (rc) 1320 if (rc)
1322 printk(KERN_ERR "%s: Error attempting to write header " 1321 printk(KERN_ERR "%s: Error attempting to write header "
1323 "information to lower file; rc = [%d]\n", __FUNCTION__, 1322 "information to lower file; rc = [%d]\n", __func__,
1324 rc); 1323 rc);
1325 return rc; 1324 return rc;
1326} 1325}
@@ -1365,14 +1364,14 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
1365 } 1364 }
1366 } else { 1365 } else {
1367 printk(KERN_WARNING "%s: Encrypted flag not set\n", 1366 printk(KERN_WARNING "%s: Encrypted flag not set\n",
1368 __FUNCTION__); 1367 __func__);
1369 rc = -EINVAL; 1368 rc = -EINVAL;
1370 goto out; 1369 goto out;
1371 } 1370 }
1372 /* Released in this function */ 1371 /* Released in this function */
1373 virt = kzalloc(crypt_stat->num_header_bytes_at_front, GFP_KERNEL); 1372 virt = kzalloc(crypt_stat->num_header_bytes_at_front, GFP_KERNEL);
1374 if (!virt) { 1373 if (!virt) {
1375 printk(KERN_ERR "%s: Out of memory\n", __FUNCTION__); 1374 printk(KERN_ERR "%s: Out of memory\n", __func__);
1376 rc = -ENOMEM; 1375 rc = -ENOMEM;
1377 goto out; 1376 goto out;
1378 } 1377 }
@@ -1380,7 +1379,7 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
1380 ecryptfs_dentry); 1379 ecryptfs_dentry);
1381 if (unlikely(rc)) { 1380 if (unlikely(rc)) {
1382 printk(KERN_ERR "%s: Error whilst writing headers; rc = [%d]\n", 1381 printk(KERN_ERR "%s: Error whilst writing headers; rc = [%d]\n",
1383 __FUNCTION__, rc); 1382 __func__, rc);
1384 goto out_free; 1383 goto out_free;
1385 } 1384 }
1386 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) 1385 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
@@ -1391,7 +1390,7 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
1391 ecryptfs_dentry, virt); 1390 ecryptfs_dentry, virt);
1392 if (rc) { 1391 if (rc) {
1393 printk(KERN_ERR "%s: Error writing metadata out to lower file; " 1392 printk(KERN_ERR "%s: Error writing metadata out to lower file; "
1394 "rc = [%d]\n", __FUNCTION__, rc); 1393 "rc = [%d]\n", __func__, rc);
1395 goto out_free; 1394 goto out_free;
1396 } 1395 }
1397out_free: 1396out_free:
@@ -1585,7 +1584,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
1585 if (!page_virt) { 1584 if (!page_virt) {
1586 rc = -ENOMEM; 1585 rc = -ENOMEM;
1587 printk(KERN_ERR "%s: Unable to allocate page_virt\n", 1586 printk(KERN_ERR "%s: Unable to allocate page_virt\n",
1588 __FUNCTION__); 1587 __func__);
1589 goto out; 1588 goto out;
1590 } 1589 }
1591 rc = ecryptfs_read_lower(page_virt, 0, crypt_stat->extent_size, 1590 rc = ecryptfs_read_lower(page_virt, 0, crypt_stat->extent_size,
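The crypto.c hunks above replace the GCC-specific __FUNCTION__ identifier with the C99-standard __func__ in the error paths. For context, here is a minimal, self-contained userspace sketch of the logging pattern those call sites follow; the helper name demo_read_block and its failure value are hypothetical stand-ins, not eCryptfs code.

#include <stdio.h>

/* Hypothetical helper standing in for a lower-filesystem read. */
static int demo_read_block(void)
{
	return -5;	/* pretend the lower read failed with -EIO */
}

static int demo_caller(void)
{
	int rc = demo_read_block();

	if (rc)
		/* __func__ is standard C99; __FUNCTION__ is a GCC alias. */
		printf("%s: Error reading block; rc = [%d]\n", __func__, rc);
	return rc;
}

int main(void)
{
	return demo_caller() ? 1 : 0;
}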
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 5007f788da01..951ee33a022d 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -4,7 +4,7 @@
4 * 4 *
5 * Copyright (C) 1997-2003 Erez Zadok 5 * Copyright (C) 1997-2003 Erez Zadok
6 * Copyright (C) 2001-2003 Stony Brook University 6 * Copyright (C) 2001-2003 Stony Brook University
7 * Copyright (C) 2004-2007 International Business Machines Corp. 7 * Copyright (C) 2004-2008 International Business Machines Corp.
8 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> 8 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
9 * Trevor S. Highland <trevor.highland@gmail.com> 9 * Trevor S. Highland <trevor.highland@gmail.com>
10 * Tyler Hicks <tyhicks@ou.edu> 10 * Tyler Hicks <tyhicks@ou.edu>
@@ -34,6 +34,7 @@
34#include <linux/namei.h> 34#include <linux/namei.h>
35#include <linux/scatterlist.h> 35#include <linux/scatterlist.h>
36#include <linux/hash.h> 36#include <linux/hash.h>
37#include <linux/nsproxy.h>
37 38
38/* Version verification for shared data structures w/ userspace */ 39/* Version verification for shared data structures w/ userspace */
39#define ECRYPTFS_VERSION_MAJOR 0x00 40#define ECRYPTFS_VERSION_MAJOR 0x00
@@ -49,11 +50,13 @@
49#define ECRYPTFS_VERSIONING_POLICY 0x00000008 50#define ECRYPTFS_VERSIONING_POLICY 0x00000008
50#define ECRYPTFS_VERSIONING_XATTR 0x00000010 51#define ECRYPTFS_VERSIONING_XATTR 0x00000010
51#define ECRYPTFS_VERSIONING_MULTKEY 0x00000020 52#define ECRYPTFS_VERSIONING_MULTKEY 0x00000020
53#define ECRYPTFS_VERSIONING_DEVMISC 0x00000040
52#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \ 54#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \
53 | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \ 55 | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \
54 | ECRYPTFS_VERSIONING_PUBKEY \ 56 | ECRYPTFS_VERSIONING_PUBKEY \
55 | ECRYPTFS_VERSIONING_XATTR \ 57 | ECRYPTFS_VERSIONING_XATTR \
56 | ECRYPTFS_VERSIONING_MULTKEY) 58 | ECRYPTFS_VERSIONING_MULTKEY \
59 | ECRYPTFS_VERSIONING_DEVMISC)
57#define ECRYPTFS_MAX_PASSWORD_LENGTH 64 60#define ECRYPTFS_MAX_PASSWORD_LENGTH 64
58#define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH 61#define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH
59#define ECRYPTFS_SALT_SIZE 8 62#define ECRYPTFS_SALT_SIZE 8
@@ -73,17 +76,14 @@
73#define ECRYPTFS_DEFAULT_MSG_CTX_ELEMS 32 76#define ECRYPTFS_DEFAULT_MSG_CTX_ELEMS 32
74#define ECRYPTFS_DEFAULT_SEND_TIMEOUT HZ 77#define ECRYPTFS_DEFAULT_SEND_TIMEOUT HZ
75#define ECRYPTFS_MAX_MSG_CTX_TTL (HZ*3) 78#define ECRYPTFS_MAX_MSG_CTX_TTL (HZ*3)
76#define ECRYPTFS_NLMSG_HELO 100
77#define ECRYPTFS_NLMSG_QUIT 101
78#define ECRYPTFS_NLMSG_REQUEST 102
79#define ECRYPTFS_NLMSG_RESPONSE 103
80#define ECRYPTFS_MAX_PKI_NAME_BYTES 16 79#define ECRYPTFS_MAX_PKI_NAME_BYTES 16
81#define ECRYPTFS_DEFAULT_NUM_USERS 4 80#define ECRYPTFS_DEFAULT_NUM_USERS 4
82#define ECRYPTFS_MAX_NUM_USERS 32768 81#define ECRYPTFS_MAX_NUM_USERS 32768
83#define ECRYPTFS_TRANSPORT_NETLINK 0 82#define ECRYPTFS_TRANSPORT_NETLINK 0
84#define ECRYPTFS_TRANSPORT_CONNECTOR 1 83#define ECRYPTFS_TRANSPORT_CONNECTOR 1
85#define ECRYPTFS_TRANSPORT_RELAYFS 2 84#define ECRYPTFS_TRANSPORT_RELAYFS 2
86#define ECRYPTFS_DEFAULT_TRANSPORT ECRYPTFS_TRANSPORT_NETLINK 85#define ECRYPTFS_TRANSPORT_MISCDEV 3
86#define ECRYPTFS_DEFAULT_TRANSPORT ECRYPTFS_TRANSPORT_MISCDEV
87#define ECRYPTFS_XATTR_NAME "user.ecryptfs" 87#define ECRYPTFS_XATTR_NAME "user.ecryptfs"
88 88
89#define RFC2440_CIPHER_DES3_EDE 0x02 89#define RFC2440_CIPHER_DES3_EDE 0x02
@@ -366,32 +366,63 @@ struct ecryptfs_auth_tok_list_item {
366}; 366};
367 367
368struct ecryptfs_message { 368struct ecryptfs_message {
369 /* Can never be greater than ecryptfs_message_buf_len */
370 /* Used to find the parent msg_ctx */
371 /* Inherits from msg_ctx->index */
369 u32 index; 372 u32 index;
370 u32 data_len; 373 u32 data_len;
371 u8 data[]; 374 u8 data[];
372}; 375};
373 376
374struct ecryptfs_msg_ctx { 377struct ecryptfs_msg_ctx {
375#define ECRYPTFS_MSG_CTX_STATE_FREE 0x0001 378#define ECRYPTFS_MSG_CTX_STATE_FREE 0x01
376#define ECRYPTFS_MSG_CTX_STATE_PENDING 0x0002 379#define ECRYPTFS_MSG_CTX_STATE_PENDING 0x02
377#define ECRYPTFS_MSG_CTX_STATE_DONE 0x0003 380#define ECRYPTFS_MSG_CTX_STATE_DONE 0x03
378 u32 state; 381#define ECRYPTFS_MSG_CTX_STATE_NO_REPLY 0x04
379 unsigned int index; 382 u8 state;
380 unsigned int counter; 383#define ECRYPTFS_MSG_HELO 100
384#define ECRYPTFS_MSG_QUIT 101
385#define ECRYPTFS_MSG_REQUEST 102
386#define ECRYPTFS_MSG_RESPONSE 103
387 u8 type;
388 u32 index;
389 /* Counter converts to a sequence number. Each message sent
390 * out for which we expect a response has an associated
391 * sequence number. The response must have the same sequence
392 * number as the counter for the msg_stc for the message to be
393 * valid. */
394 u32 counter;
395 size_t msg_size;
381 struct ecryptfs_message *msg; 396 struct ecryptfs_message *msg;
382 struct task_struct *task; 397 struct task_struct *task;
383 struct list_head node; 398 struct list_head node;
399 struct list_head daemon_out_list;
384 struct mutex mux; 400 struct mutex mux;
385}; 401};
386 402
387extern unsigned int ecryptfs_transport; 403extern unsigned int ecryptfs_transport;
388 404
389struct ecryptfs_daemon_id { 405struct ecryptfs_daemon;
390 pid_t pid; 406
391 uid_t uid; 407struct ecryptfs_daemon {
392 struct hlist_node id_chain; 408#define ECRYPTFS_DAEMON_IN_READ 0x00000001
409#define ECRYPTFS_DAEMON_IN_POLL 0x00000002
410#define ECRYPTFS_DAEMON_ZOMBIE 0x00000004
411#define ECRYPTFS_DAEMON_MISCDEV_OPEN 0x00000008
412 u32 flags;
413 u32 num_queued_msg_ctx;
414 struct pid *pid;
415 uid_t euid;
416 struct user_namespace *user_ns;
417 struct task_struct *task;
418 struct mutex mux;
419 struct list_head msg_ctx_out_queue;
420 wait_queue_head_t wait;
421 struct hlist_node euid_chain;
393}; 422};
394 423
424extern struct mutex ecryptfs_daemon_hash_mux;
425
395static inline struct ecryptfs_file_info * 426static inline struct ecryptfs_file_info *
396ecryptfs_file_to_private(struct file *file) 427ecryptfs_file_to_private(struct file *file)
397{ 428{
@@ -500,7 +531,7 @@ ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt)
500} 531}
501 532
502#define ecryptfs_printk(type, fmt, arg...) \ 533#define ecryptfs_printk(type, fmt, arg...) \
503 __ecryptfs_printk(type "%s: " fmt, __FUNCTION__, ## arg); 534 __ecryptfs_printk(type "%s: " fmt, __func__, ## arg);
504void __ecryptfs_printk(const char *fmt, ...); 535void __ecryptfs_printk(const char *fmt, ...);
505 536
506extern const struct file_operations ecryptfs_main_fops; 537extern const struct file_operations ecryptfs_main_fops;
@@ -581,10 +612,13 @@ int
581ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, 612ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
582 size_t size, int flags); 613 size_t size, int flags);
583int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode); 614int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode);
584int ecryptfs_process_helo(unsigned int transport, uid_t uid, pid_t pid); 615int ecryptfs_process_helo(unsigned int transport, uid_t euid,
585int ecryptfs_process_quit(uid_t uid, pid_t pid); 616 struct user_namespace *user_ns, struct pid *pid);
586int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t uid, 617int ecryptfs_process_quit(uid_t euid, struct user_namespace *user_ns,
587 pid_t pid, u32 seq); 618 struct pid *pid);
619int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid,
620 struct user_namespace *user_ns, struct pid *pid,
621 u32 seq);
588int ecryptfs_send_message(unsigned int transport, char *data, int data_len, 622int ecryptfs_send_message(unsigned int transport, char *data, int data_len,
589 struct ecryptfs_msg_ctx **msg_ctx); 623 struct ecryptfs_msg_ctx **msg_ctx);
590int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx, 624int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx,
@@ -593,14 +627,14 @@ int ecryptfs_init_messaging(unsigned int transport);
593void ecryptfs_release_messaging(unsigned int transport); 627void ecryptfs_release_messaging(unsigned int transport);
594 628
595int ecryptfs_send_netlink(char *data, int data_len, 629int ecryptfs_send_netlink(char *data, int data_len,
596 struct ecryptfs_msg_ctx *msg_ctx, u16 msg_type, 630 struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
597 u16 msg_flags, pid_t daemon_pid); 631 u16 msg_flags, struct pid *daemon_pid);
598int ecryptfs_init_netlink(void); 632int ecryptfs_init_netlink(void);
599void ecryptfs_release_netlink(void); 633void ecryptfs_release_netlink(void);
600 634
601int ecryptfs_send_connector(char *data, int data_len, 635int ecryptfs_send_connector(char *data, int data_len,
602 struct ecryptfs_msg_ctx *msg_ctx, u16 msg_type, 636 struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
603 u16 msg_flags, pid_t daemon_pid); 637 u16 msg_flags, struct pid *daemon_pid);
604int ecryptfs_init_connector(void); 638int ecryptfs_init_connector(void);
605void ecryptfs_release_connector(void); 639void ecryptfs_release_connector(void);
606void 640void
@@ -642,5 +676,21 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs,
642 size_t offset_in_page, size_t size, 676 size_t offset_in_page, size_t size,
643 struct inode *ecryptfs_inode); 677 struct inode *ecryptfs_inode);
644struct page *ecryptfs_get_locked_page(struct file *file, loff_t index); 678struct page *ecryptfs_get_locked_page(struct file *file, loff_t index);
679int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon);
680int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon, uid_t euid,
681 struct user_namespace *user_ns);
682int ecryptfs_parse_packet_length(unsigned char *data, size_t *size,
683 size_t *length_size);
684int ecryptfs_write_packet_length(char *dest, size_t size,
685 size_t *packet_size_length);
686int ecryptfs_init_ecryptfs_miscdev(void);
687void ecryptfs_destroy_ecryptfs_miscdev(void);
688int ecryptfs_send_miscdev(char *data, size_t data_size,
689 struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
690 u16 msg_flags, struct ecryptfs_daemon *daemon);
691void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx);
692int
693ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid,
694 struct user_namespace *user_ns, struct pid *pid);
645 695
646#endif /* #ifndef ECRYPTFS_KERNEL_H */ 696#endif /* #ifndef ECRYPTFS_KERNEL_H */
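The header changes above advertise the new miscdev transport to userspace through an ECRYPTFS_VERSIONING_DEVMISC capability bit folded into ECRYPTFS_VERSIONING_MASK. A hedged sketch of how a consumer of that mask might test for the bit follows; the constants are copied from the hunk above, while the version variable and the printed strings are illustrative only.

#include <stdio.h>

#define ECRYPTFS_VERSIONING_XATTR   0x00000010
#define ECRYPTFS_VERSIONING_MULTKEY 0x00000020
#define ECRYPTFS_VERSIONING_DEVMISC 0x00000040

int main(void)
{
	/* Pretend this value came from the kernel's advertised version mask. */
	unsigned int version = ECRYPTFS_VERSIONING_XATTR
			       | ECRYPTFS_VERSIONING_MULTKEY
			       | ECRYPTFS_VERSIONING_DEVMISC;

	if (version & ECRYPTFS_VERSIONING_DEVMISC)
		printf("kernel offers the miscdev transport\n");
	else
		printf("fall back to an older transport\n");
	return 0;
}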
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 2b8f5ed4adea..2258b8f654a6 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -195,7 +195,9 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
195 file, ecryptfs_inode_to_private(inode)->lower_file); 195 file, ecryptfs_inode_to_private(inode)->lower_file);
196 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) { 196 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
197 ecryptfs_printk(KERN_DEBUG, "This is a directory\n"); 197 ecryptfs_printk(KERN_DEBUG, "This is a directory\n");
198 mutex_lock(&crypt_stat->cs_mutex);
198 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); 199 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
200 mutex_unlock(&crypt_stat->cs_mutex);
199 rc = 0; 201 rc = 0;
200 goto out; 202 goto out;
201 } 203 }
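The file.c hunk brackets the flag clear with crypt_stat->cs_mutex so concurrent openers do not race on the read-modify-write of crypt_stat->flags. Below is a small userspace analogue of that lock-around-flag-update shape using pthreads; the struct and flag names are illustrative, not the eCryptfs definitions.

#include <pthread.h>
#include <stdio.h>

#define DEMO_ENCRYPTED 0x1	/* stands in for ECRYPTFS_ENCRYPTED */

struct demo_crypt_stat {
	unsigned int flags;
	pthread_mutex_t cs_mutex;
};

static void demo_mark_unencrypted(struct demo_crypt_stat *cs)
{
	/* Take the stat lock around the read-modify-write of flags. */
	pthread_mutex_lock(&cs->cs_mutex);
	cs->flags &= ~DEMO_ENCRYPTED;
	pthread_mutex_unlock(&cs->cs_mutex);
}

int main(void)
{
	struct demo_crypt_stat cs = {
		.flags = DEMO_ENCRYPTED,
		.cs_mutex = PTHREAD_MUTEX_INITIALIZER,
	};

	demo_mark_unencrypted(&cs);
	printf("flags now [0x%x]\n", cs.flags);
	return 0;
}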
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index e23861152101..c92cc1c00aae 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -37,17 +37,11 @@ static struct dentry *lock_parent(struct dentry *dentry)
37{ 37{
38 struct dentry *dir; 38 struct dentry *dir;
39 39
40 dir = dget(dentry->d_parent); 40 dir = dget_parent(dentry);
41 mutex_lock_nested(&(dir->d_inode->i_mutex), I_MUTEX_PARENT); 41 mutex_lock_nested(&(dir->d_inode->i_mutex), I_MUTEX_PARENT);
42 return dir; 42 return dir;
43} 43}
44 44
45static void unlock_parent(struct dentry *dentry)
46{
47 mutex_unlock(&(dentry->d_parent->d_inode->i_mutex));
48 dput(dentry->d_parent);
49}
50
51static void unlock_dir(struct dentry *dir) 45static void unlock_dir(struct dentry *dir)
52{ 46{
53 mutex_unlock(&dir->d_inode->i_mutex); 47 mutex_unlock(&dir->d_inode->i_mutex);
@@ -111,7 +105,7 @@ ecryptfs_do_create(struct inode *directory_inode,
111 105
112 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); 106 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
113 lower_dir_dentry = lock_parent(lower_dentry); 107 lower_dir_dentry = lock_parent(lower_dentry);
114 if (unlikely(IS_ERR(lower_dir_dentry))) { 108 if (IS_ERR(lower_dir_dentry)) {
115 ecryptfs_printk(KERN_ERR, "Error locking directory of " 109 ecryptfs_printk(KERN_ERR, "Error locking directory of "
116 "dentry\n"); 110 "dentry\n");
117 rc = PTR_ERR(lower_dir_dentry); 111 rc = PTR_ERR(lower_dir_dentry);
@@ -121,7 +115,7 @@ ecryptfs_do_create(struct inode *directory_inode,
121 ecryptfs_dentry, mode, nd); 115 ecryptfs_dentry, mode, nd);
122 if (rc) { 116 if (rc) {
123 printk(KERN_ERR "%s: Failure to create dentry in lower fs; " 117 printk(KERN_ERR "%s: Failure to create dentry in lower fs; "
124 "rc = [%d]\n", __FUNCTION__, rc); 118 "rc = [%d]\n", __func__, rc);
125 goto out_lock; 119 goto out_lock;
126 } 120 }
127 rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry, 121 rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry,
@@ -426,8 +420,9 @@ static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry)
426 int rc = 0; 420 int rc = 0;
427 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); 421 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
428 struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir); 422 struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir);
423 struct dentry *lower_dir_dentry;
429 424
430 lock_parent(lower_dentry); 425 lower_dir_dentry = lock_parent(lower_dentry);
431 rc = vfs_unlink(lower_dir_inode, lower_dentry); 426 rc = vfs_unlink(lower_dir_inode, lower_dentry);
432 if (rc) { 427 if (rc) {
433 printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc); 428 printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc);
@@ -439,7 +434,7 @@ static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry)
439 dentry->d_inode->i_ctime = dir->i_ctime; 434 dentry->d_inode->i_ctime = dir->i_ctime;
440 d_drop(dentry); 435 d_drop(dentry);
441out_unlock: 436out_unlock:
442 unlock_parent(lower_dentry); 437 unlock_dir(lower_dir_dentry);
443 return rc; 438 return rc;
444} 439}
445 440
@@ -908,7 +903,9 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
908 if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) 903 if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
909 ia->ia_valid &= ~ATTR_MODE; 904 ia->ia_valid &= ~ATTR_MODE;
910 905
906 mutex_lock(&lower_dentry->d_inode->i_mutex);
911 rc = notify_change(lower_dentry, ia); 907 rc = notify_change(lower_dentry, ia);
908 mutex_unlock(&lower_dentry->d_inode->i_mutex);
912out: 909out:
913 fsstack_copy_attr_all(inode, lower_inode, NULL); 910 fsstack_copy_attr_all(inode, lower_inode, NULL);
914 return rc; 911 return rc;
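The inode.c changes drop the unlock_parent() helper: lock_parent() now pins the parent via dget_parent() and returns it, and callers such as ecryptfs_unlink() keep that returned dentry and release it through unlock_dir(). The following userspace sketch shows only the acquire-returns-handle / release-takes-handle pairing that callers now follow; the demo_* names are hypothetical and the real kernel locking (mutex_lock_nested, dget/dput) is not reproduced.

#include <pthread.h>
#include <stdio.h>

struct demo_dir {
	const char *name;
	pthread_mutex_t i_mutex;
};

static struct demo_dir *demo_lock_parent(struct demo_dir *parent)
{
	pthread_mutex_lock(&parent->i_mutex);
	return parent;	/* caller must hand this back to demo_unlock_dir() */
}

static void demo_unlock_dir(struct demo_dir *dir)
{
	pthread_mutex_unlock(&dir->i_mutex);
}

int main(void)
{
	struct demo_dir parent = {
		.name = "lower-parent",
		.i_mutex = PTHREAD_MUTEX_INITIALIZER,
	};
	struct demo_dir *locked = demo_lock_parent(&parent);

	printf("unlinking under [%s]\n", locked->name);
	demo_unlock_dir(locked);
	return 0;
}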
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 682b1b2482c2..e82b457180be 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -65,7 +65,7 @@ static int process_request_key_err(long err_code)
65} 65}
66 66
67/** 67/**
68 * parse_packet_length 68 * ecryptfs_parse_packet_length
69 * @data: Pointer to memory containing length at offset 69 * @data: Pointer to memory containing length at offset
70 * @size: This function writes the decoded size to this memory 70 * @size: This function writes the decoded size to this memory
71 * address; zero on error 71 * address; zero on error
@@ -73,8 +73,8 @@ static int process_request_key_err(long err_code)
73 * 73 *
74 * Returns zero on success; non-zero on error 74 * Returns zero on success; non-zero on error
75 */ 75 */
76static int parse_packet_length(unsigned char *data, size_t *size, 76int ecryptfs_parse_packet_length(unsigned char *data, size_t *size,
77 size_t *length_size) 77 size_t *length_size)
78{ 78{
79 int rc = 0; 79 int rc = 0;
80 80
@@ -105,7 +105,7 @@ out:
105} 105}
106 106
107/** 107/**
108 * write_packet_length 108 * ecryptfs_write_packet_length
109 * @dest: The byte array target into which to write the length. Must 109 * @dest: The byte array target into which to write the length. Must
110 * have at least 5 bytes allocated. 110 * have at least 5 bytes allocated.
111 * @size: The length to write. 111 * @size: The length to write.
@@ -114,8 +114,8 @@ out:
114 * 114 *
115 * Returns zero on success; non-zero on error. 115 * Returns zero on success; non-zero on error.
116 */ 116 */
117static int write_packet_length(char *dest, size_t size, 117int ecryptfs_write_packet_length(char *dest, size_t size,
118 size_t *packet_size_length) 118 size_t *packet_size_length)
119{ 119{
120 int rc = 0; 120 int rc = 0;
121 121
@@ -162,8 +162,8 @@ write_tag_64_packet(char *signature, struct ecryptfs_session_key *session_key,
162 goto out; 162 goto out;
163 } 163 }
164 message[i++] = ECRYPTFS_TAG_64_PACKET_TYPE; 164 message[i++] = ECRYPTFS_TAG_64_PACKET_TYPE;
165 rc = write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX, 165 rc = ecryptfs_write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX,
166 &packet_size_len); 166 &packet_size_len);
167 if (rc) { 167 if (rc) {
168 ecryptfs_printk(KERN_ERR, "Error generating tag 64 packet " 168 ecryptfs_printk(KERN_ERR, "Error generating tag 64 packet "
169 "header; cannot generate packet length\n"); 169 "header; cannot generate packet length\n");
@@ -172,8 +172,9 @@ write_tag_64_packet(char *signature, struct ecryptfs_session_key *session_key,
172 i += packet_size_len; 172 i += packet_size_len;
173 memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX); 173 memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX);
174 i += ECRYPTFS_SIG_SIZE_HEX; 174 i += ECRYPTFS_SIG_SIZE_HEX;
175 rc = write_packet_length(&message[i], session_key->encrypted_key_size, 175 rc = ecryptfs_write_packet_length(&message[i],
176 &packet_size_len); 176 session_key->encrypted_key_size,
177 &packet_size_len);
177 if (rc) { 178 if (rc) {
178 ecryptfs_printk(KERN_ERR, "Error generating tag 64 packet " 179 ecryptfs_printk(KERN_ERR, "Error generating tag 64 packet "
179 "header; cannot generate packet length\n"); 180 "header; cannot generate packet length\n");
@@ -225,7 +226,7 @@ parse_tag_65_packet(struct ecryptfs_session_key *session_key, u8 *cipher_code,
225 rc = -EIO; 226 rc = -EIO;
226 goto out; 227 goto out;
227 } 228 }
228 rc = parse_packet_length(&data[i], &m_size, &data_len); 229 rc = ecryptfs_parse_packet_length(&data[i], &m_size, &data_len);
229 if (rc) { 230 if (rc) {
230 ecryptfs_printk(KERN_WARNING, "Error parsing packet length; " 231 ecryptfs_printk(KERN_WARNING, "Error parsing packet length; "
231 "rc = [%d]\n", rc); 232 "rc = [%d]\n", rc);
@@ -304,8 +305,8 @@ write_tag_66_packet(char *signature, u8 cipher_code,
304 goto out; 305 goto out;
305 } 306 }
306 message[i++] = ECRYPTFS_TAG_66_PACKET_TYPE; 307 message[i++] = ECRYPTFS_TAG_66_PACKET_TYPE;
307 rc = write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX, 308 rc = ecryptfs_write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX,
308 &packet_size_len); 309 &packet_size_len);
309 if (rc) { 310 if (rc) {
310 ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet " 311 ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet "
311 "header; cannot generate packet length\n"); 312 "header; cannot generate packet length\n");
@@ -315,8 +316,8 @@ write_tag_66_packet(char *signature, u8 cipher_code,
315 memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX); 316 memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX);
316 i += ECRYPTFS_SIG_SIZE_HEX; 317 i += ECRYPTFS_SIG_SIZE_HEX;
317 /* The encrypted key includes 1 byte cipher code and 2 byte checksum */ 318 /* The encrypted key includes 1 byte cipher code and 2 byte checksum */
318 rc = write_packet_length(&message[i], crypt_stat->key_size + 3, 319 rc = ecryptfs_write_packet_length(&message[i], crypt_stat->key_size + 3,
319 &packet_size_len); 320 &packet_size_len);
320 if (rc) { 321 if (rc) {
321 ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet " 322 ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet "
322 "header; cannot generate packet length\n"); 323 "header; cannot generate packet length\n");
@@ -357,20 +358,25 @@ parse_tag_67_packet(struct ecryptfs_key_record *key_rec,
357 /* verify that everything through the encrypted FEK size is present */ 358 /* verify that everything through the encrypted FEK size is present */
358 if (message_len < 4) { 359 if (message_len < 4) {
359 rc = -EIO; 360 rc = -EIO;
361 printk(KERN_ERR "%s: message_len is [%Zd]; minimum acceptable "
362 "message length is [%d]\n", __func__, message_len, 4);
360 goto out; 363 goto out;
361 } 364 }
362 if (data[i++] != ECRYPTFS_TAG_67_PACKET_TYPE) { 365 if (data[i++] != ECRYPTFS_TAG_67_PACKET_TYPE) {
363 ecryptfs_printk(KERN_ERR, "Type should be ECRYPTFS_TAG_67\n");
364 rc = -EIO; 366 rc = -EIO;
367 printk(KERN_ERR "%s: Type should be ECRYPTFS_TAG_67\n",
368 __func__);
365 goto out; 369 goto out;
366 } 370 }
367 if (data[i++]) { 371 if (data[i++]) {
368 ecryptfs_printk(KERN_ERR, "Status indicator has non zero value"
369 " [%d]\n", data[i-1]);
370 rc = -EIO; 372 rc = -EIO;
373 printk(KERN_ERR "%s: Status indicator has non zero "
374 "value [%d]\n", __func__, data[i-1]);
375
371 goto out; 376 goto out;
372 } 377 }
373 rc = parse_packet_length(&data[i], &key_rec->enc_key_size, &data_len); 378 rc = ecryptfs_parse_packet_length(&data[i], &key_rec->enc_key_size,
379 &data_len);
374 if (rc) { 380 if (rc) {
375 ecryptfs_printk(KERN_WARNING, "Error parsing packet length; " 381 ecryptfs_printk(KERN_WARNING, "Error parsing packet length; "
376 "rc = [%d]\n", rc); 382 "rc = [%d]\n", rc);
@@ -378,17 +384,17 @@ parse_tag_67_packet(struct ecryptfs_key_record *key_rec,
378 } 384 }
379 i += data_len; 385 i += data_len;
380 if (message_len < (i + key_rec->enc_key_size)) { 386 if (message_len < (i + key_rec->enc_key_size)) {
381 ecryptfs_printk(KERN_ERR, "message_len [%d]; max len is [%d]\n",
382 message_len, (i + key_rec->enc_key_size));
383 rc = -EIO; 387 rc = -EIO;
388 printk(KERN_ERR "%s: message_len [%Zd]; max len is [%Zd]\n",
389 __func__, message_len, (i + key_rec->enc_key_size));
384 goto out; 390 goto out;
385 } 391 }
386 if (key_rec->enc_key_size > ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES) { 392 if (key_rec->enc_key_size > ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES) {
387 ecryptfs_printk(KERN_ERR, "Encrypted key_size [%d] larger than "
388 "the maximum key size [%d]\n",
389 key_rec->enc_key_size,
390 ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES);
391 rc = -EIO; 393 rc = -EIO;
394 printk(KERN_ERR "%s: Encrypted key_size [%Zd] larger than "
395 "the maximum key size [%d]\n", __func__,
396 key_rec->enc_key_size,
397 ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES);
392 goto out; 398 goto out;
393 } 399 }
394 memcpy(key_rec->enc_key, &data[i], key_rec->enc_key_size); 400 memcpy(key_rec->enc_key, &data[i], key_rec->enc_key_size);
@@ -445,7 +451,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
445 rc = write_tag_64_packet(auth_tok_sig, &(auth_tok->session_key), 451 rc = write_tag_64_packet(auth_tok_sig, &(auth_tok->session_key),
446 &netlink_message, &netlink_message_length); 452 &netlink_message, &netlink_message_length);
447 if (rc) { 453 if (rc) {
448 ecryptfs_printk(KERN_ERR, "Failed to write tag 64 packet"); 454 ecryptfs_printk(KERN_ERR, "Failed to write tag 64 packet\n");
449 goto out; 455 goto out;
450 } 456 }
451 rc = ecryptfs_send_message(ecryptfs_transport, netlink_message, 457 rc = ecryptfs_send_message(ecryptfs_transport, netlink_message,
@@ -570,8 +576,8 @@ parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat,
570 goto out; 576 goto out;
571 } 577 }
572 (*new_auth_tok) = &auth_tok_list_item->auth_tok; 578 (*new_auth_tok) = &auth_tok_list_item->auth_tok;
573 rc = parse_packet_length(&data[(*packet_size)], &body_size, 579 rc = ecryptfs_parse_packet_length(&data[(*packet_size)], &body_size,
574 &length_size); 580 &length_size);
575 if (rc) { 581 if (rc) {
576 printk(KERN_WARNING "Error parsing packet length; " 582 printk(KERN_WARNING "Error parsing packet length; "
577 "rc = [%d]\n", rc); 583 "rc = [%d]\n", rc);
@@ -704,8 +710,8 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
704 goto out; 710 goto out;
705 } 711 }
706 (*new_auth_tok) = &auth_tok_list_item->auth_tok; 712 (*new_auth_tok) = &auth_tok_list_item->auth_tok;
707 rc = parse_packet_length(&data[(*packet_size)], &body_size, 713 rc = ecryptfs_parse_packet_length(&data[(*packet_size)], &body_size,
708 &length_size); 714 &length_size);
709 if (rc) { 715 if (rc) {
710 printk(KERN_WARNING "Error parsing packet length; rc = [%d]\n", 716 printk(KERN_WARNING "Error parsing packet length; rc = [%d]\n",
711 rc); 717 rc);
@@ -852,8 +858,8 @@ parse_tag_11_packet(unsigned char *data, unsigned char *contents,
852 rc = -EINVAL; 858 rc = -EINVAL;
853 goto out; 859 goto out;
854 } 860 }
855 rc = parse_packet_length(&data[(*packet_size)], &body_size, 861 rc = ecryptfs_parse_packet_length(&data[(*packet_size)], &body_size,
856 &length_size); 862 &length_size);
857 if (rc) { 863 if (rc) {
858 printk(KERN_WARNING "Invalid tag 11 packet format\n"); 864 printk(KERN_WARNING "Invalid tag 11 packet format\n");
859 goto out; 865 goto out;
@@ -1405,8 +1411,8 @@ write_tag_1_packet(char *dest, size_t *remaining_bytes,
1405 auth_tok->token.private_key.key_size; 1411 auth_tok->token.private_key.key_size;
1406 rc = pki_encrypt_session_key(auth_tok, crypt_stat, key_rec); 1412 rc = pki_encrypt_session_key(auth_tok, crypt_stat, key_rec);
1407 if (rc) { 1413 if (rc) {
1408 ecryptfs_printk(KERN_ERR, "Failed to encrypt session key " 1414 printk(KERN_ERR "Failed to encrypt session key via a key "
1409 "via a pki"); 1415 "module; rc = [%d]\n", rc);
1410 goto out; 1416 goto out;
1411 } 1417 }
1412 if (ecryptfs_verbosity > 0) { 1418 if (ecryptfs_verbosity > 0) {
@@ -1430,8 +1436,9 @@ encrypted_session_key_set:
1430 goto out; 1436 goto out;
1431 } 1437 }
1432 dest[(*packet_size)++] = ECRYPTFS_TAG_1_PACKET_TYPE; 1438 dest[(*packet_size)++] = ECRYPTFS_TAG_1_PACKET_TYPE;
1433 rc = write_packet_length(&dest[(*packet_size)], (max_packet_size - 4), 1439 rc = ecryptfs_write_packet_length(&dest[(*packet_size)],
1434 &packet_size_length); 1440 (max_packet_size - 4),
1441 &packet_size_length);
1435 if (rc) { 1442 if (rc) {
1436 ecryptfs_printk(KERN_ERR, "Error generating tag 1 packet " 1443 ecryptfs_printk(KERN_ERR, "Error generating tag 1 packet "
1437 "header; cannot generate packet length\n"); 1444 "header; cannot generate packet length\n");
@@ -1489,8 +1496,9 @@ write_tag_11_packet(char *dest, size_t *remaining_bytes, char *contents,
1489 goto out; 1496 goto out;
1490 } 1497 }
1491 dest[(*packet_length)++] = ECRYPTFS_TAG_11_PACKET_TYPE; 1498 dest[(*packet_length)++] = ECRYPTFS_TAG_11_PACKET_TYPE;
1492 rc = write_packet_length(&dest[(*packet_length)], 1499 rc = ecryptfs_write_packet_length(&dest[(*packet_length)],
1493 (max_packet_size - 4), &packet_size_length); 1500 (max_packet_size - 4),
1501 &packet_size_length);
1494 if (rc) { 1502 if (rc) {
1495 printk(KERN_ERR "Error generating tag 11 packet header; cannot " 1503 printk(KERN_ERR "Error generating tag 11 packet header; cannot "
1496 "generate packet length. rc = [%d]\n", rc); 1504 "generate packet length. rc = [%d]\n", rc);
@@ -1682,8 +1690,9 @@ encrypted_session_key_set:
1682 dest[(*packet_size)++] = ECRYPTFS_TAG_3_PACKET_TYPE; 1690 dest[(*packet_size)++] = ECRYPTFS_TAG_3_PACKET_TYPE;
1683 /* Chop off the Tag 3 identifier(1) and Tag 3 packet size(3) 1691 /* Chop off the Tag 3 identifier(1) and Tag 3 packet size(3)
1684 * to get the number of octets in the actual Tag 3 packet */ 1692 * to get the number of octets in the actual Tag 3 packet */
1685 rc = write_packet_length(&dest[(*packet_size)], (max_packet_size - 4), 1693 rc = ecryptfs_write_packet_length(&dest[(*packet_size)],
1686 &packet_size_length); 1694 (max_packet_size - 4),
1695 &packet_size_length);
1687 if (rc) { 1696 if (rc) {
1688 printk(KERN_ERR "Error generating tag 3 packet header; cannot " 1697 printk(KERN_ERR "Error generating tag 3 packet header; cannot "
1689 "generate packet length. rc = [%d]\n", rc); 1698 "generate packet length. rc = [%d]\n", rc);
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index d25ac9500a92..d603631601eb 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -219,7 +219,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
219 if (rc) { 219 if (rc) {
220 printk(KERN_ERR "%s: Error attempting to initialize the " 220 printk(KERN_ERR "%s: Error attempting to initialize the "
221 "persistent file for the dentry with name [%s]; " 221 "persistent file for the dentry with name [%s]; "
222 "rc = [%d]\n", __FUNCTION__, dentry->d_name.name, rc); 222 "rc = [%d]\n", __func__, dentry->d_name.name, rc);
223 goto out; 223 goto out;
224 } 224 }
225out: 225out:
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 9cc2aec27b0d..1b5c20058acb 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -1,7 +1,7 @@
1/** 1/**
2 * eCryptfs: Linux filesystem encryption layer 2 * eCryptfs: Linux filesystem encryption layer
3 * 3 *
4 * Copyright (C) 2004-2006 International Business Machines Corp. 4 * Copyright (C) 2004-2008 International Business Machines Corp.
5 * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com> 5 * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com>
6 * Tyler Hicks <tyhicks@ou.edu> 6 * Tyler Hicks <tyhicks@ou.edu>
7 * 7 *
@@ -20,19 +20,21 @@
20 * 02111-1307, USA. 20 * 02111-1307, USA.
21 */ 21 */
22#include <linux/sched.h> 22#include <linux/sched.h>
23#include <linux/user_namespace.h>
24#include <linux/nsproxy.h>
23#include "ecryptfs_kernel.h" 25#include "ecryptfs_kernel.h"
24 26
25static LIST_HEAD(ecryptfs_msg_ctx_free_list); 27static LIST_HEAD(ecryptfs_msg_ctx_free_list);
26static LIST_HEAD(ecryptfs_msg_ctx_alloc_list); 28static LIST_HEAD(ecryptfs_msg_ctx_alloc_list);
27static struct mutex ecryptfs_msg_ctx_lists_mux; 29static struct mutex ecryptfs_msg_ctx_lists_mux;
28 30
29static struct hlist_head *ecryptfs_daemon_id_hash; 31static struct hlist_head *ecryptfs_daemon_hash;
30static struct mutex ecryptfs_daemon_id_hash_mux; 32struct mutex ecryptfs_daemon_hash_mux;
31static int ecryptfs_hash_buckets; 33static int ecryptfs_hash_buckets;
32#define ecryptfs_uid_hash(uid) \ 34#define ecryptfs_uid_hash(uid) \
33 hash_long((unsigned long)uid, ecryptfs_hash_buckets) 35 hash_long((unsigned long)uid, ecryptfs_hash_buckets)
34 36
35static unsigned int ecryptfs_msg_counter; 37static u32 ecryptfs_msg_counter;
36static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr; 38static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr;
37 39
38/** 40/**
@@ -40,9 +42,10 @@ static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr;
40 * @msg_ctx: The context that was acquired from the free list 42 * @msg_ctx: The context that was acquired from the free list
41 * 43 *
42 * Acquires a context element from the free list and locks the mutex 44 * Acquires a context element from the free list and locks the mutex
43 * on the context. Returns zero on success; non-zero on error or upon 45 * on the context. Sets the msg_ctx task to current. Returns zero on
44 * failure to acquire a free context element. Be sure to lock the 46 * success; non-zero on error or upon failure to acquire a free
45 * list mutex before calling. 47 * context element. Must be called with ecryptfs_msg_ctx_lists_mux
48 * held.
46 */ 49 */
47static int ecryptfs_acquire_free_msg_ctx(struct ecryptfs_msg_ctx **msg_ctx) 50static int ecryptfs_acquire_free_msg_ctx(struct ecryptfs_msg_ctx **msg_ctx)
48{ 51{
@@ -50,11 +53,11 @@ static int ecryptfs_acquire_free_msg_ctx(struct ecryptfs_msg_ctx **msg_ctx)
50 int rc; 53 int rc;
51 54
52 if (list_empty(&ecryptfs_msg_ctx_free_list)) { 55 if (list_empty(&ecryptfs_msg_ctx_free_list)) {
53 ecryptfs_printk(KERN_WARNING, "The eCryptfs free " 56 printk(KERN_WARNING "%s: The eCryptfs free "
54 "context list is empty. It may be helpful to " 57 "context list is empty. It may be helpful to "
55 "specify the ecryptfs_message_buf_len " 58 "specify the ecryptfs_message_buf_len "
56 "parameter to be greater than the current " 59 "parameter to be greater than the current "
57 "value of [%d]\n", ecryptfs_message_buf_len); 60 "value of [%d]\n", __func__, ecryptfs_message_buf_len);
58 rc = -ENOMEM; 61 rc = -ENOMEM;
59 goto out; 62 goto out;
60 } 63 }
@@ -75,8 +78,7 @@ out:
75 * ecryptfs_msg_ctx_free_to_alloc 78 * ecryptfs_msg_ctx_free_to_alloc
76 * @msg_ctx: The context to move from the free list to the alloc list 79 * @msg_ctx: The context to move from the free list to the alloc list
77 * 80 *
78 * Be sure to lock the list mutex and the context mutex before 81 * Must be called with ecryptfs_msg_ctx_lists_mux held.
79 * calling.
80 */ 82 */
81static void ecryptfs_msg_ctx_free_to_alloc(struct ecryptfs_msg_ctx *msg_ctx) 83static void ecryptfs_msg_ctx_free_to_alloc(struct ecryptfs_msg_ctx *msg_ctx)
82{ 84{
@@ -89,36 +91,39 @@ static void ecryptfs_msg_ctx_free_to_alloc(struct ecryptfs_msg_ctx *msg_ctx)
89 * ecryptfs_msg_ctx_alloc_to_free 91 * ecryptfs_msg_ctx_alloc_to_free
90 * @msg_ctx: The context to move from the alloc list to the free list 92 * @msg_ctx: The context to move from the alloc list to the free list
91 * 93 *
92 * Be sure to lock the list mutex and the context mutex before 94 * Must be called with ecryptfs_msg_ctx_lists_mux held.
93 * calling.
94 */ 95 */
95static void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx) 96void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx)
96{ 97{
97 list_move(&(msg_ctx->node), &ecryptfs_msg_ctx_free_list); 98 list_move(&(msg_ctx->node), &ecryptfs_msg_ctx_free_list);
98 if (msg_ctx->msg) 99 if (msg_ctx->msg)
99 kfree(msg_ctx->msg); 100 kfree(msg_ctx->msg);
101 msg_ctx->msg = NULL;
100 msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_FREE; 102 msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_FREE;
101} 103}
102 104
103/** 105/**
104 * ecryptfs_find_daemon_id 106 * ecryptfs_find_daemon_by_euid
105 * @uid: The user id which maps to the desired daemon id 107 * @euid: The effective user id which maps to the desired daemon id
106 * @id: If return value is zero, points to the desired daemon id 108 * @user_ns: The namespace in which @euid applies
107 * pointer 109 * @daemon: If return value is zero, points to the desired daemon pointer
108 * 110 *
109 * Search the hash list for the given user id. Returns zero if the 111 * Must be called with ecryptfs_daemon_hash_mux held.
110 * user id exists in the list; non-zero otherwise. The daemon id hash 112 *
111 * mutex should be held before calling this function. 113 * Search the hash list for the given user id.
114 *
115 * Returns zero if the user id exists in the list; non-zero otherwise.
112 */ 116 */
113static int ecryptfs_find_daemon_id(uid_t uid, struct ecryptfs_daemon_id **id) 117int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon, uid_t euid,
118 struct user_namespace *user_ns)
114{ 119{
115 struct hlist_node *elem; 120 struct hlist_node *elem;
116 int rc; 121 int rc;
117 122
118 hlist_for_each_entry(*id, elem, 123 hlist_for_each_entry(*daemon, elem,
119 &ecryptfs_daemon_id_hash[ecryptfs_uid_hash(uid)], 124 &ecryptfs_daemon_hash[ecryptfs_uid_hash(euid)],
120 id_chain) { 125 euid_chain) {
121 if ((*id)->uid == uid) { 126 if ((*daemon)->euid == euid && (*daemon)->user_ns == user_ns) {
122 rc = 0; 127 rc = 0;
123 goto out; 128 goto out;
124 } 129 }
@@ -128,181 +133,325 @@ out:
128 return rc; 133 return rc;
129} 134}
130 135
131static int ecryptfs_send_raw_message(unsigned int transport, u16 msg_type, 136static int
132 pid_t pid) 137ecryptfs_send_message_locked(unsigned int transport, char *data, int data_len,
138 u8 msg_type, struct ecryptfs_msg_ctx **msg_ctx);
139
140/**
141 * ecryptfs_send_raw_message
142 * @transport: Transport type
143 * @msg_type: Message type
144 * @daemon: Daemon struct for recipient of message
145 *
146 * A raw message is one that does not include an ecryptfs_message
147 * struct. It simply has a type.
148 *
149 * Must be called with ecryptfs_daemon_hash_mux held.
150 *
151 * Returns zero on success; non-zero otherwise
152 */
153static int ecryptfs_send_raw_message(unsigned int transport, u8 msg_type,
154 struct ecryptfs_daemon *daemon)
133{ 155{
156 struct ecryptfs_msg_ctx *msg_ctx;
134 int rc; 157 int rc;
135 158
136 switch(transport) { 159 switch(transport) {
137 case ECRYPTFS_TRANSPORT_NETLINK: 160 case ECRYPTFS_TRANSPORT_NETLINK:
138 rc = ecryptfs_send_netlink(NULL, 0, NULL, msg_type, 0, pid); 161 rc = ecryptfs_send_netlink(NULL, 0, NULL, msg_type, 0,
162 daemon->pid);
163 break;
164 case ECRYPTFS_TRANSPORT_MISCDEV:
165 rc = ecryptfs_send_message_locked(transport, NULL, 0, msg_type,
166 &msg_ctx);
167 if (rc) {
168 printk(KERN_ERR "%s: Error whilst attempting to send "
169 "message via procfs; rc = [%d]\n", __func__, rc);
170 goto out;
171 }
172 /* Raw messages are logically context-free (e.g., no
173 * reply is expected), so we set the state of the
174 * ecryptfs_msg_ctx object to indicate that it should
175 * be freed as soon as the transport sends out the message. */
176 mutex_lock(&msg_ctx->mux);
177 msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_NO_REPLY;
178 mutex_unlock(&msg_ctx->mux);
139 break; 179 break;
140 case ECRYPTFS_TRANSPORT_CONNECTOR: 180 case ECRYPTFS_TRANSPORT_CONNECTOR:
141 case ECRYPTFS_TRANSPORT_RELAYFS: 181 case ECRYPTFS_TRANSPORT_RELAYFS:
142 default: 182 default:
143 rc = -ENOSYS; 183 rc = -ENOSYS;
144 } 184 }
185out:
186 return rc;
187}
188
189/**
190 * ecryptfs_spawn_daemon - Create and initialize a new daemon struct
191 * @daemon: Pointer to set to newly allocated daemon struct
192 * @euid: Effective user id for the daemon
193 * @user_ns: The namespace in which @euid applies
194 * @pid: Process id for the daemon
195 *
196 * Must be called ceremoniously while in possession of
197 * ecryptfs_sacred_daemon_hash_mux
198 *
199 * Returns zero on success; non-zero otherwise
200 */
201int
202ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid,
203 struct user_namespace *user_ns, struct pid *pid)
204{
205 int rc = 0;
206
207 (*daemon) = kzalloc(sizeof(**daemon), GFP_KERNEL);
208 if (!(*daemon)) {
209 rc = -ENOMEM;
210 printk(KERN_ERR "%s: Failed to allocate [%Zd] bytes of "
211 "GFP_KERNEL memory\n", __func__, sizeof(**daemon));
212 goto out;
213 }
214 (*daemon)->euid = euid;
215 (*daemon)->user_ns = get_user_ns(user_ns);
216 (*daemon)->pid = get_pid(pid);
217 (*daemon)->task = current;
218 mutex_init(&(*daemon)->mux);
219 INIT_LIST_HEAD(&(*daemon)->msg_ctx_out_queue);
220 init_waitqueue_head(&(*daemon)->wait);
221 (*daemon)->num_queued_msg_ctx = 0;
222 hlist_add_head(&(*daemon)->euid_chain,
223 &ecryptfs_daemon_hash[ecryptfs_uid_hash(euid)]);
224out:
145 return rc; 225 return rc;
146} 226}
147 227
148/** 228/**
149 * ecryptfs_process_helo 229 * ecryptfs_process_helo
150 * @transport: The underlying transport (netlink, etc.) 230 * @transport: The underlying transport (netlink, etc.)
151 * @uid: The user ID owner of the message 231 * @euid: The user ID owner of the message
232 * @user_ns: The namespace in which @euid applies
152 * @pid: The process ID for the userspace program that sent the 233 * @pid: The process ID for the userspace program that sent the
153 * message 234 * message
154 * 235 *
155 * Adds the uid and pid values to the daemon id hash. If a uid 236 * Adds the euid and pid values to the daemon euid hash. If an euid
156 * already has a daemon pid registered, the daemon will be 237 * already has a daemon pid registered, the daemon will be
157 * unregistered before the new daemon id is put into the hash list. 238 * unregistered before the new daemon is put into the hash list.
158 * Returns zero after adding a new daemon id to the hash list; 239 * Returns zero after adding a new daemon to the hash list;
159 * non-zero otherwise. 240 * non-zero otherwise.
160 */ 241 */
161int ecryptfs_process_helo(unsigned int transport, uid_t uid, pid_t pid) 242int ecryptfs_process_helo(unsigned int transport, uid_t euid,
243 struct user_namespace *user_ns, struct pid *pid)
162{ 244{
163 struct ecryptfs_daemon_id *new_id; 245 struct ecryptfs_daemon *new_daemon;
164 struct ecryptfs_daemon_id *old_id; 246 struct ecryptfs_daemon *old_daemon;
165 int rc; 247 int rc;
166 248
167 mutex_lock(&ecryptfs_daemon_id_hash_mux); 249 mutex_lock(&ecryptfs_daemon_hash_mux);
168 new_id = kmalloc(sizeof(*new_id), GFP_KERNEL); 250 rc = ecryptfs_find_daemon_by_euid(&old_daemon, euid, user_ns);
169 if (!new_id) { 251 if (rc != 0) {
170 rc = -ENOMEM;
171 ecryptfs_printk(KERN_ERR, "Failed to allocate memory; unable "
172 "to register daemon [%d] for user [%d]\n",
173 pid, uid);
174 goto unlock;
175 }
176 if (!ecryptfs_find_daemon_id(uid, &old_id)) {
177 printk(KERN_WARNING "Received request from user [%d] " 252 printk(KERN_WARNING "Received request from user [%d] "
178 "to register daemon [%d]; unregistering daemon " 253 "to register daemon [0x%p]; unregistering daemon "
179 "[%d]\n", uid, pid, old_id->pid); 254 "[0x%p]\n", euid, pid, old_daemon->pid);
180 hlist_del(&old_id->id_chain); 255 rc = ecryptfs_send_raw_message(transport, ECRYPTFS_MSG_QUIT,
181 rc = ecryptfs_send_raw_message(transport, ECRYPTFS_NLMSG_QUIT, 256 old_daemon);
182 old_id->pid);
183 if (rc) 257 if (rc)
184 printk(KERN_WARNING "Failed to send QUIT " 258 printk(KERN_WARNING "Failed to send QUIT "
185 "message to daemon [%d]; rc = [%d]\n", 259 "message to daemon [0x%p]; rc = [%d]\n",
186 old_id->pid, rc); 260 old_daemon->pid, rc);
187 kfree(old_id); 261 hlist_del(&old_daemon->euid_chain);
262 kfree(old_daemon);
188 } 263 }
189 new_id->uid = uid; 264 rc = ecryptfs_spawn_daemon(&new_daemon, euid, user_ns, pid);
190 new_id->pid = pid; 265 if (rc)
191 hlist_add_head(&new_id->id_chain, 266 printk(KERN_ERR "%s: The gods are displeased with this attempt "
192 &ecryptfs_daemon_id_hash[ecryptfs_uid_hash(uid)]); 267 "to create a new daemon object for euid [%d]; pid "
193 rc = 0; 268 "[0x%p]; rc = [%d]\n", __func__, euid, pid, rc);
194unlock: 269 mutex_unlock(&ecryptfs_daemon_hash_mux);
195 mutex_unlock(&ecryptfs_daemon_id_hash_mux); 270 return rc;
271}
272
273/**
274 * ecryptfs_exorcise_daemon - Destroy the daemon struct
275 *
276 * Must be called ceremoniously while in possession of
277 * ecryptfs_daemon_hash_mux and the daemon's own mux.
278 */
279int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon)
280{
281 struct ecryptfs_msg_ctx *msg_ctx, *msg_ctx_tmp;
282 int rc = 0;
283
284 mutex_lock(&daemon->mux);
285 if ((daemon->flags & ECRYPTFS_DAEMON_IN_READ)
286 || (daemon->flags & ECRYPTFS_DAEMON_IN_POLL)) {
287 rc = -EBUSY;
288 printk(KERN_WARNING "%s: Attempt to destroy daemon with pid "
289 "[0x%p], but it is in the midst of a read or a poll\n",
290 __func__, daemon->pid);
291 mutex_unlock(&daemon->mux);
292 goto out;
293 }
294 list_for_each_entry_safe(msg_ctx, msg_ctx_tmp,
295 &daemon->msg_ctx_out_queue, daemon_out_list) {
296 list_del(&msg_ctx->daemon_out_list);
297 daemon->num_queued_msg_ctx--;
298 printk(KERN_WARNING "%s: Warning: dropping message that is in "
299 "the out queue of a dying daemon\n", __func__);
300 ecryptfs_msg_ctx_alloc_to_free(msg_ctx);
301 }
302 hlist_del(&daemon->euid_chain);
303 if (daemon->task)
304 wake_up_process(daemon->task);
305 if (daemon->pid)
306 put_pid(daemon->pid);
307 if (daemon->user_ns)
308 put_user_ns(daemon->user_ns);
309 mutex_unlock(&daemon->mux);
310 memset(daemon, 0, sizeof(*daemon));
311 kfree(daemon);
312out:
196 return rc; 313 return rc;
197} 314}
198 315
199/** 316/**
200 * ecryptfs_process_quit 317 * ecryptfs_process_quit
201 * @uid: The user ID owner of the message 318 * @euid: The user ID owner of the message
319 * @user_ns: The namespace in which @euid applies
202 * @pid: The process ID for the userspace program that sent the 320 * @pid: The process ID for the userspace program that sent the
203 * message 321 * message
204 * 322 *
205 * Deletes the corresponding daemon id for the given uid and pid, if 323 * Deletes the corresponding daemon for the given euid and pid, if
206 * it is the registered that is requesting the deletion. Returns zero 324 * it is the registered that is requesting the deletion. Returns zero
207 * after deleting the desired daemon id; non-zero otherwise. 325 * after deleting the desired daemon; non-zero otherwise.
208 */ 326 */
209int ecryptfs_process_quit(uid_t uid, pid_t pid) 327int ecryptfs_process_quit(uid_t euid, struct user_namespace *user_ns,
328 struct pid *pid)
210{ 329{
211 struct ecryptfs_daemon_id *id; 330 struct ecryptfs_daemon *daemon;
212 int rc; 331 int rc;
213 332
214 mutex_lock(&ecryptfs_daemon_id_hash_mux); 333 mutex_lock(&ecryptfs_daemon_hash_mux);
215 if (ecryptfs_find_daemon_id(uid, &id)) { 334 rc = ecryptfs_find_daemon_by_euid(&daemon, euid, user_ns);
335 if (rc || !daemon) {
216 rc = -EINVAL; 336 rc = -EINVAL;
217 ecryptfs_printk(KERN_ERR, "Received request from user [%d] to " 337 printk(KERN_ERR "Received request from user [%d] to "
218 "unregister unrecognized daemon [%d]\n", uid, 338 "unregister unrecognized daemon [0x%p]\n", euid, pid);
219 pid); 339 goto out_unlock;
220 goto unlock;
221 } 340 }
222 if (id->pid != pid) { 341 rc = ecryptfs_exorcise_daemon(daemon);
223 rc = -EINVAL; 342out_unlock:
224 ecryptfs_printk(KERN_WARNING, "Received request from user [%d] " 343 mutex_unlock(&ecryptfs_daemon_hash_mux);
225 "with pid [%d] to unregister daemon [%d]\n",
226 uid, pid, id->pid);
227 goto unlock;
228 }
229 hlist_del(&id->id_chain);
230 kfree(id);
231 rc = 0;
232unlock:
233 mutex_unlock(&ecryptfs_daemon_id_hash_mux);
234 return rc; 344 return rc;
235} 345}
236 346
237/** 347/**
238 * ecryptfs_process_reponse 348 * ecryptfs_process_reponse
239 * @msg: The ecryptfs message received; the caller should sanity check 349 * @msg: The ecryptfs message received; the caller should sanity check
240 * msg->data_len 350 * msg->data_len and free the memory
241 * @pid: The process ID of the userspace application that sent the 351 * @pid: The process ID of the userspace application that sent the
242 * message 352 * message
243 * @seq: The sequence number of the message 353 * @seq: The sequence number of the message; must match the sequence
354 * number for the existing message context waiting for this
355 * response
356 *
357 * Processes a response message after sending an operation request to
358 * userspace. Some other process is awaiting this response. Before
359 * sending out its first communications, the other process allocated a
360 * msg_ctx from the ecryptfs_msg_ctx_arr at a particular index. The
361 * response message contains this index so that we can copy over the
362 * response message into the msg_ctx that the process holds a
363 * reference to. The other process is going to wake up, check to see
364 * that msg_ctx->state == ECRYPTFS_MSG_CTX_STATE_DONE, and then
365 * proceed to read off and process the response message. Returns zero
366 * upon delivery to desired context element; non-zero upon delivery
367 * failure or error.
244 * 368 *
245 * Processes a response message after sending a operation request to 369 * Returns zero on success; non-zero otherwise
246 * userspace. Returns zero upon delivery to desired context element;
247 * non-zero upon delivery failure or error.
248 */ 370 */
249int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t uid, 371int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid,
250 pid_t pid, u32 seq) 372 struct user_namespace *user_ns, struct pid *pid,
373 u32 seq)
251{ 374{
252 struct ecryptfs_daemon_id *id; 375 struct ecryptfs_daemon *daemon;
253 struct ecryptfs_msg_ctx *msg_ctx; 376 struct ecryptfs_msg_ctx *msg_ctx;
254 int msg_size; 377 size_t msg_size;
378 struct nsproxy *nsproxy;
379 struct user_namespace *current_user_ns;
255 int rc; 380 int rc;
256 381
257 if (msg->index >= ecryptfs_message_buf_len) { 382 if (msg->index >= ecryptfs_message_buf_len) {
258 rc = -EINVAL; 383 rc = -EINVAL;
259 ecryptfs_printk(KERN_ERR, "Attempt to reference " 384 printk(KERN_ERR "%s: Attempt to reference "
260 "context buffer at index [%d]; maximum " 385 "context buffer at index [%d]; maximum "
261 "allowable is [%d]\n", msg->index, 386 "allowable is [%d]\n", __func__, msg->index,
262 (ecryptfs_message_buf_len - 1)); 387 (ecryptfs_message_buf_len - 1));
263 goto out; 388 goto out;
264 } 389 }
265 msg_ctx = &ecryptfs_msg_ctx_arr[msg->index]; 390 msg_ctx = &ecryptfs_msg_ctx_arr[msg->index];
266 mutex_lock(&msg_ctx->mux); 391 mutex_lock(&msg_ctx->mux);
267 if (ecryptfs_find_daemon_id(msg_ctx->task->euid, &id)) { 392 mutex_lock(&ecryptfs_daemon_hash_mux);
393 rcu_read_lock();
394 nsproxy = task_nsproxy(msg_ctx->task);
395 if (nsproxy == NULL) {
268 rc = -EBADMSG; 396 rc = -EBADMSG;
269 ecryptfs_printk(KERN_WARNING, "User [%d] received a " 397 printk(KERN_ERR "%s: Receiving process is a zombie. Dropping "
270 "message response from process [%d] but does " 398 "message.\n", __func__);
271 "not have a registered daemon\n", 399 rcu_read_unlock();
272 msg_ctx->task->euid, pid); 400 mutex_unlock(&ecryptfs_daemon_hash_mux);
273 goto wake_up; 401 goto wake_up;
274 } 402 }
275 if (msg_ctx->task->euid != uid) { 403 current_user_ns = nsproxy->user_ns;
404 rc = ecryptfs_find_daemon_by_euid(&daemon, msg_ctx->task->euid,
405 current_user_ns);
406 rcu_read_unlock();
407 mutex_unlock(&ecryptfs_daemon_hash_mux);
408 if (rc) {
409 rc = -EBADMSG;
410 printk(KERN_WARNING "%s: User [%d] received a "
411 "message response from process [0x%p] but does "
412 "not have a registered daemon\n", __func__,
413 msg_ctx->task->euid, pid);
414 goto wake_up;
415 }
416 if (msg_ctx->task->euid != euid) {
276 rc = -EBADMSG; 417 rc = -EBADMSG;
277 ecryptfs_printk(KERN_WARNING, "Received message from user " 418 printk(KERN_WARNING "%s: Received message from user "
278 "[%d]; expected message from user [%d]\n", 419 "[%d]; expected message from user [%d]\n", __func__,
279 uid, msg_ctx->task->euid); 420 euid, msg_ctx->task->euid);
280 goto unlock; 421 goto unlock;
281 } 422 }
282 if (id->pid != pid) { 423 if (current_user_ns != user_ns) {
283 rc = -EBADMSG; 424 rc = -EBADMSG;
284 ecryptfs_printk(KERN_ERR, "User [%d] received a " 425 printk(KERN_WARNING "%s: Received message from user_ns "
285 "message response from an unrecognized " 426 "[0x%p]; expected message from user_ns [0x%p]\n",
286 "process [%d]\n", msg_ctx->task->euid, pid); 427 __func__, user_ns, nsproxy->user_ns);
428 goto unlock;
429 }
430 if (daemon->pid != pid) {
431 rc = -EBADMSG;
432 printk(KERN_ERR "%s: User [%d] sent a message response "
433 "from an unrecognized process [0x%p]\n",
434 __func__, msg_ctx->task->euid, pid);
287 goto unlock; 435 goto unlock;
288 } 436 }
289 if (msg_ctx->state != ECRYPTFS_MSG_CTX_STATE_PENDING) { 437 if (msg_ctx->state != ECRYPTFS_MSG_CTX_STATE_PENDING) {
290 rc = -EINVAL; 438 rc = -EINVAL;
291 ecryptfs_printk(KERN_WARNING, "Desired context element is not " 439 printk(KERN_WARNING "%s: Desired context element is not "
292 "pending a response\n"); 440 "pending a response\n", __func__);
293 goto unlock; 441 goto unlock;
294 } else if (msg_ctx->counter != seq) { 442 } else if (msg_ctx->counter != seq) {
295 rc = -EINVAL; 443 rc = -EINVAL;
296 ecryptfs_printk(KERN_WARNING, "Invalid message sequence; " 444 printk(KERN_WARNING "%s: Invalid message sequence; "
297 "expected [%d]; received [%d]\n", 445 "expected [%d]; received [%d]\n", __func__,
298 msg_ctx->counter, seq); 446 msg_ctx->counter, seq);
299 goto unlock; 447 goto unlock;
300 } 448 }
301 msg_size = sizeof(*msg) + msg->data_len; 449 msg_size = (sizeof(*msg) + msg->data_len);
302 msg_ctx->msg = kmalloc(msg_size, GFP_KERNEL); 450 msg_ctx->msg = kmalloc(msg_size, GFP_KERNEL);
303 if (!msg_ctx->msg) { 451 if (!msg_ctx->msg) {
304 rc = -ENOMEM; 452 rc = -ENOMEM;
305 ecryptfs_printk(KERN_ERR, "Failed to allocate memory\n"); 453 printk(KERN_ERR "%s: Failed to allocate [%Zd] bytes of "
454 "GFP_KERNEL memory\n", __func__, msg_size);
306 goto unlock; 455 goto unlock;
307 } 456 }
308 memcpy(msg_ctx->msg, msg, msg_size); 457 memcpy(msg_ctx->msg, msg, msg_size);
@@ -317,34 +466,38 @@ out:
317} 466}
318 467
319/** 468/**
320 * ecryptfs_send_message 469 * ecryptfs_send_message_locked
321 * @transport: The transport over which to send the message (i.e., 470 * @transport: The transport over which to send the message (i.e.,
322 * netlink) 471 * netlink)
323 * @data: The data to send 472 * @data: The data to send
324 * @data_len: The length of data 473 * @data_len: The length of data
325 * @msg_ctx: The message context allocated for the send 474 * @msg_ctx: The message context allocated for the send
475 *
476 * Must be called with ecryptfs_daemon_hash_mux held.
477 *
478 * Returns zero on success; non-zero otherwise
326 */ 479 */
327int ecryptfs_send_message(unsigned int transport, char *data, int data_len, 480static int
328 struct ecryptfs_msg_ctx **msg_ctx) 481ecryptfs_send_message_locked(unsigned int transport, char *data, int data_len,
482 u8 msg_type, struct ecryptfs_msg_ctx **msg_ctx)
329{ 483{
330 struct ecryptfs_daemon_id *id; 484 struct ecryptfs_daemon *daemon;
331 int rc; 485 int rc;
332 486
333 mutex_lock(&ecryptfs_daemon_id_hash_mux); 487 rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid,
334 if (ecryptfs_find_daemon_id(current->euid, &id)) { 488 current->nsproxy->user_ns);
335 mutex_unlock(&ecryptfs_daemon_id_hash_mux); 489 if (rc || !daemon) {
336 rc = -ENOTCONN; 490 rc = -ENOTCONN;
337 ecryptfs_printk(KERN_ERR, "User [%d] does not have a daemon " 491 printk(KERN_ERR "%s: User [%d] does not have a daemon "
338 "registered\n", current->euid); 492 "registered\n", __func__, current->euid);
339 goto out; 493 goto out;
340 } 494 }
341 mutex_unlock(&ecryptfs_daemon_id_hash_mux);
342 mutex_lock(&ecryptfs_msg_ctx_lists_mux); 495 mutex_lock(&ecryptfs_msg_ctx_lists_mux);
343 rc = ecryptfs_acquire_free_msg_ctx(msg_ctx); 496 rc = ecryptfs_acquire_free_msg_ctx(msg_ctx);
344 if (rc) { 497 if (rc) {
345 mutex_unlock(&ecryptfs_msg_ctx_lists_mux); 498 mutex_unlock(&ecryptfs_msg_ctx_lists_mux);
346 ecryptfs_printk(KERN_WARNING, "Could not claim a free " 499 printk(KERN_WARNING "%s: Could not claim a free "
347 "context element\n"); 500 "context element\n", __func__);
348 goto out; 501 goto out;
349 } 502 }
350 ecryptfs_msg_ctx_free_to_alloc(*msg_ctx); 503 ecryptfs_msg_ctx_free_to_alloc(*msg_ctx);
@@ -352,23 +505,50 @@ int ecryptfs_send_message(unsigned int transport, char *data, int data_len,
352 mutex_unlock(&ecryptfs_msg_ctx_lists_mux); 505 mutex_unlock(&ecryptfs_msg_ctx_lists_mux);
353 switch (transport) { 506 switch (transport) {
354 case ECRYPTFS_TRANSPORT_NETLINK: 507 case ECRYPTFS_TRANSPORT_NETLINK:
355 rc = ecryptfs_send_netlink(data, data_len, *msg_ctx, 508 rc = ecryptfs_send_netlink(data, data_len, *msg_ctx, msg_type,
356 ECRYPTFS_NLMSG_REQUEST, 0, id->pid); 509 0, daemon->pid);
510 break;
511 case ECRYPTFS_TRANSPORT_MISCDEV:
512 rc = ecryptfs_send_miscdev(data, data_len, *msg_ctx, msg_type,
513 0, daemon);
357 break; 514 break;
358 case ECRYPTFS_TRANSPORT_CONNECTOR: 515 case ECRYPTFS_TRANSPORT_CONNECTOR:
359 case ECRYPTFS_TRANSPORT_RELAYFS: 516 case ECRYPTFS_TRANSPORT_RELAYFS:
360 default: 517 default:
361 rc = -ENOSYS; 518 rc = -ENOSYS;
362 } 519 }
363 if (rc) { 520 if (rc)
364 printk(KERN_ERR "Error attempting to send message to userspace " 521 printk(KERN_ERR "%s: Error attempting to send message to "
365 "daemon; rc = [%d]\n", rc); 522 "userspace daemon; rc = [%d]\n", __func__, rc);
366 }
367out: 523out:
368 return rc; 524 return rc;
369} 525}
370 526
371/** 527/**
528 * ecryptfs_send_message
529 * @transport: The transport over which to send the message (i.e.,
530 * netlink)
531 * @data: The data to send
532 * @data_len: The length of data
533 * @msg_ctx: The message context allocated for the send
534 *
535 * Grabs ecryptfs_daemon_hash_mux.
536 *
537 * Returns zero on success; non-zero otherwise
538 */
539int ecryptfs_send_message(unsigned int transport, char *data, int data_len,
540 struct ecryptfs_msg_ctx **msg_ctx)
541{
542 int rc;
543
544 mutex_lock(&ecryptfs_daemon_hash_mux);
545 rc = ecryptfs_send_message_locked(transport, data, data_len,
546 ECRYPTFS_MSG_REQUEST, msg_ctx);
547 mutex_unlock(&ecryptfs_daemon_hash_mux);
548 return rc;
549}
550
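For context (not part of this patch): a minimal sketch of how an in-kernel caller would drive this pair of interfaces, using only the prototypes visible in this hunk and the caller-frees contract noted below. The helper name example_request_reply() is hypothetical.

/* Sketch only; assumes "ecryptfs_kernel.h" provides the prototypes used here. */
static int example_request_reply(unsigned int transport, char *req, int req_len)
{
	struct ecryptfs_msg_ctx *msg_ctx;
	struct ecryptfs_message *reply = NULL;
	int rc;

	rc = ecryptfs_send_message(transport, req, req_len, &msg_ctx);
	if (rc)
		return rc;		/* e.g. -ENOTCONN: no daemon registered */
	rc = ecryptfs_wait_for_response(msg_ctx, &reply);
	if (rc)
		return rc;		/* timed out or errored; reply not set */
	/* ... consume reply->data[0 .. reply->data_len - 1] ... */
	kfree(reply);			/* the response buffer belongs to the caller */
	return 0;
}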
551/**
372 * ecryptfs_wait_for_response 552 * ecryptfs_wait_for_response
373 * @msg_ctx: The context that was assigned when sending a message 553 * @msg_ctx: The context that was assigned when sending a message
374 * @msg: The incoming message from userspace; not set if rc != 0 554 * @msg: The incoming message from userspace; not set if rc != 0
@@ -377,7 +557,7 @@ out:
377 * of time exceeds ecryptfs_message_wait_timeout. If zero is 557 * of time exceeds ecryptfs_message_wait_timeout. If zero is
378 * returned, msg will point to a valid message from userspace; a 558 * returned, msg will point to a valid message from userspace; a
379 * non-zero value is returned upon failure to receive a message or an 559 * non-zero value is returned upon failure to receive a message or an
380 * error occurs. 560 * error occurs. The caller must free @msg on success.
381 */ 561 */
382int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx, 562int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx,
383 struct ecryptfs_message **msg) 563 struct ecryptfs_message **msg)
@@ -413,32 +593,32 @@ int ecryptfs_init_messaging(unsigned int transport)
413 593
414 if (ecryptfs_number_of_users > ECRYPTFS_MAX_NUM_USERS) { 594 if (ecryptfs_number_of_users > ECRYPTFS_MAX_NUM_USERS) {
415 ecryptfs_number_of_users = ECRYPTFS_MAX_NUM_USERS; 595 ecryptfs_number_of_users = ECRYPTFS_MAX_NUM_USERS;
416 ecryptfs_printk(KERN_WARNING, "Specified number of users is " 596 printk(KERN_WARNING "%s: Specified number of users is "
417 "too large, defaulting to [%d] users\n", 597 "too large, defaulting to [%d] users\n", __func__,
418 ecryptfs_number_of_users); 598 ecryptfs_number_of_users);
419 } 599 }
420 mutex_init(&ecryptfs_daemon_id_hash_mux); 600 mutex_init(&ecryptfs_daemon_hash_mux);
421 mutex_lock(&ecryptfs_daemon_id_hash_mux); 601 mutex_lock(&ecryptfs_daemon_hash_mux);
422 ecryptfs_hash_buckets = 1; 602 ecryptfs_hash_buckets = 1;
423 while (ecryptfs_number_of_users >> ecryptfs_hash_buckets) 603 while (ecryptfs_number_of_users >> ecryptfs_hash_buckets)
424 ecryptfs_hash_buckets++; 604 ecryptfs_hash_buckets++;
425 ecryptfs_daemon_id_hash = kmalloc(sizeof(struct hlist_head) 605 ecryptfs_daemon_hash = kmalloc((sizeof(struct hlist_head)
426 * ecryptfs_hash_buckets, GFP_KERNEL); 606 * ecryptfs_hash_buckets), GFP_KERNEL);
427 if (!ecryptfs_daemon_id_hash) { 607 if (!ecryptfs_daemon_hash) {
428 rc = -ENOMEM; 608 rc = -ENOMEM;
429 ecryptfs_printk(KERN_ERR, "Failed to allocate memory\n"); 609 printk(KERN_ERR "%s: Failed to allocate memory\n", __func__);
430 mutex_unlock(&ecryptfs_daemon_id_hash_mux); 610 mutex_unlock(&ecryptfs_daemon_hash_mux);
431 goto out; 611 goto out;
432 } 612 }
433 for (i = 0; i < ecryptfs_hash_buckets; i++) 613 for (i = 0; i < ecryptfs_hash_buckets; i++)
434 INIT_HLIST_HEAD(&ecryptfs_daemon_id_hash[i]); 614 INIT_HLIST_HEAD(&ecryptfs_daemon_hash[i]);
435 mutex_unlock(&ecryptfs_daemon_id_hash_mux); 615 mutex_unlock(&ecryptfs_daemon_hash_mux);
436
437 ecryptfs_msg_ctx_arr = kmalloc((sizeof(struct ecryptfs_msg_ctx) 616 ecryptfs_msg_ctx_arr = kmalloc((sizeof(struct ecryptfs_msg_ctx)
438 * ecryptfs_message_buf_len), GFP_KERNEL); 617 * ecryptfs_message_buf_len),
618 GFP_KERNEL);
439 if (!ecryptfs_msg_ctx_arr) { 619 if (!ecryptfs_msg_ctx_arr) {
440 rc = -ENOMEM; 620 rc = -ENOMEM;
441 ecryptfs_printk(KERN_ERR, "Failed to allocate memory\n"); 621 printk(KERN_ERR "%s: Failed to allocate memory\n", __func__);
442 goto out; 622 goto out;
443 } 623 }
444 mutex_init(&ecryptfs_msg_ctx_lists_mux); 624 mutex_init(&ecryptfs_msg_ctx_lists_mux);
@@ -446,6 +626,7 @@ int ecryptfs_init_messaging(unsigned int transport)
446 ecryptfs_msg_counter = 0; 626 ecryptfs_msg_counter = 0;
447 for (i = 0; i < ecryptfs_message_buf_len; i++) { 627 for (i = 0; i < ecryptfs_message_buf_len; i++) {
448 INIT_LIST_HEAD(&ecryptfs_msg_ctx_arr[i].node); 628 INIT_LIST_HEAD(&ecryptfs_msg_ctx_arr[i].node);
629 INIT_LIST_HEAD(&ecryptfs_msg_ctx_arr[i].daemon_out_list);
449 mutex_init(&ecryptfs_msg_ctx_arr[i].mux); 630 mutex_init(&ecryptfs_msg_ctx_arr[i].mux);
450 mutex_lock(&ecryptfs_msg_ctx_arr[i].mux); 631 mutex_lock(&ecryptfs_msg_ctx_arr[i].mux);
451 ecryptfs_msg_ctx_arr[i].index = i; 632 ecryptfs_msg_ctx_arr[i].index = i;
@@ -464,6 +645,11 @@ int ecryptfs_init_messaging(unsigned int transport)
464 if (rc) 645 if (rc)
465 ecryptfs_release_messaging(transport); 646 ecryptfs_release_messaging(transport);
466 break; 647 break;
648 case ECRYPTFS_TRANSPORT_MISCDEV:
649 rc = ecryptfs_init_ecryptfs_miscdev();
650 if (rc)
651 ecryptfs_release_messaging(transport);
652 break;
467 case ECRYPTFS_TRANSPORT_CONNECTOR: 653 case ECRYPTFS_TRANSPORT_CONNECTOR:
468 case ECRYPTFS_TRANSPORT_RELAYFS: 654 case ECRYPTFS_TRANSPORT_RELAYFS:
469 default: 655 default:
@@ -488,27 +674,37 @@ void ecryptfs_release_messaging(unsigned int transport)
488 kfree(ecryptfs_msg_ctx_arr); 674 kfree(ecryptfs_msg_ctx_arr);
489 mutex_unlock(&ecryptfs_msg_ctx_lists_mux); 675 mutex_unlock(&ecryptfs_msg_ctx_lists_mux);
490 } 676 }
491 if (ecryptfs_daemon_id_hash) { 677 if (ecryptfs_daemon_hash) {
492 struct hlist_node *elem; 678 struct hlist_node *elem;
493 struct ecryptfs_daemon_id *id; 679 struct ecryptfs_daemon *daemon;
494 int i; 680 int i;
495 681
496 mutex_lock(&ecryptfs_daemon_id_hash_mux); 682 mutex_lock(&ecryptfs_daemon_hash_mux);
497 for (i = 0; i < ecryptfs_hash_buckets; i++) { 683 for (i = 0; i < ecryptfs_hash_buckets; i++) {
498 hlist_for_each_entry(id, elem, 684 int rc;
499 &ecryptfs_daemon_id_hash[i], 685
500 id_chain) { 686 hlist_for_each_entry(daemon, elem,
501 hlist_del(elem); 687 &ecryptfs_daemon_hash[i],
502 kfree(id); 688 euid_chain) {
689 rc = ecryptfs_exorcise_daemon(daemon);
690 if (rc)
691 printk(KERN_ERR "%s: Error whilst "
692 "attempting to destroy daemon; "
693 "rc = [%d]. Dazed and confused, "
694 "but trying to continue.\n",
695 __func__, rc);
503 } 696 }
504 } 697 }
505 kfree(ecryptfs_daemon_id_hash); 698 kfree(ecryptfs_daemon_hash);
506 mutex_unlock(&ecryptfs_daemon_id_hash_mux); 699 mutex_unlock(&ecryptfs_daemon_hash_mux);
507 } 700 }
508 switch(transport) { 701 switch(transport) {
509 case ECRYPTFS_TRANSPORT_NETLINK: 702 case ECRYPTFS_TRANSPORT_NETLINK:
510 ecryptfs_release_netlink(); 703 ecryptfs_release_netlink();
511 break; 704 break;
705 case ECRYPTFS_TRANSPORT_MISCDEV:
706 ecryptfs_destroy_ecryptfs_miscdev();
707 break;
512 case ECRYPTFS_TRANSPORT_CONNECTOR: 708 case ECRYPTFS_TRANSPORT_CONNECTOR:
513 case ECRYPTFS_TRANSPORT_RELAYFS: 709 case ECRYPTFS_TRANSPORT_RELAYFS:
514 default: 710 default:
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
new file mode 100644
index 000000000000..6560da1a58ce
--- /dev/null
+++ b/fs/ecryptfs/miscdev.c
@@ -0,0 +1,600 @@
1/**
2 * eCryptfs: Linux filesystem encryption layer
3 *
4 * Copyright (C) 2008 International Business Machines Corp.
5 * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License version
9 * 2 as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
19 * 02111-1307, USA.
20 */
21
22#include <linux/fs.h>
23#include <linux/hash.h>
24#include <linux/random.h>
25#include <linux/miscdevice.h>
26#include <linux/poll.h>
27#include <linux/wait.h>
28#include <linux/module.h>
29#include "ecryptfs_kernel.h"
30
31static atomic_t ecryptfs_num_miscdev_opens;
32
33/**
34 * ecryptfs_miscdev_poll
35 * @file: dev file (ignored)
36 * @pt: dev poll table (ignored)
37 *
38 * Returns the poll mask
39 */
40static unsigned int
41ecryptfs_miscdev_poll(struct file *file, poll_table *pt)
42{
43 struct ecryptfs_daemon *daemon;
44 unsigned int mask = 0;
45 int rc;
46
47 mutex_lock(&ecryptfs_daemon_hash_mux);
48 /* TODO: Just use file->private_data? */
49 rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid,
50 current->nsproxy->user_ns);
51 BUG_ON(rc || !daemon);
52 mutex_lock(&daemon->mux);
53 mutex_unlock(&ecryptfs_daemon_hash_mux);
54 if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) {
55 printk(KERN_WARNING "%s: Attempt to poll on zombified "
56 "daemon\n", __func__);
57 goto out_unlock_daemon;
58 }
59 if (daemon->flags & ECRYPTFS_DAEMON_IN_READ)
60 goto out_unlock_daemon;
61 if (daemon->flags & ECRYPTFS_DAEMON_IN_POLL)
62 goto out_unlock_daemon;
63 daemon->flags |= ECRYPTFS_DAEMON_IN_POLL;
64 mutex_unlock(&daemon->mux);
65 poll_wait(file, &daemon->wait, pt);
66 mutex_lock(&daemon->mux);
67 if (!list_empty(&daemon->msg_ctx_out_queue))
68 mask |= POLLIN | POLLRDNORM;
69out_unlock_daemon:
70 daemon->flags &= ~ECRYPTFS_DAEMON_IN_POLL;
71 mutex_unlock(&daemon->mux);
72 return mask;
73}
74
75/**
76 * ecryptfs_miscdev_open
77 * @inode: inode of miscdev handle (ignored)
78 * @file: file for miscdev handle (ignored)
79 *
80 * Returns zero on success; non-zero otherwise
81 */
82static int
83ecryptfs_miscdev_open(struct inode *inode, struct file *file)
84{
85 struct ecryptfs_daemon *daemon = NULL;
86 int rc;
87
88 mutex_lock(&ecryptfs_daemon_hash_mux);
89 rc = try_module_get(THIS_MODULE);
90 if (rc == 0) {
91 rc = -EIO;
92 printk(KERN_ERR "%s: Error attempting to increment module use "
93 "count; rc = [%d]\n", __func__, rc);
94 goto out_unlock_daemon_list;
95 }
96 rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid,
97 current->nsproxy->user_ns);
98 if (rc || !daemon) {
99 rc = ecryptfs_spawn_daemon(&daemon, current->euid,
100 current->nsproxy->user_ns,
101 task_pid(current));
102 if (rc) {
103 printk(KERN_ERR "%s: Error attempting to spawn daemon; "
104 "rc = [%d]\n", __func__, rc);
105 goto out_module_put_unlock_daemon_list;
106 }
107 }
108 mutex_lock(&daemon->mux);
109 if (daemon->pid != task_pid(current)) {
110 rc = -EINVAL;
111 printk(KERN_ERR "%s: pid [0x%p] has registered with euid [%d], "
112 "but pid [0x%p] has attempted to open the handle "
113 "instead\n", __func__, daemon->pid, daemon->euid,
114 task_pid(current));
115 goto out_unlock_daemon;
116 }
117 if (daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN) {
118 rc = -EBUSY;
119 printk(KERN_ERR "%s: Miscellaneous device handle may only be "
120 "opened once per daemon; pid [0x%p] already has this "
121 "handle open\n", __func__, daemon->pid);
122 goto out_unlock_daemon;
123 }
124 daemon->flags |= ECRYPTFS_DAEMON_MISCDEV_OPEN;
125 atomic_inc(&ecryptfs_num_miscdev_opens);
126out_unlock_daemon:
127 mutex_unlock(&daemon->mux);
128out_module_put_unlock_daemon_list:
129 if (rc)
130 module_put(THIS_MODULE);
131out_unlock_daemon_list:
132 mutex_unlock(&ecryptfs_daemon_hash_mux);
133 return rc;
134}
135
136/**
137 * ecryptfs_miscdev_release
138 * @inode: inode of fs/ecryptfs/euid handle (ignored)
139 * @file: file for fs/ecryptfs/euid handle (ignored)
140 *
141 * This deregisters the daemon associated with the calling euid; the
142 * daemon must reopen the handle in order to register again.
143 *
144 * Returns zero on success; non-zero otherwise
145 */
146static int
147ecryptfs_miscdev_release(struct inode *inode, struct file *file)
148{
149 struct ecryptfs_daemon *daemon = NULL;
150 int rc;
151
152 mutex_lock(&ecryptfs_daemon_hash_mux);
153 rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid,
154 current->nsproxy->user_ns);
155 BUG_ON(rc || !daemon);
156 mutex_lock(&daemon->mux);
157 BUG_ON(daemon->pid != task_pid(current));
158 BUG_ON(!(daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN));
159 daemon->flags &= ~ECRYPTFS_DAEMON_MISCDEV_OPEN;
160 atomic_dec(&ecryptfs_num_miscdev_opens);
161 mutex_unlock(&daemon->mux);
162 rc = ecryptfs_exorcise_daemon(daemon);
163 if (rc) {
164 printk(KERN_CRIT "%s: Fatal error whilst attempting to "
165 "shut down daemon; rc = [%d]. Please report this "
166 "bug.\n", __func__, rc);
167 BUG();
168 }
169 module_put(THIS_MODULE);
170 mutex_unlock(&ecryptfs_daemon_hash_mux);
171 return rc;
172}
173
174/**
175 * ecryptfs_send_miscdev
176 * @data: Data to send to daemon; may be NULL
177 * @data_size: Amount of data to send to daemon
178 * @msg_ctx: Message context, which is used to handle the reply. If
179 * this is NULL, then we do not expect a reply.
180 * @msg_type: Type of message
181 * @msg_flags: Flags for message
182 * @daemon: eCryptfs daemon object
183 *
184 * Add msg_ctx to queue and then, if it exists, notify the blocked
185 * process about the data being available. Must be called with
186 * ecryptfs_daemon_hash_mux held.
187 *
188 * Returns zero on success; non-zero otherwise
189 */
190int ecryptfs_send_miscdev(char *data, size_t data_size,
191 struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
192 u16 msg_flags, struct ecryptfs_daemon *daemon)
193{
194 int rc = 0;
195
196 mutex_lock(&msg_ctx->mux);
197 if (data) {
198 msg_ctx->msg = kmalloc((sizeof(*msg_ctx->msg) + data_size),
199 GFP_KERNEL);
200 if (!msg_ctx->msg) {
201 rc = -ENOMEM;
202 printk(KERN_ERR "%s: Out of memory whilst attempting "
203 "to kmalloc(%Zd, GFP_KERNEL)\n", __func__,
204 (sizeof(*msg_ctx->msg) + data_size));
205 goto out_unlock;
206 }
207 msg_ctx->msg->index = msg_ctx->index;
208 msg_ctx->msg->data_len = data_size;
209 memcpy(msg_ctx->msg->data, data, data_size);
210 msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size);
211 } else {
212 msg_ctx->msg = NULL;
213 msg_ctx->msg_size = 0;
214 }
215 msg_ctx->type = msg_type;
216
217 mutex_lock(&daemon->mux);
218 list_add_tail(&msg_ctx->daemon_out_list, &daemon->msg_ctx_out_queue);
219 daemon->num_queued_msg_ctx++;
220 wake_up_interruptible(&daemon->wait);
221 mutex_unlock(&daemon->mux);
222out_unlock:
223 mutex_unlock(&msg_ctx->mux);
224 return rc;
225}
226
227/**
228 * ecryptfs_miscdev_read - format and send message from queue
229 * @file: fs/ecryptfs/euid miscdevfs handle (ignored)
230 * @buf: User buffer into which to copy the next message on the daemon queue
231 * @count: Amount of space available in @buf
232 * @ppos: Offset in file (ignored)
233 *
234 * Pulls the oldest message from the daemon queue, formats it for
235 * being sent via a miscdevfs handle, and copies it into @buf
236 *
237 * Returns the number of bytes copied into the user buffer
238 */
239static ssize_t
240ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count,
241 loff_t *ppos)
242{
243 struct ecryptfs_daemon *daemon;
244 struct ecryptfs_msg_ctx *msg_ctx;
245 size_t packet_length_size;
246 u32 counter_nbo;
247 char packet_length[3];
248 size_t i;
249 size_t total_length;
250 int rc;
251
252 mutex_lock(&ecryptfs_daemon_hash_mux);
253 /* TODO: Just use file->private_data? */
254 rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid,
255 current->nsproxy->user_ns);
256 BUG_ON(rc || !daemon);
257 mutex_lock(&daemon->mux);
258 if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) {
259 rc = 0;
260 mutex_unlock(&ecryptfs_daemon_hash_mux);
261 printk(KERN_WARNING "%s: Attempt to read from zombified "
262 "daemon\n", __func__);
263 goto out_unlock_daemon;
264 }
265 if (daemon->flags & ECRYPTFS_DAEMON_IN_READ) {
266 rc = 0;
267 mutex_unlock(&ecryptfs_daemon_hash_mux);
268 goto out_unlock_daemon;
269 }
270 /* This daemon will not go away so long as this flag is set */
271 daemon->flags |= ECRYPTFS_DAEMON_IN_READ;
272 mutex_unlock(&ecryptfs_daemon_hash_mux);
273check_list:
274 if (list_empty(&daemon->msg_ctx_out_queue)) {
275 mutex_unlock(&daemon->mux);
276 rc = wait_event_interruptible(
277 daemon->wait, !list_empty(&daemon->msg_ctx_out_queue));
278 mutex_lock(&daemon->mux);
279 if (rc < 0) {
280 rc = 0;
281 goto out_unlock_daemon;
282 }
283 }
284 if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) {
285 rc = 0;
286 goto out_unlock_daemon;
287 }
288 if (list_empty(&daemon->msg_ctx_out_queue)) {
289 /* Something else jumped in since the
290 * wait_event_interruptible() and removed the
291 * message from the queue; try again */
292 goto check_list;
293 }
294 BUG_ON(current->euid != daemon->euid);
295 BUG_ON(current->nsproxy->user_ns != daemon->user_ns);
296 BUG_ON(task_pid(current) != daemon->pid);
297 msg_ctx = list_first_entry(&daemon->msg_ctx_out_queue,
298 struct ecryptfs_msg_ctx, daemon_out_list);
299 BUG_ON(!msg_ctx);
300 mutex_lock(&msg_ctx->mux);
301 if (msg_ctx->msg) {
302 rc = ecryptfs_write_packet_length(packet_length,
303 msg_ctx->msg_size,
304 &packet_length_size);
305 if (rc) {
306 rc = 0;
307 printk(KERN_WARNING "%s: Error writing packet length; "
308 "rc = [%d]\n", __func__, rc);
309 goto out_unlock_msg_ctx;
310 }
311 } else {
312 packet_length_size = 0;
313 msg_ctx->msg_size = 0;
314 }
315 /* miscdevfs packet format:
316 * Octet 0: Type
317 * Octets 1-4: network byte order msg_ctx->counter
318 * Octets 5-N0: Size of struct ecryptfs_message to follow
319 * Octets N0-N1: struct ecryptfs_message (including data)
320 *
321 * Octets 5-N1 not written if the packet type does not
322 * include a message */
323 total_length = (1 + 4 + packet_length_size + msg_ctx->msg_size);
324 if (count < total_length) {
325 rc = 0;
326 printk(KERN_WARNING "%s: Only given user buffer of "
327 "size [%Zd], but we need [%Zd] to read the "
328 "pending message\n", __func__, count, total_length);
329 goto out_unlock_msg_ctx;
330 }
331 i = 0;
332 buf[i++] = msg_ctx->type;
333 counter_nbo = cpu_to_be32(msg_ctx->counter);
334 memcpy(&buf[i], (char *)&counter_nbo, 4);
335 i += 4;
336 if (msg_ctx->msg) {
337 memcpy(&buf[i], packet_length, packet_length_size);
338 i += packet_length_size;
339 rc = copy_to_user(&buf[i], msg_ctx->msg, msg_ctx->msg_size);
340 if (rc) {
341 printk(KERN_ERR "%s: copy_to_user returned error "
342 "[%d]\n", __func__, rc);
343 goto out_unlock_msg_ctx;
344 }
345 i += msg_ctx->msg_size;
346 }
347 rc = i;
348 list_del(&msg_ctx->daemon_out_list);
349 kfree(msg_ctx->msg);
350 msg_ctx->msg = NULL;
351 /* We do not expect a reply from the userspace daemon for any
352 * message type other than ECRYPTFS_MSG_REQUEST */
353 if (msg_ctx->type != ECRYPTFS_MSG_REQUEST)
354 ecryptfs_msg_ctx_alloc_to_free(msg_ctx);
355out_unlock_msg_ctx:
356 mutex_unlock(&msg_ctx->mux);
357out_unlock_daemon:
358 daemon->flags &= ~ECRYPTFS_DAEMON_IN_READ;
359 mutex_unlock(&daemon->mux);
360 return rc;
361}
362
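Illustration only (not part of this patch): a hypothetical userspace reader that decodes the fixed part of the packet header documented in the comment above. The /dev/ecryptfs path and the one-/two-octet length encoding (matching ecryptfs_write_packet_length()) are assumptions, not definitions taken from this diff.

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <arpa/inet.h>

int main(void)
{
	unsigned char buf[4096];
	uint32_t counter;
	size_t msg_size = 0, i = 0;
	ssize_t n;
	int fd = open("/dev/ecryptfs", O_RDWR);

	if (fd < 0)
		return 1;
	n = read(fd, buf, sizeof(buf));	/* blocks until the kernel queues a packet */
	if (n < 5) {
		close(fd);
		return 1;
	}
	printf("type = %u\n", (unsigned int)buf[i++]);	/* octet 0: packet type */
	memcpy(&counter, &buf[i], 4);		/* octets 1-4: counter, network byte order */
	counter = ntohl(counter);
	i += 4;
	if (n > 5) {				/* optional length octets + ecryptfs_message */
		if (buf[i] < 192) {		/* one-octet length form */
			msg_size = buf[i++];
		} else {			/* two-octet length form */
			msg_size = (((size_t)buf[i] - 192) << 8) + buf[i + 1] + 192;
			i += 2;
		}
	}
	printf("counter = %u, struct ecryptfs_message size = %zu\n",
	       (unsigned int)counter, msg_size);
	close(fd);
	return 0;
}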
363/**
364 * ecryptfs_miscdev_helo
365 * @euid: effective user id of the process sending the helo packet
366 * @user_ns: The namespace in which @euid applies
367 * @pid: process id of the process sending the helo packet
368 *
369 * Returns zero on success; non-zero otherwise
370 */
371static int ecryptfs_miscdev_helo(uid_t euid, struct user_namespace *user_ns,
372 struct pid *pid)
373{
374 int rc;
375
376 rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_MISCDEV, euid, user_ns,
377 pid);
378 if (rc)
379 printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc);
380 return rc;
381}
382
383/**
384 * ecryptfs_miscdev_quit
385 * @euid: effective user id of the process sending the quit packet
386 * @user_ns: The namespace in which @euid applies
387 * @pid: process id of the process sending the quit packet
388 *
389 * Returns zero on success; non-zero otherwise
390 */
391static int ecryptfs_miscdev_quit(uid_t euid, struct user_namespace *user_ns,
392 struct pid *pid)
393{
394 int rc;
395
396 rc = ecryptfs_process_quit(euid, user_ns, pid);
397 if (rc)
398 printk(KERN_WARNING
399 "Error processing QUIT message; rc = [%d]\n", rc);
400 return rc;
401}
402
403/**
404 * ecryptfs_miscdev_response - process response to message previously sent to daemon
405 * @data: Bytes comprising struct ecryptfs_message
406 * @data_size: sizeof(struct ecryptfs_message) + data len
407 * @euid: Effective user id of the process sending the miscdev response
408 * @user_ns: The namespace in which @euid applies
409 * @pid: Process id of the process sending the miscdev response
410 * @seq: Sequence number for miscdev response packet
411 *
412 * Returns zero on success; non-zero otherwise
413 */
414static int ecryptfs_miscdev_response(char *data, size_t data_size,
415 uid_t euid, struct user_namespace *user_ns,
416 struct pid *pid, u32 seq)
417{
418 struct ecryptfs_message *msg = (struct ecryptfs_message *)data;
419 int rc;
420
421 if ((sizeof(*msg) + msg->data_len) != data_size) {
422 printk(KERN_WARNING "%s: (sizeof(*msg) + msg->data_len) = "
423 "[%Zd]; data_size = [%Zd]. Invalid packet.\n", __func__,
424 (sizeof(*msg) + msg->data_len), data_size);
425 rc = -EINVAL;
426 goto out;
427 }
428 rc = ecryptfs_process_response(msg, euid, user_ns, pid, seq);
429 if (rc)
430 printk(KERN_ERR
431 "Error processing response message; rc = [%d]\n", rc);
432out:
433 return rc;
434}
435
436/**
437 * ecryptfs_miscdev_write - handle write to daemon miscdev handle
438 * @file: File for misc dev handle (ignored)
439 * @buf: Buffer containing user data
440 * @count: Amount of data in @buf
441 * @ppos: Pointer to offset in file (ignored)
442 *
443 * miscdevfs packet format:
444 * Octet 0: Type
445 * Octets 1-4: network byte order msg_ctx->counter (0's for non-response)
446 * Octets 5-N0: Size of struct ecryptfs_message to follow
447 * Octets N0-N1: struct ecryptfs_message (including data)
448 *
449 * Returns the number of bytes read from @buf
450 */
451static ssize_t
452ecryptfs_miscdev_write(struct file *file, const char __user *buf,
453 size_t count, loff_t *ppos)
454{
455 u32 counter_nbo, seq;
456 size_t packet_size, packet_size_length, i;
457 ssize_t sz = 0;
458 char *data;
459 int rc;
460
461 if (count == 0)
462 goto out;
463 data = kmalloc(count, GFP_KERNEL);
464 if (!data) {
465 printk(KERN_ERR "%s: Out of memory whilst attempting to "
466 "kmalloc([%Zd], GFP_KERNEL)\n", __func__, count);
467 goto out;
468 }
469 rc = copy_from_user(data, buf, count);
470 if (rc) {
471 printk(KERN_ERR "%s: copy_from_user returned error [%d]\n",
472 __func__, rc);
473 goto out_free;
474 }
475 sz = count;
476 i = 0;
477 switch (data[i++]) {
478 case ECRYPTFS_MSG_RESPONSE:
479 if (count < (1 + 4 + 1 + sizeof(struct ecryptfs_message))) {
480 printk(KERN_WARNING "%s: Minimum acceptable packet "
481 "size is [%Zd], but amount of data written is "
482 "only [%Zd]. Discarding response packet.\n",
483 __func__,
484 (1 + 4 + 1 + sizeof(struct ecryptfs_message)),
485 count);
486 goto out_free;
487 }
488 memcpy((char *)&counter_nbo, &data[i], 4);
489 seq = be32_to_cpu(counter_nbo);
490 i += 4;
491 rc = ecryptfs_parse_packet_length(&data[i], &packet_size,
492 &packet_size_length);
493 if (rc) {
494 printk(KERN_WARNING "%s: Error parsing packet length; "
495 "rc = [%d]\n", __func__, rc);
496 goto out_free;
497 }
498 i += packet_size_length;
499 if ((1 + 4 + packet_size_length + packet_size) != count) {
500 printk(KERN_WARNING "%s: (1 + 4 + packet_size_length([%Zd])"
501 " + packet_size([%Zd]))([%Zd]) != "
502 "count([%Zd]). Invalid packet format.\n",
503 __func__, packet_size_length, packet_size,
504 (1 + 4 + packet_size_length + packet_size), count);
505 goto out_free;
506 }
507 rc = ecryptfs_miscdev_response(&data[i], packet_size,
508 current->euid,
509 current->nsproxy->user_ns,
510 task_pid(current), seq);
511 if (rc)
512 printk(KERN_WARNING "%s: Failed to deliver miscdev "
513 "response to requesting operation; rc = [%d]\n",
514 __func__, rc);
515 break;
516 case ECRYPTFS_MSG_HELO:
517 rc = ecryptfs_miscdev_helo(current->euid,
518 current->nsproxy->user_ns,
519 task_pid(current));
520 if (rc) {
521 printk(KERN_ERR "%s: Error attempting to process "
522 "helo from pid [0x%p]; rc = [%d]\n", __func__,
523 task_pid(current), rc);
524 goto out_free;
525 }
526 break;
527 case ECRYPTFS_MSG_QUIT:
528 rc = ecryptfs_miscdev_quit(current->euid,
529 current->nsproxy->user_ns,
530 task_pid(current));
531 if (rc) {
532 printk(KERN_ERR "%s: Error attempting to process "
533 "quit from pid [0x%p]; rc = [%d]\n", __func__,
534 task_pid(current), rc);
535 goto out_free;
536 }
537 break;
538 default:
539 ecryptfs_printk(KERN_WARNING, "Dropping miscdev "
540 "message of unrecognized type [%d]\n",
541 data[0]);
542 break;
543 }
544out_free:
545 kfree(data);
546out:
547 return sz;
548}
549
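Companion illustration (also not part of this patch): a hypothetical helper a daemon could use to build and write one packet in the layout documented above. The struct ecryptfs_message layout, the host-byte-order header fields, and the two-octet length form are assumptions inferred from the kernel-side code; the type argument should be the real ECRYPTFS_MSG_RESPONSE value from ecryptfs_kernel.h.

#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>

struct ecryptfs_message {		/* assumed to mirror the kernel's layout */
	uint32_t index;
	uint32_t data_len;
	uint8_t data[];
};

static ssize_t send_packet(int fd, uint8_t type, uint32_t counter,
			   uint32_t msg_index, const void *payload,
			   uint32_t payload_len)
{
	unsigned char pkt[4096];
	size_t msg_size = sizeof(struct ecryptfs_message) + payload_len;
	uint32_t counter_nbo = htonl(counter);	/* octets 1-4 are big endian */
	size_t i = 0;

	if (1 + 4 + 2 + msg_size > sizeof(pkt))
		return -1;
	pkt[i++] = type;			/* octet 0: packet type */
	memcpy(&pkt[i], &counter_nbo, 4);	/* counter echoed from the request */
	i += 4;
	if (msg_size < 192) {			/* one-octet length form */
		pkt[i++] = (unsigned char)msg_size;
	} else {				/* two-octet length form */
		pkt[i++] = (unsigned char)(((msg_size - 192) >> 8) + 192);
		pkt[i++] = (unsigned char)((msg_size - 192) & 0xff);
	}
	memcpy(&pkt[i], &msg_index, 4);		/* ecryptfs_message.index, host order */
	memcpy(&pkt[i + 4], &payload_len, 4);	/* ecryptfs_message.data_len */
	if (payload_len)
		memcpy(&pkt[i + 8], payload, payload_len);
	i += msg_size;
	return write(fd, pkt, i);		/* one write must cover one whole packet */
}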
550
551static const struct file_operations ecryptfs_miscdev_fops = {
552 .open = ecryptfs_miscdev_open,
553 .poll = ecryptfs_miscdev_poll,
554 .read = ecryptfs_miscdev_read,
555 .write = ecryptfs_miscdev_write,
556 .release = ecryptfs_miscdev_release,
557};
558
559static struct miscdevice ecryptfs_miscdev = {
560 .minor = MISC_DYNAMIC_MINOR,
561 .name = "ecryptfs",
562 .fops = &ecryptfs_miscdev_fops
563};
564
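Finally, a hypothetical daemon skeleton tying the handle operations together (not part of this patch): opening the node registers the daemon per ecryptfs_miscdev_open() above, poll() then blocks until a message is queued, and closing the handle deregisters it. The /dev/ecryptfs path assumes the usual udev naming for the misc device declared above.

#include <fcntl.h>
#include <poll.h>
#include <unistd.h>

int ecryptfs_daemon_loop(void)
{
	struct pollfd pfd;
	int fd = open("/dev/ecryptfs", O_RDWR);	/* open() registers this daemon */

	if (fd < 0)
		return -1;
	pfd.fd = fd;
	pfd.events = POLLIN;
	while (poll(&pfd, 1, -1) >= 0) {
		if (pfd.revents & POLLIN) {
			/* read one packet (see the reader sketch after
			 * ecryptfs_miscdev_read()) and answer any
			 * ECRYPTFS_MSG_REQUEST with send_packet() */
		}
	}
	close(fd);	/* release() deregisters the daemon */
	return 0;
}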
565/**
566 * ecryptfs_init_ecryptfs_miscdev
567 *
568 * Messages sent to the userspace daemon from the kernel are placed on
569 * a queue associated with the daemon. The next read against the
570 * miscdev handle by that daemon will return the oldest message placed
571 * on the message queue for the daemon.
572 *
573 * Returns zero on success; non-zero otherwise
574 */
575int ecryptfs_init_ecryptfs_miscdev(void)
576{
577 int rc;
578
579 atomic_set(&ecryptfs_num_miscdev_opens, 0);
580 mutex_lock(&ecryptfs_daemon_hash_mux);
581 rc = misc_register(&ecryptfs_miscdev);
582 if (rc)
583 printk(KERN_ERR "%s: Failed to register miscellaneous device "
584 "for communications with userspace daemons; rc = [%d]\n",
585 __func__, rc);
586 mutex_unlock(&ecryptfs_daemon_hash_mux);
587 return rc;
588}
589
590/**
591 * ecryptfs_destroy_ecryptfs_miscdev
592 *
593 * All of the daemons must be exorcised prior to calling this
594 * function.
595 */
596void ecryptfs_destroy_ecryptfs_miscdev(void)
597{
598 BUG_ON(atomic_read(&ecryptfs_num_miscdev_opens) != 0);
599 misc_deregister(&ecryptfs_miscdev);
600}
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 6df1debdccce..2b6fe1e6e8ba 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -153,7 +153,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
153 flush_dcache_page(page); 153 flush_dcache_page(page);
154 if (rc) { 154 if (rc) {
155 printk(KERN_ERR "%s: Error reading xattr " 155 printk(KERN_ERR "%s: Error reading xattr "
156 "region; rc = [%d]\n", __FUNCTION__, rc); 156 "region; rc = [%d]\n", __func__, rc);
157 goto out; 157 goto out;
158 } 158 }
159 } else { 159 } else {
@@ -169,7 +169,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
169 if (rc) { 169 if (rc) {
170 printk(KERN_ERR "%s: Error attempting to read " 170 printk(KERN_ERR "%s: Error attempting to read "
171 "extent at offset [%lld] in the lower " 171 "extent at offset [%lld] in the lower "
172 "file; rc = [%d]\n", __FUNCTION__, 172 "file; rc = [%d]\n", __func__,
173 lower_offset, rc); 173 lower_offset, rc);
174 goto out; 174 goto out;
175 } 175 }
@@ -212,7 +212,7 @@ static int ecryptfs_readpage(struct file *file, struct page *page)
212 "the encrypted content from the lower " 212 "the encrypted content from the lower "
213 "file whilst inserting the metadata " 213 "file whilst inserting the metadata "
214 "from the xattr into the header; rc = " 214 "from the xattr into the header; rc = "
215 "[%d]\n", __FUNCTION__, rc); 215 "[%d]\n", __func__, rc);
216 goto out; 216 goto out;
217 } 217 }
218 218
@@ -293,7 +293,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
293 if (rc) { 293 if (rc) {
294 printk(KERN_ERR "%s: Error attemping to read " 294 printk(KERN_ERR "%s: Error attemping to read "
295 "lower page segment; rc = [%d]\n", 295 "lower page segment; rc = [%d]\n",
296 __FUNCTION__, rc); 296 __func__, rc);
297 ClearPageUptodate(page); 297 ClearPageUptodate(page);
298 goto out; 298 goto out;
299 } else 299 } else
@@ -308,7 +308,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
308 "from the lower file whilst " 308 "from the lower file whilst "
309 "inserting the metadata from " 309 "inserting the metadata from "
310 "the xattr into the header; rc " 310 "the xattr into the header; rc "
311 "= [%d]\n", __FUNCTION__, rc); 311 "= [%d]\n", __func__, rc);
312 ClearPageUptodate(page); 312 ClearPageUptodate(page);
313 goto out; 313 goto out;
314 } 314 }
@@ -320,7 +320,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
320 if (rc) { 320 if (rc) {
321 printk(KERN_ERR "%s: Error reading " 321 printk(KERN_ERR "%s: Error reading "
322 "page; rc = [%d]\n", 322 "page; rc = [%d]\n",
323 __FUNCTION__, rc); 323 __func__, rc);
324 ClearPageUptodate(page); 324 ClearPageUptodate(page);
325 goto out; 325 goto out;
326 } 326 }
@@ -331,7 +331,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
331 if (rc) { 331 if (rc) {
332 printk(KERN_ERR "%s: Error decrypting page " 332 printk(KERN_ERR "%s: Error decrypting page "
333 "at index [%ld]; rc = [%d]\n", 333 "at index [%ld]; rc = [%d]\n",
334 __FUNCTION__, page->index, rc); 334 __func__, page->index, rc);
335 ClearPageUptodate(page); 335 ClearPageUptodate(page);
336 goto out; 336 goto out;
337 } 337 }
@@ -348,7 +348,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
348 if (rc) { 348 if (rc) {
349 printk(KERN_ERR "%s: Error on attempt to " 349 printk(KERN_ERR "%s: Error on attempt to "
350 "truncate to (higher) offset [%lld];" 350 "truncate to (higher) offset [%lld];"
351 " rc = [%d]\n", __FUNCTION__, 351 " rc = [%d]\n", __func__,
352 prev_page_end_size, rc); 352 prev_page_end_size, rc);
353 goto out; 353 goto out;
354 } 354 }
@@ -389,7 +389,7 @@ static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode)
389 kfree(file_size_virt); 389 kfree(file_size_virt);
390 if (rc) 390 if (rc)
391 printk(KERN_ERR "%s: Error writing file size to header; " 391 printk(KERN_ERR "%s: Error writing file size to header; "
392 "rc = [%d]\n", __FUNCTION__, rc); 392 "rc = [%d]\n", __func__, rc);
393out: 393out:
394 return rc; 394 return rc;
395} 395}
diff --git a/fs/ecryptfs/netlink.c b/fs/ecryptfs/netlink.c
index f638a698dc52..e0abad62b395 100644
--- a/fs/ecryptfs/netlink.c
+++ b/fs/ecryptfs/netlink.c
@@ -44,8 +44,8 @@ static struct sock *ecryptfs_nl_sock;
44 * upon sending the message; non-zero upon error. 44 * upon sending the message; non-zero upon error.
45 */ 45 */
46int ecryptfs_send_netlink(char *data, int data_len, 46int ecryptfs_send_netlink(char *data, int data_len,
47 struct ecryptfs_msg_ctx *msg_ctx, u16 msg_type, 47 struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
48 u16 msg_flags, pid_t daemon_pid) 48 u16 msg_flags, struct pid *daemon_pid)
49{ 49{
50 struct sk_buff *skb; 50 struct sk_buff *skb;
51 struct nlmsghdr *nlh; 51 struct nlmsghdr *nlh;
@@ -60,7 +60,7 @@ int ecryptfs_send_netlink(char *data, int data_len,
60 ecryptfs_printk(KERN_ERR, "Failed to allocate socket buffer\n"); 60 ecryptfs_printk(KERN_ERR, "Failed to allocate socket buffer\n");
61 goto out; 61 goto out;
62 } 62 }
63 nlh = NLMSG_PUT(skb, daemon_pid, msg_ctx ? msg_ctx->counter : 0, 63 nlh = NLMSG_PUT(skb, pid_nr(daemon_pid), msg_ctx ? msg_ctx->counter : 0,
64 msg_type, payload_len); 64 msg_type, payload_len);
65 nlh->nlmsg_flags = msg_flags; 65 nlh->nlmsg_flags = msg_flags;
66 if (msg_ctx && payload_len) { 66 if (msg_ctx && payload_len) {
@@ -69,7 +69,7 @@ int ecryptfs_send_netlink(char *data, int data_len,
69 msg->data_len = data_len; 69 msg->data_len = data_len;
70 memcpy(msg->data, data, data_len); 70 memcpy(msg->data, data, data_len);
71 } 71 }
72 rc = netlink_unicast(ecryptfs_nl_sock, skb, daemon_pid, 0); 72 rc = netlink_unicast(ecryptfs_nl_sock, skb, pid_nr(daemon_pid), 0);
73 if (rc < 0) { 73 if (rc < 0) {
74 ecryptfs_printk(KERN_ERR, "Failed to send eCryptfs netlink " 74 ecryptfs_printk(KERN_ERR, "Failed to send eCryptfs netlink "
75 "message; rc = [%d]\n", rc); 75 "message; rc = [%d]\n", rc);
@@ -99,6 +99,7 @@ static int ecryptfs_process_nl_response(struct sk_buff *skb)
99{ 99{
100 struct nlmsghdr *nlh = nlmsg_hdr(skb); 100 struct nlmsghdr *nlh = nlmsg_hdr(skb);
101 struct ecryptfs_message *msg = NLMSG_DATA(nlh); 101 struct ecryptfs_message *msg = NLMSG_DATA(nlh);
102 struct pid *pid;
102 int rc; 103 int rc;
103 104
104 if (skb->len - NLMSG_HDRLEN - sizeof(*msg) != msg->data_len) { 105 if (skb->len - NLMSG_HDRLEN - sizeof(*msg) != msg->data_len) {
@@ -107,8 +108,10 @@ static int ecryptfs_process_nl_response(struct sk_buff *skb)
107 "incorrectly specified data length\n"); 108 "incorrectly specified data length\n");
108 goto out; 109 goto out;
109 } 110 }
110 rc = ecryptfs_process_response(msg, NETLINK_CREDS(skb)->uid, 111 pid = find_get_pid(NETLINK_CREDS(skb)->pid);
111 NETLINK_CREDS(skb)->pid, nlh->nlmsg_seq); 112 rc = ecryptfs_process_response(msg, NETLINK_CREDS(skb)->uid, NULL,
113 pid, nlh->nlmsg_seq);
114 put_pid(pid);
112 if (rc) 115 if (rc)
113 printk(KERN_ERR 116 printk(KERN_ERR
114 "Error processing response message; rc = [%d]\n", rc); 117 "Error processing response message; rc = [%d]\n", rc);
@@ -126,11 +129,13 @@ out:
126 */ 129 */
127static int ecryptfs_process_nl_helo(struct sk_buff *skb) 130static int ecryptfs_process_nl_helo(struct sk_buff *skb)
128{ 131{
132 struct pid *pid;
129 int rc; 133 int rc;
130 134
135 pid = find_get_pid(NETLINK_CREDS(skb)->pid);
131 rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_NETLINK, 136 rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_NETLINK,
132 NETLINK_CREDS(skb)->uid, 137 NETLINK_CREDS(skb)->uid, NULL, pid);
133 NETLINK_CREDS(skb)->pid); 138 put_pid(pid);
134 if (rc) 139 if (rc)
135 printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc); 140 printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc);
136 return rc; 141 return rc;
@@ -147,10 +152,12 @@ static int ecryptfs_process_nl_helo(struct sk_buff *skb)
147 */ 152 */
148static int ecryptfs_process_nl_quit(struct sk_buff *skb) 153static int ecryptfs_process_nl_quit(struct sk_buff *skb)
149{ 154{
155 struct pid *pid;
150 int rc; 156 int rc;
151 157
152 rc = ecryptfs_process_quit(NETLINK_CREDS(skb)->uid, 158 pid = find_get_pid(NETLINK_CREDS(skb)->pid);
153 NETLINK_CREDS(skb)->pid); 159 rc = ecryptfs_process_quit(NETLINK_CREDS(skb)->uid, NULL, pid);
160 put_pid(pid);
154 if (rc) 161 if (rc)
155 printk(KERN_WARNING 162 printk(KERN_WARNING
156 "Error processing QUIT message; rc = [%d]\n", rc); 163 "Error processing QUIT message; rc = [%d]\n", rc);
@@ -176,20 +183,20 @@ static void ecryptfs_receive_nl_message(struct sk_buff *skb)
176 goto free; 183 goto free;
177 } 184 }
178 switch (nlh->nlmsg_type) { 185 switch (nlh->nlmsg_type) {
179 case ECRYPTFS_NLMSG_RESPONSE: 186 case ECRYPTFS_MSG_RESPONSE:
180 if (ecryptfs_process_nl_response(skb)) { 187 if (ecryptfs_process_nl_response(skb)) {
181 ecryptfs_printk(KERN_WARNING, "Failed to " 188 ecryptfs_printk(KERN_WARNING, "Failed to "
182 "deliver netlink response to " 189 "deliver netlink response to "
183 "requesting operation\n"); 190 "requesting operation\n");
184 } 191 }
185 break; 192 break;
186 case ECRYPTFS_NLMSG_HELO: 193 case ECRYPTFS_MSG_HELO:
187 if (ecryptfs_process_nl_helo(skb)) { 194 if (ecryptfs_process_nl_helo(skb)) {
188 ecryptfs_printk(KERN_WARNING, "Failed to " 195 ecryptfs_printk(KERN_WARNING, "Failed to "
189 "fulfill HELO request\n"); 196 "fulfill HELO request\n");
190 } 197 }
191 break; 198 break;
192 case ECRYPTFS_NLMSG_QUIT: 199 case ECRYPTFS_MSG_QUIT:
193 if (ecryptfs_process_nl_quit(skb)) { 200 if (ecryptfs_process_nl_quit(skb)) {
194 ecryptfs_printk(KERN_WARNING, "Failed to " 201 ecryptfs_printk(KERN_WARNING, "Failed to "
195 "fulfill QUIT request\n"); 202 "fulfill QUIT request\n");
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index 0c4928623bbc..ebf55150be56 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -55,7 +55,7 @@ int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data,
55 set_fs(fs_save); 55 set_fs(fs_save);
56 if (octets_written < 0) { 56 if (octets_written < 0) {
57 printk(KERN_ERR "%s: octets_written = [%td]; " 57 printk(KERN_ERR "%s: octets_written = [%td]; "
58 "expected [%td]\n", __FUNCTION__, octets_written, size); 58 "expected [%td]\n", __func__, octets_written, size);
59 rc = -EINVAL; 59 rc = -EINVAL;
60 } 60 }
61 mutex_unlock(&inode_info->lower_file_mutex); 61 mutex_unlock(&inode_info->lower_file_mutex);
@@ -153,7 +153,7 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
153 rc = PTR_ERR(ecryptfs_page); 153 rc = PTR_ERR(ecryptfs_page);
154 printk(KERN_ERR "%s: Error getting page at " 154 printk(KERN_ERR "%s: Error getting page at "
155 "index [%ld] from eCryptfs inode " 155 "index [%ld] from eCryptfs inode "
156 "mapping; rc = [%d]\n", __FUNCTION__, 156 "mapping; rc = [%d]\n", __func__,
157 ecryptfs_page_idx, rc); 157 ecryptfs_page_idx, rc);
158 goto out; 158 goto out;
159 } 159 }
@@ -165,7 +165,7 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
165 if (rc) { 165 if (rc) {
166 printk(KERN_ERR "%s: Error decrypting " 166 printk(KERN_ERR "%s: Error decrypting "
167 "page; rc = [%d]\n", 167 "page; rc = [%d]\n",
168 __FUNCTION__, rc); 168 __func__, rc);
169 ClearPageUptodate(ecryptfs_page); 169 ClearPageUptodate(ecryptfs_page);
170 page_cache_release(ecryptfs_page); 170 page_cache_release(ecryptfs_page);
171 goto out; 171 goto out;
@@ -202,7 +202,7 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
202 page_cache_release(ecryptfs_page); 202 page_cache_release(ecryptfs_page);
203 if (rc) { 203 if (rc) {
204 printk(KERN_ERR "%s: Error encrypting " 204 printk(KERN_ERR "%s: Error encrypting "
205 "page; rc = [%d]\n", __FUNCTION__, rc); 205 "page; rc = [%d]\n", __func__, rc);
206 goto out; 206 goto out;
207 } 207 }
208 pos += num_bytes; 208 pos += num_bytes;
@@ -254,7 +254,7 @@ int ecryptfs_read_lower(char *data, loff_t offset, size_t size,
254 set_fs(fs_save); 254 set_fs(fs_save);
255 if (octets_read < 0) { 255 if (octets_read < 0) {
256 printk(KERN_ERR "%s: octets_read = [%td]; " 256 printk(KERN_ERR "%s: octets_read = [%td]; "
257 "expected [%td]\n", __FUNCTION__, octets_read, size); 257 "expected [%td]\n", __func__, octets_read, size);
258 rc = -EINVAL; 258 rc = -EINVAL;
259 } 259 }
260 mutex_unlock(&inode_info->lower_file_mutex); 260 mutex_unlock(&inode_info->lower_file_mutex);
@@ -327,7 +327,7 @@ int ecryptfs_read(char *data, loff_t offset, size_t size,
327 printk(KERN_ERR "%s: Attempt to read data past the end of the " 327 printk(KERN_ERR "%s: Attempt to read data past the end of the "
328 "file; offset = [%lld]; size = [%td]; " 328 "file; offset = [%lld]; size = [%td]; "
329 "ecryptfs_file_size = [%lld]\n", 329 "ecryptfs_file_size = [%lld]\n",
330 __FUNCTION__, offset, size, ecryptfs_file_size); 330 __func__, offset, size, ecryptfs_file_size);
331 goto out; 331 goto out;
332 } 332 }
333 pos = offset; 333 pos = offset;
@@ -345,14 +345,14 @@ int ecryptfs_read(char *data, loff_t offset, size_t size,
345 rc = PTR_ERR(ecryptfs_page); 345 rc = PTR_ERR(ecryptfs_page);
346 printk(KERN_ERR "%s: Error getting page at " 346 printk(KERN_ERR "%s: Error getting page at "
347 "index [%ld] from eCryptfs inode " 347 "index [%ld] from eCryptfs inode "
348 "mapping; rc = [%d]\n", __FUNCTION__, 348 "mapping; rc = [%d]\n", __func__,
349 ecryptfs_page_idx, rc); 349 ecryptfs_page_idx, rc);
350 goto out; 350 goto out;
351 } 351 }
352 rc = ecryptfs_decrypt_page(ecryptfs_page); 352 rc = ecryptfs_decrypt_page(ecryptfs_page);
353 if (rc) { 353 if (rc) {
354 printk(KERN_ERR "%s: Error decrypting " 354 printk(KERN_ERR "%s: Error decrypting "
355 "page; rc = [%d]\n", __FUNCTION__, rc); 355 "page; rc = [%d]\n", __func__, rc);
356 ClearPageUptodate(ecryptfs_page); 356 ClearPageUptodate(ecryptfs_page);
357 page_cache_release(ecryptfs_page); 357 page_cache_release(ecryptfs_page);
358 goto out; 358 goto out;
diff --git a/fs/eventfd.c b/fs/eventfd.c
index a9f130cd50ac..343942deeec1 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -200,10 +200,8 @@ struct file *eventfd_fget(int fd)
200 200
201asmlinkage long sys_eventfd(unsigned int count) 201asmlinkage long sys_eventfd(unsigned int count)
202{ 202{
203 int error, fd; 203 int fd;
204 struct eventfd_ctx *ctx; 204 struct eventfd_ctx *ctx;
205 struct file *file;
206 struct inode *inode;
207 205
208 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 206 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
209 if (!ctx) 207 if (!ctx)
@@ -216,12 +214,9 @@ asmlinkage long sys_eventfd(unsigned int count)
216 * When we call this, the initialization must be complete, since 214 * When we call this, the initialization must be complete, since
217 * anon_inode_getfd() will install the fd. 215 * anon_inode_getfd() will install the fd.
218 */ 216 */
219 error = anon_inode_getfd(&fd, &inode, &file, "[eventfd]", 217 fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx);
220 &eventfd_fops, ctx); 218 if (fd < 0)
221 if (!error) 219 kfree(ctx);
222 return fd; 220 return fd;
223
224 kfree(ctx);
225 return error;
226} 221}
227 222
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index a415f42d32cf..990c01d2d66b 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -257,25 +257,6 @@ static inline int ep_cmp_ffd(struct epoll_filefd *p1,
257 (p1->file < p2->file ? -1 : p1->fd - p2->fd)); 257 (p1->file < p2->file ? -1 : p1->fd - p2->fd));
258} 258}
259 259
260/* Special initialization for the RB tree node to detect linkage */
261static inline void ep_rb_initnode(struct rb_node *n)
262{
263 rb_set_parent(n, n);
264}
265
266/* Removes a node from the RB tree and marks it for a fast is-linked check */
267static inline void ep_rb_erase(struct rb_node *n, struct rb_root *r)
268{
269 rb_erase(n, r);
270 rb_set_parent(n, n);
271}
272
273/* Fast check to verify that the item is linked to the main RB tree */
274static inline int ep_rb_linked(struct rb_node *n)
275{
276 return rb_parent(n) != n;
277}
278
279/* Tells us if the item is currently linked */ 260/* Tells us if the item is currently linked */
280static inline int ep_is_linked(struct list_head *p) 261static inline int ep_is_linked(struct list_head *p)
281{ 262{
@@ -283,13 +264,13 @@ static inline int ep_is_linked(struct list_head *p)
283} 264}
284 265
285/* Get the "struct epitem" from a wait queue pointer */ 266/* Get the "struct epitem" from a wait queue pointer */
286static inline struct epitem * ep_item_from_wait(wait_queue_t *p) 267static inline struct epitem *ep_item_from_wait(wait_queue_t *p)
287{ 268{
288 return container_of(p, struct eppoll_entry, wait)->base; 269 return container_of(p, struct eppoll_entry, wait)->base;
289} 270}
290 271
291/* Get the "struct epitem" from an epoll queue wrapper */ 272/* Get the "struct epitem" from an epoll queue wrapper */
292static inline struct epitem * ep_item_from_epqueue(poll_table *p) 273static inline struct epitem *ep_item_from_epqueue(poll_table *p)
293{ 274{
294 return container_of(p, struct ep_pqueue, pt)->epi; 275 return container_of(p, struct ep_pqueue, pt)->epi;
295} 276}
@@ -411,8 +392,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
411 list_del_init(&epi->fllink); 392 list_del_init(&epi->fllink);
412 spin_unlock(&file->f_ep_lock); 393 spin_unlock(&file->f_ep_lock);
413 394
414 if (ep_rb_linked(&epi->rbn)) 395 rb_erase(&epi->rbn, &ep->rbr);
415 ep_rb_erase(&epi->rbn, &ep->rbr);
416 396
417 spin_lock_irqsave(&ep->lock, flags); 397 spin_lock_irqsave(&ep->lock, flags);
418 if (ep_is_linked(&epi->rdllink)) 398 if (ep_is_linked(&epi->rdllink))
@@ -728,7 +708,6 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
728 goto error_return; 708 goto error_return;
729 709
730 /* Item initialization follow here ... */ 710 /* Item initialization follow here ... */
731 ep_rb_initnode(&epi->rbn);
732 INIT_LIST_HEAD(&epi->rdllink); 711 INIT_LIST_HEAD(&epi->rdllink);
733 INIT_LIST_HEAD(&epi->fllink); 712 INIT_LIST_HEAD(&epi->fllink);
734 INIT_LIST_HEAD(&epi->pwqlist); 713 INIT_LIST_HEAD(&epi->pwqlist);
@@ -1071,8 +1050,6 @@ asmlinkage long sys_epoll_create(int size)
1071{ 1050{
1072 int error, fd = -1; 1051 int error, fd = -1;
1073 struct eventpoll *ep; 1052 struct eventpoll *ep;
1074 struct inode *inode;
1075 struct file *file;
1076 1053
1077 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n", 1054 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n",
1078 current, size)); 1055 current, size));
@@ -1082,29 +1059,24 @@ asmlinkage long sys_epoll_create(int size)
1082 * structure ( "struct eventpoll" ). 1059 * structure ( "struct eventpoll" ).
1083 */ 1060 */
1084 error = -EINVAL; 1061 error = -EINVAL;
1085 if (size <= 0 || (error = ep_alloc(&ep)) != 0) 1062 if (size <= 0 || (error = ep_alloc(&ep)) < 0) {
1063 fd = error;
1086 goto error_return; 1064 goto error_return;
1065 }
1087 1066
1088 /* 1067 /*
1089 * Creates all the items needed to setup an eventpoll file. That is, 1068 * Creates all the items needed to setup an eventpoll file. That is,
1090 * a file structure, and inode and a free file descriptor. 1069 * a file structure and a free file descriptor.
1091 */ 1070 */
1092 error = anon_inode_getfd(&fd, &inode, &file, "[eventpoll]", 1071 fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep);
1093 &eventpoll_fops, ep); 1072 if (fd < 0)
1094 if (error) 1073 ep_free(ep);
1095 goto error_free;
1096 1074
1075error_return:
1097 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", 1076 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
1098 current, size, fd)); 1077 current, size, fd));
1099 1078
1100 return fd; 1079 return fd;
1101
1102error_free:
1103 ep_free(ep);
1104error_return:
1105 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
1106 current, size, error));
1107 return error;
1108} 1080}
1109 1081
1110/* 1082/*
@@ -1262,7 +1234,7 @@ error_return:
1262 return error; 1234 return error;
1263} 1235}
1264 1236
1265#ifdef TIF_RESTORE_SIGMASK 1237#ifdef HAVE_SET_RESTORE_SIGMASK
1266 1238
1267/* 1239/*
1268 * Implement the event wait interface for the eventpoll file. It is the kernel 1240 * Implement the event wait interface for the eventpoll file. It is the kernel
@@ -1300,7 +1272,7 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
1300 if (error == -EINTR) { 1272 if (error == -EINTR) {
1301 memcpy(&current->saved_sigmask, &sigsaved, 1273 memcpy(&current->saved_sigmask, &sigsaved,
1302 sizeof(sigsaved)); 1274 sizeof(sigsaved));
1303 set_thread_flag(TIF_RESTORE_SIGMASK); 1275 set_restore_sigmask();
1304 } else 1276 } else
1305 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 1277 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
1306 } 1278 }
@@ -1308,7 +1280,7 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
1308 return error; 1280 return error;
1309} 1281}
1310 1282
1311#endif /* #ifdef TIF_RESTORE_SIGMASK */ 1283#endif /* HAVE_SET_RESTORE_SIGMASK */
1312 1284
1313static int __init eventpoll_init(void) 1285static int __init eventpoll_init(void)
1314{ 1286{
@@ -1330,4 +1302,3 @@ static int __init eventpoll_init(void)
1330 return 0; 1302 return 0;
1331} 1303}
1332fs_initcall(eventpoll_init); 1304fs_initcall(eventpoll_init);
1333
diff --git a/fs/exec.c b/fs/exec.c
index b152029f18f6..1f8a24aa1f8b 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -24,6 +24,7 @@
24 24
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/file.h> 26#include <linux/file.h>
27#include <linux/fdtable.h>
27#include <linux/mman.h> 28#include <linux/mman.h>
28#include <linux/a.out.h> 29#include <linux/a.out.h>
29#include <linux/stat.h> 30#include <linux/stat.h>
@@ -735,6 +736,7 @@ static int exec_mmap(struct mm_struct *mm)
735 tsk->active_mm = mm; 736 tsk->active_mm = mm;
736 activate_mm(active_mm, mm); 737 activate_mm(active_mm, mm);
737 task_unlock(tsk); 738 task_unlock(tsk);
739 mm_update_next_owner(old_mm);
738 arch_pick_mmap_layout(mm); 740 arch_pick_mmap_layout(mm);
739 if (old_mm) { 741 if (old_mm) {
740 up_read(&old_mm->mmap_sem); 742 up_read(&old_mm->mmap_sem);
@@ -765,9 +767,7 @@ static int de_thread(struct task_struct *tsk)
765 767
766 /* 768 /*
767 * Kill all other threads in the thread group. 769 * Kill all other threads in the thread group.
768 * We must hold tasklist_lock to call zap_other_threads.
769 */ 770 */
770 read_lock(&tasklist_lock);
771 spin_lock_irq(lock); 771 spin_lock_irq(lock);
772 if (signal_group_exit(sig)) { 772 if (signal_group_exit(sig)) {
773 /* 773 /*
@@ -775,21 +775,10 @@ static int de_thread(struct task_struct *tsk)
775 * return so that the signal is processed. 775 * return so that the signal is processed.
776 */ 776 */
777 spin_unlock_irq(lock); 777 spin_unlock_irq(lock);
778 read_unlock(&tasklist_lock);
779 return -EAGAIN; 778 return -EAGAIN;
780 } 779 }
781
782 /*
783 * child_reaper ignores SIGKILL, change it now.
784 * Reparenting needs write_lock on tasklist_lock,
785 * so it is safe to do it under read_lock.
786 */
787 if (unlikely(tsk->group_leader == task_child_reaper(tsk)))
788 task_active_pid_ns(tsk)->child_reaper = tsk;
789
790 sig->group_exit_task = tsk; 780 sig->group_exit_task = tsk;
791 zap_other_threads(tsk); 781 zap_other_threads(tsk);
792 read_unlock(&tasklist_lock);
793 782
794 /* Account for the thread group leader hanging around: */ 783 /* Account for the thread group leader hanging around: */
795 count = thread_group_leader(tsk) ? 1 : 2; 784 count = thread_group_leader(tsk) ? 1 : 2;
@@ -810,7 +799,7 @@ static int de_thread(struct task_struct *tsk)
810 if (!thread_group_leader(tsk)) { 799 if (!thread_group_leader(tsk)) {
811 leader = tsk->group_leader; 800 leader = tsk->group_leader;
812 801
813 sig->notify_count = -1; 802 sig->notify_count = -1; /* for exit_notify() */
814 for (;;) { 803 for (;;) {
815 write_lock_irq(&tasklist_lock); 804 write_lock_irq(&tasklist_lock);
816 if (likely(leader->exit_state)) 805 if (likely(leader->exit_state))
@@ -820,6 +809,8 @@ static int de_thread(struct task_struct *tsk)
820 schedule(); 809 schedule();
821 } 810 }
822 811
812 if (unlikely(task_child_reaper(tsk) == leader))
813 task_active_pid_ns(tsk)->child_reaper = tsk;
823 /* 814 /*
824 * The only record we have of the real-time age of a 815 * The only record we have of the real-time age of a
825 * process, regardless of execs it's done, is start_time. 816 * process, regardless of execs it's done, is start_time.
@@ -963,6 +954,8 @@ int flush_old_exec(struct linux_binprm * bprm)
963 if (retval) 954 if (retval)
964 goto out; 955 goto out;
965 956
957 set_mm_exe_file(bprm->mm, bprm->file);
958
966 /* 959 /*
967 * Release all of the old mmap stuff 960 * Release all of the old mmap stuff
968 */ 961 */
@@ -1268,7 +1261,6 @@ int do_execve(char * filename,
1268{ 1261{
1269 struct linux_binprm *bprm; 1262 struct linux_binprm *bprm;
1270 struct file *file; 1263 struct file *file;
1271 unsigned long env_p;
1272 struct files_struct *displaced; 1264 struct files_struct *displaced;
1273 int retval; 1265 int retval;
1274 1266
@@ -1321,11 +1313,9 @@ int do_execve(char * filename,
1321 if (retval < 0) 1313 if (retval < 0)
1322 goto out; 1314 goto out;
1323 1315
1324 env_p = bprm->p;
1325 retval = copy_strings(bprm->argc, argv, bprm); 1316 retval = copy_strings(bprm->argc, argv, bprm);
1326 if (retval < 0) 1317 if (retval < 0)
1327 goto out; 1318 goto out;
1328 bprm->argv_len = env_p - bprm->p;
1329 1319
1330 retval = search_binary_handler(bprm,regs); 1320 retval = search_binary_handler(bprm,regs);
1331 if (retval >= 0) { 1321 if (retval >= 0) {
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 109ab5e44eca..cc91227d3bb8 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -150,12 +150,12 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir)
150 if (IS_ERR(ppd)) { 150 if (IS_ERR(ppd)) {
151 err = PTR_ERR(ppd); 151 err = PTR_ERR(ppd);
152 dprintk("%s: get_parent of %ld failed, err %d\n", 152 dprintk("%s: get_parent of %ld failed, err %d\n",
153 __FUNCTION__, pd->d_inode->i_ino, err); 153 __func__, pd->d_inode->i_ino, err);
154 dput(pd); 154 dput(pd);
155 break; 155 break;
156 } 156 }
157 157
158 dprintk("%s: find name of %lu in %lu\n", __FUNCTION__, 158 dprintk("%s: find name of %lu in %lu\n", __func__,
159 pd->d_inode->i_ino, ppd->d_inode->i_ino); 159 pd->d_inode->i_ino, ppd->d_inode->i_ino);
160 err = exportfs_get_name(mnt, ppd, nbuf, pd); 160 err = exportfs_get_name(mnt, ppd, nbuf, pd);
161 if (err) { 161 if (err) {
@@ -168,14 +168,14 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir)
168 continue; 168 continue;
169 break; 169 break;
170 } 170 }
171 dprintk("%s: found name: %s\n", __FUNCTION__, nbuf); 171 dprintk("%s: found name: %s\n", __func__, nbuf);
172 mutex_lock(&ppd->d_inode->i_mutex); 172 mutex_lock(&ppd->d_inode->i_mutex);
173 npd = lookup_one_len(nbuf, ppd, strlen(nbuf)); 173 npd = lookup_one_len(nbuf, ppd, strlen(nbuf));
174 mutex_unlock(&ppd->d_inode->i_mutex); 174 mutex_unlock(&ppd->d_inode->i_mutex);
175 if (IS_ERR(npd)) { 175 if (IS_ERR(npd)) {
176 err = PTR_ERR(npd); 176 err = PTR_ERR(npd);
177 dprintk("%s: lookup failed: %d\n", 177 dprintk("%s: lookup failed: %d\n",
178 __FUNCTION__, err); 178 __func__, err);
179 dput(ppd); 179 dput(ppd);
180 dput(pd); 180 dput(pd);
181 break; 181 break;
@@ -188,7 +188,7 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir)
188 if (npd == pd) 188 if (npd == pd)
189 noprogress = 0; 189 noprogress = 0;
190 else 190 else
191 printk("%s: npd != pd\n", __FUNCTION__); 191 printk("%s: npd != pd\n", __func__);
192 dput(npd); 192 dput(npd);
193 dput(ppd); 193 dput(ppd);
194 if (IS_ROOT(pd)) { 194 if (IS_ROOT(pd)) {
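
The expfs.c changes above are a mechanical switch from the GCC-specific __FUNCTION__ to the C99-standard __func__ predefined identifier; both expand to the name of the enclosing function. A minimal user-space sketch:

#include <stdio.h>

/* __func__ behaves as if each function body contained
 *     static const char __func__[] = "function-name";
 * __FUNCTION__ is the older GCC spelling of the same thing. */
static void demo(void)
{
    printf("%s: hello\n", __func__);   /* prints "demo: hello" */
}

int main(void)
{
    demo();
    return 0;
}
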
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index cc47b76091bf..6ae4ecf3ce40 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1261,10 +1261,11 @@ static int ext3_ordered_write_end(struct file *file,
1261 new_i_size = pos + copied; 1261 new_i_size = pos + copied;
1262 if (new_i_size > EXT3_I(inode)->i_disksize) 1262 if (new_i_size > EXT3_I(inode)->i_disksize)
1263 EXT3_I(inode)->i_disksize = new_i_size; 1263 EXT3_I(inode)->i_disksize = new_i_size;
1264 copied = ext3_generic_write_end(file, mapping, pos, len, copied, 1264 ret2 = ext3_generic_write_end(file, mapping, pos, len, copied,
1265 page, fsdata); 1265 page, fsdata);
1266 if (copied < 0) 1266 copied = ret2;
1267 ret = copied; 1267 if (ret2 < 0)
1268 ret = ret2;
1268 } 1269 }
1269 ret2 = ext3_journal_stop(handle); 1270 ret2 = ext3_journal_stop(handle);
1270 if (!ret) 1271 if (!ret)
@@ -1289,10 +1290,11 @@ static int ext3_writeback_write_end(struct file *file,
1289 if (new_i_size > EXT3_I(inode)->i_disksize) 1290 if (new_i_size > EXT3_I(inode)->i_disksize)
1290 EXT3_I(inode)->i_disksize = new_i_size; 1291 EXT3_I(inode)->i_disksize = new_i_size;
1291 1292
1292 copied = ext3_generic_write_end(file, mapping, pos, len, copied, 1293 ret2 = ext3_generic_write_end(file, mapping, pos, len, copied,
1293 page, fsdata); 1294 page, fsdata);
1294 if (copied < 0) 1295 copied = ret2;
1295 ret = copied; 1296 if (ret2 < 0)
1297 ret = ret2;
1296 1298
1297 ret2 = ext3_journal_stop(handle); 1299 ret2 = ext3_journal_stop(handle);
1298 if (!ret) 1300 if (!ret)
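
In the ->write_end() prototype the copied parameter is unsigned, so the old "if (copied < 0)" test in the ext3 hunks above could never fire, which appears to be why the helper's return value is now captured in the signed ret2 first. A stand-alone sketch of the signedness pitfall (fake_write_end() is a made-up stand-in, not kernel code):

#include <stdio.h>

static int fake_write_end(void)
{
    return -5;                  /* stand-in for a negative error code */
}

int main(void)
{
    unsigned copied = fake_write_end();   /* wraps to a large positive value */
    int ret2 = fake_write_end();
    int ret = 0;

    if (copied < 0)             /* always false: copied is unsigned      */
        ret = (int)copied;      /* (compilers typically warn about this) */
    if (ret2 < 0)               /* works: ret2 is signed                 */
        ret = ret2;

    printf("ret = %d\n", ret);  /* prints -5 */
    return 0;
}
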
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index d4a4f0e9ff69..175414ac2210 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -1000,6 +1000,11 @@ ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
1000 i.value = NULL; 1000 i.value = NULL;
1001 error = ext3_xattr_block_set(handle, inode, &i, &bs); 1001 error = ext3_xattr_block_set(handle, inode, &i, &bs);
1002 } else if (error == -ENOSPC) { 1002 } else if (error == -ENOSPC) {
1003 if (EXT3_I(inode)->i_file_acl && !bs.s.base) {
1004 error = ext3_xattr_block_find(inode, &i, &bs);
1005 if (error)
1006 goto cleanup;
1007 }
1003 error = ext3_xattr_block_set(handle, inode, &i, &bs); 1008 error = ext3_xattr_block_set(handle, inode, &i, &bs);
1004 if (error) 1009 if (error)
1005 goto cleanup; 1010 goto cleanup;
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index a8bae8cd1d5d..3c8dab880d91 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -9,8 +9,8 @@
9#include <linux/slab.h> 9#include <linux/slab.h>
10#include <linux/capability.h> 10#include <linux/capability.h>
11#include <linux/fs.h> 11#include <linux/fs.h>
12#include <linux/ext4_jbd2.h> 12#include "ext4_jbd2.h"
13#include <linux/ext4_fs.h> 13#include "ext4.h"
14#include "xattr.h" 14#include "xattr.h"
15#include "acl.h" 15#include "acl.h"
16 16
@@ -37,7 +37,7 @@ ext4_acl_from_disk(const void *value, size_t size)
37 return ERR_PTR(-EINVAL); 37 return ERR_PTR(-EINVAL);
38 if (count == 0) 38 if (count == 0)
39 return NULL; 39 return NULL;
40 acl = posix_acl_alloc(count, GFP_KERNEL); 40 acl = posix_acl_alloc(count, GFP_NOFS);
41 if (!acl) 41 if (!acl)
42 return ERR_PTR(-ENOMEM); 42 return ERR_PTR(-ENOMEM);
43 for (n=0; n < count; n++) { 43 for (n=0; n < count; n++) {
@@ -91,7 +91,7 @@ ext4_acl_to_disk(const struct posix_acl *acl, size_t *size)
91 91
92 *size = ext4_acl_size(acl->a_count); 92 *size = ext4_acl_size(acl->a_count);
93 ext_acl = kmalloc(sizeof(ext4_acl_header) + acl->a_count * 93 ext_acl = kmalloc(sizeof(ext4_acl_header) + acl->a_count *
94 sizeof(ext4_acl_entry), GFP_KERNEL); 94 sizeof(ext4_acl_entry), GFP_NOFS);
95 if (!ext_acl) 95 if (!ext_acl)
96 return ERR_PTR(-ENOMEM); 96 return ERR_PTR(-ENOMEM);
97 ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION); 97 ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION);
@@ -187,7 +187,7 @@ ext4_get_acl(struct inode *inode, int type)
187 } 187 }
188 retval = ext4_xattr_get(inode, name_index, "", NULL, 0); 188 retval = ext4_xattr_get(inode, name_index, "", NULL, 0);
189 if (retval > 0) { 189 if (retval > 0) {
190 value = kmalloc(retval, GFP_KERNEL); 190 value = kmalloc(retval, GFP_NOFS);
191 if (!value) 191 if (!value)
192 return ERR_PTR(-ENOMEM); 192 return ERR_PTR(-ENOMEM);
193 retval = ext4_xattr_get(inode, name_index, "", value, retval); 193 retval = ext4_xattr_get(inode, name_index, "", value, retval);
@@ -335,7 +335,7 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
335 if (error) 335 if (error)
336 goto cleanup; 336 goto cleanup;
337 } 337 }
338 clone = posix_acl_clone(acl, GFP_KERNEL); 338 clone = posix_acl_clone(acl, GFP_NOFS);
339 error = -ENOMEM; 339 error = -ENOMEM;
340 if (!clone) 340 if (!clone)
341 goto cleanup; 341 goto cleanup;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 0737e05ba3dd..30494c5da843 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -15,12 +15,12 @@
15#include <linux/capability.h> 15#include <linux/capability.h>
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/jbd2.h> 17#include <linux/jbd2.h>
18#include <linux/ext4_fs.h>
19#include <linux/ext4_jbd2.h>
20#include <linux/quotaops.h> 18#include <linux/quotaops.h>
21#include <linux/buffer_head.h> 19#include <linux/buffer_head.h>
22 20#include "ext4.h"
21#include "ext4_jbd2.h"
23#include "group.h" 22#include "group.h"
23
24/* 24/*
25 * balloc.c contains the blocks allocation and deallocation routines 25 * balloc.c contains the blocks allocation and deallocation routines
26 */ 26 */
@@ -48,7 +48,6 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
48unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, 48unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
49 ext4_group_t block_group, struct ext4_group_desc *gdp) 49 ext4_group_t block_group, struct ext4_group_desc *gdp)
50{ 50{
51 unsigned long start;
52 int bit, bit_max; 51 int bit, bit_max;
53 unsigned free_blocks, group_blocks; 52 unsigned free_blocks, group_blocks;
54 struct ext4_sb_info *sbi = EXT4_SB(sb); 53 struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -59,7 +58,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
59 /* If checksum is bad mark all blocks used to prevent allocation 58 /* If checksum is bad mark all blocks used to prevent allocation
60 * essentially implementing a per-group read-only flag. */ 59 * essentially implementing a per-group read-only flag. */
61 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { 60 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
62 ext4_error(sb, __FUNCTION__, 61 ext4_error(sb, __func__,
63 "Checksum bad for group %lu\n", block_group); 62 "Checksum bad for group %lu\n", block_group);
64 gdp->bg_free_blocks_count = 0; 63 gdp->bg_free_blocks_count = 0;
65 gdp->bg_free_inodes_count = 0; 64 gdp->bg_free_inodes_count = 0;
@@ -106,11 +105,12 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
106 free_blocks = group_blocks - bit_max; 105 free_blocks = group_blocks - bit_max;
107 106
108 if (bh) { 107 if (bh) {
108 ext4_fsblk_t start;
109
109 for (bit = 0; bit < bit_max; bit++) 110 for (bit = 0; bit < bit_max; bit++)
110 ext4_set_bit(bit, bh->b_data); 111 ext4_set_bit(bit, bh->b_data);
111 112
112 start = block_group * EXT4_BLOCKS_PER_GROUP(sb) + 113 start = ext4_group_first_block_no(sb, block_group);
113 le32_to_cpu(sbi->s_es->s_first_data_block);
114 114
115 /* Set bits for block and inode bitmaps, and inode table */ 115 /* Set bits for block and inode bitmaps, and inode table */
116 ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data); 116 ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data);
@@ -235,7 +235,7 @@ static int ext4_valid_block_bitmap(struct super_block *sb,
235 return 1; 235 return 1;
236 236
237err_out: 237err_out:
238 ext4_error(sb, __FUNCTION__, 238 ext4_error(sb, __func__,
239 "Invalid block bitmap - " 239 "Invalid block bitmap - "
240 "block_group = %d, block = %llu", 240 "block_group = %d, block = %llu",
241 block_group, bitmap_blk); 241 block_group, bitmap_blk);
@@ -264,7 +264,7 @@ read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
264 bitmap_blk = ext4_block_bitmap(sb, desc); 264 bitmap_blk = ext4_block_bitmap(sb, desc);
265 bh = sb_getblk(sb, bitmap_blk); 265 bh = sb_getblk(sb, bitmap_blk);
266 if (unlikely(!bh)) { 266 if (unlikely(!bh)) {
267 ext4_error(sb, __FUNCTION__, 267 ext4_error(sb, __func__,
268 "Cannot read block bitmap - " 268 "Cannot read block bitmap - "
269 "block_group = %d, block_bitmap = %llu", 269 "block_group = %d, block_bitmap = %llu",
270 (int)block_group, (unsigned long long)bitmap_blk); 270 (int)block_group, (unsigned long long)bitmap_blk);
@@ -281,17 +281,17 @@ read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
281 } 281 }
282 if (bh_submit_read(bh) < 0) { 282 if (bh_submit_read(bh) < 0) {
283 put_bh(bh); 283 put_bh(bh);
284 ext4_error(sb, __FUNCTION__, 284 ext4_error(sb, __func__,
285 "Cannot read block bitmap - " 285 "Cannot read block bitmap - "
286 "block_group = %d, block_bitmap = %llu", 286 "block_group = %d, block_bitmap = %llu",
287 (int)block_group, (unsigned long long)bitmap_blk); 287 (int)block_group, (unsigned long long)bitmap_blk);
288 return NULL; 288 return NULL;
289 } 289 }
290 if (!ext4_valid_block_bitmap(sb, desc, block_group, bh)) { 290 ext4_valid_block_bitmap(sb, desc, block_group, bh);
291 put_bh(bh); 291 /*
292 return NULL; 292 * file system mounted not to panic on error,
293 } 293 * continue with corrupt bitmap
294 294 */
295 return bh; 295 return bh;
296} 296}
297/* 297/*
@@ -360,7 +360,7 @@ restart:
360 BUG(); 360 BUG();
361} 361}
362#define rsv_window_dump(root, verbose) \ 362#define rsv_window_dump(root, verbose) \
363 __rsv_window_dump((root), (verbose), __FUNCTION__) 363 __rsv_window_dump((root), (verbose), __func__)
364#else 364#else
365#define rsv_window_dump(root, verbose) do {} while (0) 365#define rsv_window_dump(root, verbose) do {} while (0)
366#endif 366#endif
@@ -740,7 +740,7 @@ do_more:
740 if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), 740 if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
741 bit + i, bitmap_bh->b_data)) { 741 bit + i, bitmap_bh->b_data)) {
742 jbd_unlock_bh_state(bitmap_bh); 742 jbd_unlock_bh_state(bitmap_bh);
743 ext4_error(sb, __FUNCTION__, 743 ext4_error(sb, __func__,
744 "bit already cleared for block %llu", 744 "bit already cleared for block %llu",
745 (ext4_fsblk_t)(block + i)); 745 (ext4_fsblk_t)(block + i));
746 jbd_lock_bh_state(bitmap_bh); 746 jbd_lock_bh_state(bitmap_bh);
@@ -752,9 +752,7 @@ do_more:
752 jbd_unlock_bh_state(bitmap_bh); 752 jbd_unlock_bh_state(bitmap_bh);
753 753
754 spin_lock(sb_bgl_lock(sbi, block_group)); 754 spin_lock(sb_bgl_lock(sbi, block_group));
755 desc->bg_free_blocks_count = 755 le16_add_cpu(&desc->bg_free_blocks_count, group_freed);
756 cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) +
757 group_freed);
758 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); 756 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
759 spin_unlock(sb_bgl_lock(sbi, block_group)); 757 spin_unlock(sb_bgl_lock(sbi, block_group));
760 percpu_counter_add(&sbi->s_freeblocks_counter, count); 758 percpu_counter_add(&sbi->s_freeblocks_counter, count);
@@ -1772,7 +1770,12 @@ allocated:
1772 "Allocating block in system zone - " 1770 "Allocating block in system zone - "
1773 "blocks from %llu, length %lu", 1771 "blocks from %llu, length %lu",
1774 ret_block, num); 1772 ret_block, num);
1775 goto out; 1773 /*
1774 * claim_block marked the blocks we allocated
1775 * as in use. So we may want to selectively
1776 * mark some of the blocks as free
1777 */
1778 goto retry_alloc;
1776 } 1779 }
1777 1780
1778 performed_allocation = 1; 1781 performed_allocation = 1;
@@ -1798,7 +1801,7 @@ allocated:
1798 if (ext4_test_bit(grp_alloc_blk+i, 1801 if (ext4_test_bit(grp_alloc_blk+i,
1799 bh2jh(bitmap_bh)->b_committed_data)) { 1802 bh2jh(bitmap_bh)->b_committed_data)) {
1800 printk("%s: block was unexpectedly set in " 1803 printk("%s: block was unexpectedly set in "
1801 "b_committed_data\n", __FUNCTION__); 1804 "b_committed_data\n", __func__);
1802 } 1805 }
1803 } 1806 }
1804 } 1807 }
@@ -1823,8 +1826,7 @@ allocated:
1823 spin_lock(sb_bgl_lock(sbi, group_no)); 1826 spin_lock(sb_bgl_lock(sbi, group_no));
1824 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) 1827 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
1825 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); 1828 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
1826 gdp->bg_free_blocks_count = 1829 le16_add_cpu(&gdp->bg_free_blocks_count, -num);
1827 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num);
1828 gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp); 1830 gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
1829 spin_unlock(sb_bgl_lock(sbi, group_no)); 1831 spin_unlock(sb_bgl_lock(sbi, group_no));
1830 percpu_counter_sub(&sbi->s_freeblocks_counter, num); 1832 percpu_counter_sub(&sbi->s_freeblocks_counter, num);
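
Two of the balloc.c hunks above replace the open-coded cpu_to_le16(le16_to_cpu(x) +/- n) sequence with the kernel's le16_add_cpu() helper. A stand-alone illustration of the pattern; the endianness helpers below are simplified user-space stand-ins (identity on a little-endian host), not the kernel implementations:

#include <stdint.h>
#include <stdio.h>

typedef uint16_t le16;

static le16     cpu_to_le16(uint16_t v) { return v; }  /* little-endian host assumed */
static uint16_t le16_to_cpu(le16 v)     { return v; }

/* Equivalent of the helper: read, adjust in CPU byte order, write back. */
static void le16_add_cpu(le16 *var, int16_t val)
{
    *var = cpu_to_le16(le16_to_cpu(*var) + val);
}

int main(void)
{
    le16 free_blocks = cpu_to_le16(100);

    /* Open-coded form removed by the diff: */
    free_blocks = cpu_to_le16(le16_to_cpu(free_blocks) - 4);

    /* Helper form introduced by the diff: */
    le16_add_cpu(&free_blocks, -4);

    printf("free blocks now %u\n", le16_to_cpu(free_blocks));   /* 92 */
    return 0;
}
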
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index 420554f8f79d..d37ea6750454 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -9,7 +9,7 @@
9 9
10#include <linux/buffer_head.h> 10#include <linux/buffer_head.h>
11#include <linux/jbd2.h> 11#include <linux/jbd2.h>
12#include <linux/ext4_fs.h> 12#include "ext4.h"
13 13
14#ifdef EXT4FS_DEBUG 14#ifdef EXT4FS_DEBUG
15 15
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 2c23bade9aa6..2bf0331ea194 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -23,10 +23,10 @@
23 23
24#include <linux/fs.h> 24#include <linux/fs.h>
25#include <linux/jbd2.h> 25#include <linux/jbd2.h>
26#include <linux/ext4_fs.h>
27#include <linux/buffer_head.h> 26#include <linux/buffer_head.h>
28#include <linux/slab.h> 27#include <linux/slab.h>
29#include <linux/rbtree.h> 28#include <linux/rbtree.h>
29#include "ext4.h"
30 30
31static unsigned char ext4_filetype_table[] = { 31static unsigned char ext4_filetype_table[] = {
32 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK 32 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
@@ -42,7 +42,7 @@ const struct file_operations ext4_dir_operations = {
42 .llseek = generic_file_llseek, 42 .llseek = generic_file_llseek,
43 .read = generic_read_dir, 43 .read = generic_read_dir,
44 .readdir = ext4_readdir, /* we take BKL. needed?*/ 44 .readdir = ext4_readdir, /* we take BKL. needed?*/
45 .ioctl = ext4_ioctl, /* BKL held */ 45 .unlocked_ioctl = ext4_ioctl,
46#ifdef CONFIG_COMPAT 46#ifdef CONFIG_COMPAT
47 .compat_ioctl = ext4_compat_ioctl, 47 .compat_ioctl = ext4_compat_ioctl,
48#endif 48#endif
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
new file mode 100644
index 000000000000..8158083f7ac0
--- /dev/null
+++ b/fs/ext4/ext4.h
@@ -0,0 +1,1205 @@
1/*
2 * ext4.h
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 * from
10 *
11 * linux/include/linux/minix_fs.h
12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds
14 */
15
16#ifndef _EXT4_H
17#define _EXT4_H
18
19#include <linux/types.h>
20#include <linux/blkdev.h>
21#include <linux/magic.h>
22#include "ext4_i.h"
23
24/*
25 * The second extended filesystem constants/structures
26 */
27
28/*
29 * Define EXT4FS_DEBUG to produce debug messages
30 */
31#undef EXT4FS_DEBUG
32
33/*
34 * Define EXT4_RESERVATION to reserve data blocks for expanding files
35 */
36#define EXT4_DEFAULT_RESERVE_BLOCKS 8
37/*max window size: 1024(direct blocks) + 3([t,d]indirect blocks) */
38#define EXT4_MAX_RESERVE_BLOCKS 1027
39#define EXT4_RESERVE_WINDOW_NOT_ALLOCATED 0
40
41/*
42 * Debug code
43 */
44#ifdef EXT4FS_DEBUG
45#define ext4_debug(f, a...) \
46 do { \
47 printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \
48 __FILE__, __LINE__, __FUNCTION__); \
49 printk (KERN_DEBUG f, ## a); \
50 } while (0)
51#else
52#define ext4_debug(f, a...) do {} while (0)
53#endif
54
55#define EXT4_MULTIBLOCK_ALLOCATOR 1
56
57/* prefer goal again. length */
58#define EXT4_MB_HINT_MERGE 1
59/* blocks already reserved */
60#define EXT4_MB_HINT_RESERVED 2
61/* metadata is being allocated */
62#define EXT4_MB_HINT_METADATA 4
63/* first blocks in the file */
64#define EXT4_MB_HINT_FIRST 8
65/* search for the best chunk */
66#define EXT4_MB_HINT_BEST 16
67/* data is being allocated */
68#define EXT4_MB_HINT_DATA 32
69/* don't preallocate (for tails) */
70#define EXT4_MB_HINT_NOPREALLOC 64
71/* allocate for locality group */
72#define EXT4_MB_HINT_GROUP_ALLOC 128
73/* allocate goal blocks or none */
74#define EXT4_MB_HINT_GOAL_ONLY 256
75/* goal is meaningful */
76#define EXT4_MB_HINT_TRY_GOAL 512
77
78struct ext4_allocation_request {
79 /* target inode for block we're allocating */
80 struct inode *inode;
81 /* logical block in target inode */
82 ext4_lblk_t logical;
83 /* phys. target (a hint) */
84 ext4_fsblk_t goal;
85 /* the closest logical allocated block to the left */
86 ext4_lblk_t lleft;
87 /* phys. block for ^^^ */
88 ext4_fsblk_t pleft;
89 /* the closest logical allocated block to the right */
90 ext4_lblk_t lright;
91 /* phys. block for ^^^ */
92 ext4_fsblk_t pright;
93 /* how many blocks we want to allocate */
94 unsigned long len;
95 /* flags. see above EXT4_MB_HINT_* */
96 unsigned long flags;
97};
98
99/*
100 * Special inodes numbers
101 */
102#define EXT4_BAD_INO 1 /* Bad blocks inode */
103#define EXT4_ROOT_INO 2 /* Root inode */
104#define EXT4_BOOT_LOADER_INO 5 /* Boot loader inode */
105#define EXT4_UNDEL_DIR_INO 6 /* Undelete directory inode */
106#define EXT4_RESIZE_INO 7 /* Reserved group descriptors inode */
107#define EXT4_JOURNAL_INO 8 /* Journal inode */
108
109/* First non-reserved inode for old ext4 filesystems */
110#define EXT4_GOOD_OLD_FIRST_INO 11
111
112/*
113 * Maximal count of links to a file
114 */
115#define EXT4_LINK_MAX 65000
116
117/*
118 * Macro-instructions used to manage several block sizes
119 */
120#define EXT4_MIN_BLOCK_SIZE 1024
121#define EXT4_MAX_BLOCK_SIZE 65536
122#define EXT4_MIN_BLOCK_LOG_SIZE 10
123#ifdef __KERNEL__
124# define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize)
125#else
126# define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size)
127#endif
128#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof (__u32))
129#ifdef __KERNEL__
130# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits)
131#else
132# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10)
133#endif
134#ifdef __KERNEL__
135#define EXT4_ADDR_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_addr_per_block_bits)
136#define EXT4_INODE_SIZE(s) (EXT4_SB(s)->s_inode_size)
137#define EXT4_FIRST_INO(s) (EXT4_SB(s)->s_first_ino)
138#else
139#define EXT4_INODE_SIZE(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \
140 EXT4_GOOD_OLD_INODE_SIZE : \
141 (s)->s_inode_size)
142#define EXT4_FIRST_INO(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \
143 EXT4_GOOD_OLD_FIRST_INO : \
144 (s)->s_first_ino)
145#endif
146#define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits)))
147
148/*
149 * Structure of a blocks group descriptor
150 */
151struct ext4_group_desc
152{
153 __le32 bg_block_bitmap_lo; /* Blocks bitmap block */
154 __le32 bg_inode_bitmap_lo; /* Inodes bitmap block */
155 __le32 bg_inode_table_lo; /* Inodes table block */
156 __le16 bg_free_blocks_count; /* Free blocks count */
157 __le16 bg_free_inodes_count; /* Free inodes count */
158 __le16 bg_used_dirs_count; /* Directories count */
159 __le16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */
160 __u32 bg_reserved[2]; /* Likely block/inode bitmap checksum */
161 __le16 bg_itable_unused; /* Unused inodes count */
162 __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */
163 __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */
164 __le32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */
165 __le32 bg_inode_table_hi; /* Inodes table block MSB */
166 __le16 bg_free_blocks_count_hi;/* Free blocks count MSB */
167 __le16 bg_free_inodes_count_hi;/* Free inodes count MSB */
168 __le16 bg_used_dirs_count_hi; /* Directories count MSB */
169 __le16 bg_itable_unused_hi; /* Unused inodes count MSB */
170 __u32 bg_reserved2[3];
171};
172
173#define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */
174#define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */
175#define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */
176
177#ifdef __KERNEL__
178#include "ext4_sb.h"
179#endif
180/*
181 * Macro-instructions used to manage group descriptors
182 */
183#define EXT4_MIN_DESC_SIZE 32
184#define EXT4_MIN_DESC_SIZE_64BIT 64
185#define EXT4_MAX_DESC_SIZE EXT4_MIN_BLOCK_SIZE
186#define EXT4_DESC_SIZE(s) (EXT4_SB(s)->s_desc_size)
187#ifdef __KERNEL__
188# define EXT4_BLOCKS_PER_GROUP(s) (EXT4_SB(s)->s_blocks_per_group)
189# define EXT4_DESC_PER_BLOCK(s) (EXT4_SB(s)->s_desc_per_block)
190# define EXT4_INODES_PER_GROUP(s) (EXT4_SB(s)->s_inodes_per_group)
191# define EXT4_DESC_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_desc_per_block_bits)
192#else
193# define EXT4_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group)
194# define EXT4_DESC_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / EXT4_DESC_SIZE(s))
195# define EXT4_INODES_PER_GROUP(s) ((s)->s_inodes_per_group)
196#endif
197
198/*
199 * Constants relative to the data blocks
200 */
201#define EXT4_NDIR_BLOCKS 12
202#define EXT4_IND_BLOCK EXT4_NDIR_BLOCKS
203#define EXT4_DIND_BLOCK (EXT4_IND_BLOCK + 1)
204#define EXT4_TIND_BLOCK (EXT4_DIND_BLOCK + 1)
205#define EXT4_N_BLOCKS (EXT4_TIND_BLOCK + 1)
206
207/*
208 * Inode flags
209 */
210#define EXT4_SECRM_FL 0x00000001 /* Secure deletion */
211#define EXT4_UNRM_FL 0x00000002 /* Undelete */
212#define EXT4_COMPR_FL 0x00000004 /* Compress file */
213#define EXT4_SYNC_FL 0x00000008 /* Synchronous updates */
214#define EXT4_IMMUTABLE_FL 0x00000010 /* Immutable file */
215#define EXT4_APPEND_FL 0x00000020 /* writes to file may only append */
216#define EXT4_NODUMP_FL 0x00000040 /* do not dump file */
217#define EXT4_NOATIME_FL 0x00000080 /* do not update atime */
218/* Reserved for compression usage... */
219#define EXT4_DIRTY_FL 0x00000100
220#define EXT4_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */
221#define EXT4_NOCOMPR_FL 0x00000400 /* Don't compress */
222#define EXT4_ECOMPR_FL 0x00000800 /* Compression error */
223/* End compression flags --- maybe not all used */
224#define EXT4_INDEX_FL 0x00001000 /* hash-indexed directory */
225#define EXT4_IMAGIC_FL 0x00002000 /* AFS directory */
226#define EXT4_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */
227#define EXT4_NOTAIL_FL 0x00008000 /* file tail should not be merged */
228#define EXT4_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
229#define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
230#define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */
231#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */
232#define EXT4_EXT_MIGRATE 0x00100000 /* Inode is migrating */
233#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
234
235#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */
236#define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
237
238/*
239 * Inode dynamic state flags
240 */
241#define EXT4_STATE_JDATA 0x00000001 /* journaled data exists */
242#define EXT4_STATE_NEW 0x00000002 /* inode is newly created */
243#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */
244#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */
245
246/* Used to pass group descriptor data when online resize is done */
247struct ext4_new_group_input {
248 __u32 group; /* Group number for this data */
249 __u64 block_bitmap; /* Absolute block number of block bitmap */
250 __u64 inode_bitmap; /* Absolute block number of inode bitmap */
251 __u64 inode_table; /* Absolute block number of inode table start */
252 __u32 blocks_count; /* Total number of blocks in this group */
253 __u16 reserved_blocks; /* Number of reserved blocks in this group */
254 __u16 unused;
255};
256
257/* The struct ext4_new_group_input in kernel space, with free_blocks_count */
258struct ext4_new_group_data {
259 __u32 group;
260 __u64 block_bitmap;
261 __u64 inode_bitmap;
262 __u64 inode_table;
263 __u32 blocks_count;
264 __u16 reserved_blocks;
265 __u16 unused;
266 __u32 free_blocks_count;
267};
268
269/*
270 * Following is used by preallocation code to tell get_blocks() that we
271 * want uninitialzed extents.
272 */
273#define EXT4_CREATE_UNINITIALIZED_EXT 2
274
275/*
276 * ioctl commands
277 */
278#define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS
279#define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS
280#define EXT4_IOC_GETVERSION _IOR('f', 3, long)
281#define EXT4_IOC_SETVERSION _IOW('f', 4, long)
282#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
283#define EXT4_IOC_GROUP_ADD _IOW('f', 8,struct ext4_new_group_input)
284#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION
285#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION
286#ifdef CONFIG_JBD2_DEBUG
287#define EXT4_IOC_WAIT_FOR_READONLY _IOR('f', 99, long)
288#endif
289#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long)
290#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long)
291#define EXT4_IOC_MIGRATE _IO('f', 7)
292
293/*
294 * ioctl commands in 32 bit emulation
295 */
296#define EXT4_IOC32_GETFLAGS FS_IOC32_GETFLAGS
297#define EXT4_IOC32_SETFLAGS FS_IOC32_SETFLAGS
298#define EXT4_IOC32_GETVERSION _IOR('f', 3, int)
299#define EXT4_IOC32_SETVERSION _IOW('f', 4, int)
300#define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int)
301#define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int)
302#define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int)
303#ifdef CONFIG_JBD2_DEBUG
304#define EXT4_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int)
305#endif
306#define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION
307#define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION
308
309
310/*
311 * Mount options
312 */
313struct ext4_mount_options {
314 unsigned long s_mount_opt;
315 uid_t s_resuid;
316 gid_t s_resgid;
317 unsigned long s_commit_interval;
318#ifdef CONFIG_QUOTA
319 int s_jquota_fmt;
320 char *s_qf_names[MAXQUOTAS];
321#endif
322};
323
324/*
325 * Structure of an inode on the disk
326 */
327struct ext4_inode {
328 __le16 i_mode; /* File mode */
329 __le16 i_uid; /* Low 16 bits of Owner Uid */
330 __le32 i_size_lo; /* Size in bytes */
331 __le32 i_atime; /* Access time */
332 __le32 i_ctime; /* Inode Change time */
333 __le32 i_mtime; /* Modification time */
334 __le32 i_dtime; /* Deletion Time */
335 __le16 i_gid; /* Low 16 bits of Group Id */
336 __le16 i_links_count; /* Links count */
337 __le32 i_blocks_lo; /* Blocks count */
338 __le32 i_flags; /* File flags */
339 union {
340 struct {
341 __le32 l_i_version;
342 } linux1;
343 struct {
344 __u32 h_i_translator;
345 } hurd1;
346 struct {
347 __u32 m_i_reserved1;
348 } masix1;
349 } osd1; /* OS dependent 1 */
350 __le32 i_block[EXT4_N_BLOCKS];/* Pointers to blocks */
351 __le32 i_generation; /* File version (for NFS) */
352 __le32 i_file_acl_lo; /* File ACL */
353 __le32 i_size_high;
354 __le32 i_obso_faddr; /* Obsoleted fragment address */
355 union {
356 struct {
357 __le16 l_i_blocks_high; /* were l_i_reserved1 */
358 __le16 l_i_file_acl_high;
359 __le16 l_i_uid_high; /* these 2 fields */
360 __le16 l_i_gid_high; /* were reserved2[0] */
361 __u32 l_i_reserved2;
362 } linux2;
363 struct {
364 __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */
365 __u16 h_i_mode_high;
366 __u16 h_i_uid_high;
367 __u16 h_i_gid_high;
368 __u32 h_i_author;
369 } hurd2;
370 struct {
371 __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */
372 __le16 m_i_file_acl_high;
373 __u32 m_i_reserved2[2];
374 } masix2;
375 } osd2; /* OS dependent 2 */
376 __le16 i_extra_isize;
377 __le16 i_pad1;
378 __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */
379 __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */
380 __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */
381 __le32 i_crtime; /* File Creation time */
382 __le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
383 __le32 i_version_hi; /* high 32 bits for 64-bit version */
384};
385
386
387#define EXT4_EPOCH_BITS 2
388#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
389#define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS)
390
391/*
392 * Extended fields will fit into an inode if the filesystem was formatted
393 * with large inodes (-I 256 or larger) and there are not currently any EAs
394 * consuming all of the available space. For new inodes we always reserve
395 * enough space for the kernel's known extended fields, but for inodes
396 * created with an old kernel this might not have been the case. None of
397 * the extended inode fields is critical for correct filesystem operation.
398 * This macro checks if a certain field fits in the inode. Note that
399 * inode-size = GOOD_OLD_INODE_SIZE + i_extra_isize
400 */
401#define EXT4_FITS_IN_INODE(ext4_inode, einode, field) \
402 ((offsetof(typeof(*ext4_inode), field) + \
403 sizeof((ext4_inode)->field)) \
404 <= (EXT4_GOOD_OLD_INODE_SIZE + \
405 (einode)->i_extra_isize)) \
406
407static inline __le32 ext4_encode_extra_time(struct timespec *time)
408{
409 return cpu_to_le32((sizeof(time->tv_sec) > 4 ?
410 time->tv_sec >> 32 : 0) |
411 ((time->tv_nsec << 2) & EXT4_NSEC_MASK));
412}
413
414static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)
415{
416 if (sizeof(time->tv_sec) > 4)
417 time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK)
418 << 32;
419 time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> 2;
420}
421
422#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \
423do { \
424 (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \
425 if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
426 (raw_inode)->xtime ## _extra = \
427 ext4_encode_extra_time(&(inode)->xtime); \
428} while (0)
429
430#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \
431do { \
432 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
433 (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \
434 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
435 (raw_inode)->xtime ## _extra = \
436 ext4_encode_extra_time(&(einode)->xtime); \
437} while (0)
438
439#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \
440do { \
441 (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \
442 if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
443 ext4_decode_extra_time(&(inode)->xtime, \
444 raw_inode->xtime ## _extra); \
445} while (0)
446
447#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \
448do { \
449 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
450 (einode)->xtime.tv_sec = \
451 (signed)le32_to_cpu((raw_inode)->xtime); \
452 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
453 ext4_decode_extra_time(&(einode)->xtime, \
454 raw_inode->xtime ## _extra); \
455} while (0)
456
457#define i_disk_version osd1.linux1.l_i_version
458
459#if defined(__KERNEL__) || defined(__linux__)
460#define i_reserved1 osd1.linux1.l_i_reserved1
461#define i_file_acl_high osd2.linux2.l_i_file_acl_high
462#define i_blocks_high osd2.linux2.l_i_blocks_high
463#define i_uid_low i_uid
464#define i_gid_low i_gid
465#define i_uid_high osd2.linux2.l_i_uid_high
466#define i_gid_high osd2.linux2.l_i_gid_high
467#define i_reserved2 osd2.linux2.l_i_reserved2
468
469#elif defined(__GNU__)
470
471#define i_translator osd1.hurd1.h_i_translator
472#define i_uid_high osd2.hurd2.h_i_uid_high
473#define i_gid_high osd2.hurd2.h_i_gid_high
474#define i_author osd2.hurd2.h_i_author
475
476#elif defined(__masix__)
477
478#define i_reserved1 osd1.masix1.m_i_reserved1
479#define i_file_acl_high osd2.masix2.m_i_file_acl_high
480#define i_reserved2 osd2.masix2.m_i_reserved2
481
482#endif /* defined(__KERNEL__) || defined(__linux__) */
483
484/*
485 * File system states
486 */
487#define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */
488#define EXT4_ERROR_FS 0x0002 /* Errors detected */
489#define EXT4_ORPHAN_FS 0x0004 /* Orphans being recovered */
490
491/*
492 * Misc. filesystem flags
493 */
494#define EXT2_FLAGS_SIGNED_HASH 0x0001 /* Signed dirhash in use */
495#define EXT2_FLAGS_UNSIGNED_HASH 0x0002 /* Unsigned dirhash in use */
496#define EXT2_FLAGS_TEST_FILESYS 0x0004 /* to test development code */
497
498/*
499 * Mount flags
500 */
501#define EXT4_MOUNT_CHECK 0x00001 /* Do mount-time checks */
502#define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */
503#define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */
504#define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */
505#define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */
506#define EXT4_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */
507#define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */
508#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */
509#define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/
510#define EXT4_MOUNT_ABORT 0x00200 /* Fatal error detected */
511#define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */
512#define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */
513#define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */
514#define EXT4_MOUNT_WRITEBACK_DATA 0x00C00 /* No data ordering */
515#define EXT4_MOUNT_UPDATE_JOURNAL 0x01000 /* Update the journal format */
516#define EXT4_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */
517#define EXT4_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */
518#define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */
519#define EXT4_MOUNT_RESERVATION 0x10000 /* Preallocation */
520#define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */
521#define EXT4_MOUNT_NOBH 0x40000 /* No bufferheads */
522#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
523#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
524#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
525#define EXT4_MOUNT_EXTENTS 0x400000 /* Extents support */
526#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
527#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
528#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
529#define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */
530/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
531#ifndef _LINUX_EXT2_FS_H
532#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
533#define set_opt(o, opt) o |= EXT4_MOUNT_##opt
534#define test_opt(sb, opt) (EXT4_SB(sb)->s_mount_opt & \
535 EXT4_MOUNT_##opt)
536#else
537#define EXT2_MOUNT_NOLOAD EXT4_MOUNT_NOLOAD
538#define EXT2_MOUNT_ABORT EXT4_MOUNT_ABORT
539#define EXT2_MOUNT_DATA_FLAGS EXT4_MOUNT_DATA_FLAGS
540#endif
541
542#define ext4_set_bit ext2_set_bit
543#define ext4_set_bit_atomic ext2_set_bit_atomic
544#define ext4_clear_bit ext2_clear_bit
545#define ext4_clear_bit_atomic ext2_clear_bit_atomic
546#define ext4_test_bit ext2_test_bit
547#define ext4_find_first_zero_bit ext2_find_first_zero_bit
548#define ext4_find_next_zero_bit ext2_find_next_zero_bit
549#define ext4_find_next_bit ext2_find_next_bit
550
551/*
552 * Maximal mount counts between two filesystem checks
553 */
554#define EXT4_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */
555#define EXT4_DFL_CHECKINTERVAL 0 /* Don't use interval check */
556
557/*
558 * Behaviour when detecting errors
559 */
560#define EXT4_ERRORS_CONTINUE 1 /* Continue execution */
561#define EXT4_ERRORS_RO 2 /* Remount fs read-only */
562#define EXT4_ERRORS_PANIC 3 /* Panic */
563#define EXT4_ERRORS_DEFAULT EXT4_ERRORS_CONTINUE
564
565/*
566 * Structure of the super block
567 */
568struct ext4_super_block {
569/*00*/ __le32 s_inodes_count; /* Inodes count */
570 __le32 s_blocks_count_lo; /* Blocks count */
571 __le32 s_r_blocks_count_lo; /* Reserved blocks count */
572 __le32 s_free_blocks_count_lo; /* Free blocks count */
573/*10*/ __le32 s_free_inodes_count; /* Free inodes count */
574 __le32 s_first_data_block; /* First Data Block */
575 __le32 s_log_block_size; /* Block size */
576 __le32 s_obso_log_frag_size; /* Obsoleted fragment size */
577/*20*/ __le32 s_blocks_per_group; /* # Blocks per group */
578 __le32 s_obso_frags_per_group; /* Obsoleted fragments per group */
579 __le32 s_inodes_per_group; /* # Inodes per group */
580 __le32 s_mtime; /* Mount time */
581/*30*/ __le32 s_wtime; /* Write time */
582 __le16 s_mnt_count; /* Mount count */
583 __le16 s_max_mnt_count; /* Maximal mount count */
584 __le16 s_magic; /* Magic signature */
585 __le16 s_state; /* File system state */
586 __le16 s_errors; /* Behaviour when detecting errors */
587 __le16 s_minor_rev_level; /* minor revision level */
588/*40*/ __le32 s_lastcheck; /* time of last check */
589 __le32 s_checkinterval; /* max. time between checks */
590 __le32 s_creator_os; /* OS */
591 __le32 s_rev_level; /* Revision level */
592/*50*/ __le16 s_def_resuid; /* Default uid for reserved blocks */
593 __le16 s_def_resgid; /* Default gid for reserved blocks */
594 /*
595 * These fields are for EXT4_DYNAMIC_REV superblocks only.
596 *
597 * Note: the difference between the compatible feature set and
598 * the incompatible feature set is that if there is a bit set
599 * in the incompatible feature set that the kernel doesn't
600 * know about, it should refuse to mount the filesystem.
601 *
602 * e2fsck's requirements are more strict; if it doesn't know
603 * about a feature in either the compatible or incompatible
604 * feature set, it must abort and not try to meddle with
605 * things it doesn't understand...
606 */
607 __le32 s_first_ino; /* First non-reserved inode */
608 __le16 s_inode_size; /* size of inode structure */
609 __le16 s_block_group_nr; /* block group # of this superblock */
610 __le32 s_feature_compat; /* compatible feature set */
611/*60*/ __le32 s_feature_incompat; /* incompatible feature set */
612 __le32 s_feature_ro_compat; /* readonly-compatible feature set */
613/*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */
614/*78*/ char s_volume_name[16]; /* volume name */
615/*88*/ char s_last_mounted[64]; /* directory where last mounted */
616/*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */
617 /*
618 * Performance hints. Directory preallocation should only
619 * happen if the EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on.
620 */
621 __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/
622 __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */
623 __le16 s_reserved_gdt_blocks; /* Per group desc for online growth */
624 /*
625 * Journaling support valid if EXT4_FEATURE_COMPAT_HAS_JOURNAL set.
626 */
627/*D0*/ __u8 s_journal_uuid[16]; /* uuid of journal superblock */
628/*E0*/ __le32 s_journal_inum; /* inode number of journal file */
629 __le32 s_journal_dev; /* device number of journal file */
630 __le32 s_last_orphan; /* start of list of inodes to delete */
631 __le32 s_hash_seed[4]; /* HTREE hash seed */
632 __u8 s_def_hash_version; /* Default hash version to use */
633 __u8 s_reserved_char_pad;
634 __le16 s_desc_size; /* size of group descriptor */
635/*100*/ __le32 s_default_mount_opts;
636 __le32 s_first_meta_bg; /* First metablock block group */
637 __le32 s_mkfs_time; /* When the filesystem was created */
638 __le32 s_jnl_blocks[17]; /* Backup of the journal inode */
639 /* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */
640/*150*/ __le32 s_blocks_count_hi; /* Blocks count */
641 __le32 s_r_blocks_count_hi; /* Reserved blocks count */
642 __le32 s_free_blocks_count_hi; /* Free blocks count */
643 __le16 s_min_extra_isize; /* All inodes have at least # bytes */
644 __le16 s_want_extra_isize; /* New inodes should reserve # bytes */
645 __le32 s_flags; /* Miscellaneous flags */
646 __le16 s_raid_stride; /* RAID stride */
647 __le16 s_mmp_interval; /* # seconds to wait in MMP checking */
648 __le64 s_mmp_block; /* Block for multi-mount protection */
649 __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/
650 __u32 s_reserved[163]; /* Padding to the end of the block */
651};
652
653#ifdef __KERNEL__
654static inline struct ext4_sb_info * EXT4_SB(struct super_block *sb)
655{
656 return sb->s_fs_info;
657}
658static inline struct ext4_inode_info *EXT4_I(struct inode *inode)
659{
660 return container_of(inode, struct ext4_inode_info, vfs_inode);
661}
662
663static inline struct timespec ext4_current_time(struct inode *inode)
664{
665 return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ?
666 current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
667}
668
669
670static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
671{
672 return ino == EXT4_ROOT_INO ||
673 ino == EXT4_JOURNAL_INO ||
674 ino == EXT4_RESIZE_INO ||
675 (ino >= EXT4_FIRST_INO(sb) &&
676 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
677}
678#else
679/* Assume that user mode programs are passing in an ext4fs superblock, not
680 * a kernel struct super_block. This will allow us to call the feature-test
681 * macros from user land. */
682#define EXT4_SB(sb) (sb)
683#endif
684
685#define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime
686
687/*
688 * Codes for operating systems
689 */
690#define EXT4_OS_LINUX 0
691#define EXT4_OS_HURD 1
692#define EXT4_OS_MASIX 2
693#define EXT4_OS_FREEBSD 3
694#define EXT4_OS_LITES 4
695
696/*
697 * Revision levels
698 */
699#define EXT4_GOOD_OLD_REV 0 /* The good old (original) format */
700#define EXT4_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */
701
702#define EXT4_CURRENT_REV EXT4_GOOD_OLD_REV
703#define EXT4_MAX_SUPP_REV EXT4_DYNAMIC_REV
704
705#define EXT4_GOOD_OLD_INODE_SIZE 128
706
707/*
708 * Feature set definitions
709 */
710
711#define EXT4_HAS_COMPAT_FEATURE(sb,mask) \
712 ( EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) )
713#define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \
714 ( EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) )
715#define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \
716 ( EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) )
717#define EXT4_SET_COMPAT_FEATURE(sb,mask) \
718 EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask)
719#define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \
720 EXT4_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask)
721#define EXT4_SET_INCOMPAT_FEATURE(sb,mask) \
722 EXT4_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask)
723#define EXT4_CLEAR_COMPAT_FEATURE(sb,mask) \
724 EXT4_SB(sb)->s_es->s_feature_compat &= ~cpu_to_le32(mask)
725#define EXT4_CLEAR_RO_COMPAT_FEATURE(sb,mask) \
726 EXT4_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask)
727#define EXT4_CLEAR_INCOMPAT_FEATURE(sb,mask) \
728 EXT4_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask)
729
730#define EXT4_FEATURE_COMPAT_DIR_PREALLOC 0x0001
731#define EXT4_FEATURE_COMPAT_IMAGIC_INODES 0x0002
732#define EXT4_FEATURE_COMPAT_HAS_JOURNAL 0x0004
733#define EXT4_FEATURE_COMPAT_EXT_ATTR 0x0008
734#define EXT4_FEATURE_COMPAT_RESIZE_INODE 0x0010
735#define EXT4_FEATURE_COMPAT_DIR_INDEX 0x0020
736
737#define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
738#define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
739#define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004
740#define EXT4_FEATURE_RO_COMPAT_HUGE_FILE 0x0008
741#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010
742#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020
743#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040
744
745#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001
746#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002
747#define EXT4_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */
748#define EXT4_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */
749#define EXT4_FEATURE_INCOMPAT_META_BG 0x0010
750#define EXT4_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */
751#define EXT4_FEATURE_INCOMPAT_64BIT 0x0080
752#define EXT4_FEATURE_INCOMPAT_MMP 0x0100
753#define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200
754
755#define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
756#define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
757 EXT4_FEATURE_INCOMPAT_RECOVER| \
758 EXT4_FEATURE_INCOMPAT_META_BG| \
759 EXT4_FEATURE_INCOMPAT_EXTENTS| \
760 EXT4_FEATURE_INCOMPAT_64BIT| \
761 EXT4_FEATURE_INCOMPAT_FLEX_BG)
762#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
763 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
764 EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
765 EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
766 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
767 EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\
768 EXT4_FEATURE_RO_COMPAT_HUGE_FILE)
769
770/*
771 * Default values for user and/or group using reserved blocks
772 */
773#define EXT4_DEF_RESUID 0
774#define EXT4_DEF_RESGID 0
775
776/*
777 * Default mount options
778 */
779#define EXT4_DEFM_DEBUG 0x0001
780#define EXT4_DEFM_BSDGROUPS 0x0002
781#define EXT4_DEFM_XATTR_USER 0x0004
782#define EXT4_DEFM_ACL 0x0008
783#define EXT4_DEFM_UID16 0x0010
784#define EXT4_DEFM_JMODE 0x0060
785#define EXT4_DEFM_JMODE_DATA 0x0020
786#define EXT4_DEFM_JMODE_ORDERED 0x0040
787#define EXT4_DEFM_JMODE_WBACK 0x0060
788
789/*
790 * Structure of a directory entry
791 */
792#define EXT4_NAME_LEN 255
793
794struct ext4_dir_entry {
795 __le32 inode; /* Inode number */
796 __le16 rec_len; /* Directory entry length */
797 __le16 name_len; /* Name length */
798 char name[EXT4_NAME_LEN]; /* File name */
799};
800
801/*
802 * The new version of the directory entry. Since EXT4 structures are
803 * stored in intel byte order, and the name_len field could never be
804 * bigger than 255 chars, it's safe to reclaim the extra byte for the
805 * file_type field.
806 */
807struct ext4_dir_entry_2 {
808 __le32 inode; /* Inode number */
809 __le16 rec_len; /* Directory entry length */
810 __u8 name_len; /* Name length */
811 __u8 file_type;
812 char name[EXT4_NAME_LEN]; /* File name */
813};
814
815/*
816 * Ext4 directory file types. Only the low 3 bits are used. The
817 * other bits are reserved for now.
818 */
819#define EXT4_FT_UNKNOWN 0
820#define EXT4_FT_REG_FILE 1
821#define EXT4_FT_DIR 2
822#define EXT4_FT_CHRDEV 3
823#define EXT4_FT_BLKDEV 4
824#define EXT4_FT_FIFO 5
825#define EXT4_FT_SOCK 6
826#define EXT4_FT_SYMLINK 7
827
828#define EXT4_FT_MAX 8
829
830/*
831 * EXT4_DIR_PAD defines the directory entries boundaries
832 *
833 * NOTE: It must be a multiple of 4
834 */
835#define EXT4_DIR_PAD 4
836#define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1)
837#define EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \
838 ~EXT4_DIR_ROUND)
839#define EXT4_MAX_REC_LEN ((1<<16)-1)
840
841static inline unsigned ext4_rec_len_from_disk(__le16 dlen)
842{
843 unsigned len = le16_to_cpu(dlen);
844
845 if (len == EXT4_MAX_REC_LEN)
846 return 1 << 16;
847 return len;
848}
849
850static inline __le16 ext4_rec_len_to_disk(unsigned len)
851{
852 if (len == (1 << 16))
853 return cpu_to_le16(EXT4_MAX_REC_LEN);
854 else if (len > (1 << 16))
855 BUG();
856 return cpu_to_le16(len);
857}
858
859/*
860 * Hash Tree Directory indexing
861 * (c) Daniel Phillips, 2001
862 */
863
864#define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \
865 EXT4_FEATURE_COMPAT_DIR_INDEX) && \
866 (EXT4_I(dir)->i_flags & EXT4_INDEX_FL))
867#define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX)
868#define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
869
870/* Legal values for the dx_root hash_version field: */
871
872#define DX_HASH_LEGACY 0
873#define DX_HASH_HALF_MD4 1
874#define DX_HASH_TEA 2
875
876#ifdef __KERNEL__
877
878/* hash info structure used by the directory hash */
879struct dx_hash_info
880{
881 u32 hash;
882 u32 minor_hash;
883 int hash_version;
884 u32 *seed;
885};
886
887#define EXT4_HTREE_EOF 0x7fffffff
888
889/*
890 * Control parameters used by ext4_htree_next_block
891 */
892#define HASH_NB_ALWAYS 1
893
894
895/*
896 * Describe an inode's exact location on disk and in memory
897 */
898struct ext4_iloc
899{
900 struct buffer_head *bh;
901 unsigned long offset;
902 ext4_group_t block_group;
903};
904
905static inline struct ext4_inode *ext4_raw_inode(struct ext4_iloc *iloc)
906{
907 return (struct ext4_inode *) (iloc->bh->b_data + iloc->offset);
908}
909
910/*
911 * This structure is stuffed into the struct file's private_data field
912 * for directories. It is where we put information so that we can do
913 * readdir operations in hash tree order.
914 */
915struct dir_private_info {
916 struct rb_root root;
917 struct rb_node *curr_node;
918 struct fname *extra_fname;
919 loff_t last_pos;
920 __u32 curr_hash;
921 __u32 curr_minor_hash;
922 __u32 next_hash;
923};
924
925/* calculate the first block number of the group */
926static inline ext4_fsblk_t
927ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
928{
929 return group_no * (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) +
930 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
931}
932
933/*
934 * Special error return code only used by dx_probe() and its callers.
935 */
936#define ERR_BAD_DX_DIR -75000
937
938void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
939 unsigned long *blockgrpp, ext4_grpblk_t *offsetp);
940
941/*
942 * Function prototypes
943 */
944
945/*
946 * Ok, these declarations are also in <linux/kernel.h> but none of the
947 * ext4 source programs needs to include it so they are duplicated here.
948 */
949# define NORET_TYPE /**/
950# define ATTRIB_NORET __attribute__((noreturn))
951# define NORET_AND noreturn,
952
953/* balloc.c */
954extern unsigned int ext4_block_group(struct super_block *sb,
955 ext4_fsblk_t blocknr);
956extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb,
957 ext4_fsblk_t blocknr);
958extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group);
959extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
960 ext4_group_t group);
961extern ext4_fsblk_t ext4_new_block (handle_t *handle, struct inode *inode,
962 ext4_fsblk_t goal, int *errp);
963extern ext4_fsblk_t ext4_new_blocks (handle_t *handle, struct inode *inode,
964 ext4_fsblk_t goal, unsigned long *count, int *errp);
965extern ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
966 ext4_fsblk_t goal, unsigned long *count, int *errp);
967extern void ext4_free_blocks (handle_t *handle, struct inode *inode,
968 ext4_fsblk_t block, unsigned long count, int metadata);
969extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb,
970 ext4_fsblk_t block, unsigned long count,
971 unsigned long *pdquot_freed_blocks);
972extern ext4_fsblk_t ext4_count_free_blocks (struct super_block *);
973extern void ext4_check_blocks_bitmap (struct super_block *);
974extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
975 ext4_group_t block_group,
976 struct buffer_head ** bh);
977extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
978extern void ext4_init_block_alloc_info(struct inode *);
979extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv);
980
981/* dir.c */
982extern int ext4_check_dir_entry(const char *, struct inode *,
983 struct ext4_dir_entry_2 *,
984 struct buffer_head *, unsigned long);
985extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
986 __u32 minor_hash,
987 struct ext4_dir_entry_2 *dirent);
988extern void ext4_htree_free_dir_info(struct dir_private_info *p);
989
990/* fsync.c */
991extern int ext4_sync_file (struct file *, struct dentry *, int);
992
993/* hash.c */
994extern int ext4fs_dirhash(const char *name, int len, struct
995 dx_hash_info *hinfo);
996
997/* ialloc.c */
998extern struct inode * ext4_new_inode (handle_t *, struct inode *, int);
999extern void ext4_free_inode (handle_t *, struct inode *);
1000extern struct inode * ext4_orphan_get (struct super_block *, unsigned long);
1001extern unsigned long ext4_count_free_inodes (struct super_block *);
1002extern unsigned long ext4_count_dirs (struct super_block *);
1003extern void ext4_check_inodes_bitmap (struct super_block *);
1004extern unsigned long ext4_count_free (struct buffer_head *, unsigned);
1005
1006/* mballoc.c */
1007extern long ext4_mb_stats;
1008extern long ext4_mb_max_to_scan;
1009extern int ext4_mb_init(struct super_block *, int);
1010extern int ext4_mb_release(struct super_block *);
1011extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
1012 struct ext4_allocation_request *, int *);
1013extern int ext4_mb_reserve_blocks(struct super_block *, int);
1014extern void ext4_mb_discard_inode_preallocations(struct inode *);
1015extern int __init init_ext4_mballoc(void);
1016extern void exit_ext4_mballoc(void);
1017extern void ext4_mb_free_blocks(handle_t *, struct inode *,
1018 unsigned long, unsigned long, int, unsigned long *);
1019
1020
1021/* inode.c */
1022int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
1023 struct buffer_head *bh, ext4_fsblk_t blocknr);
1024struct buffer_head *ext4_getblk(handle_t *, struct inode *,
1025 ext4_lblk_t, int, int *);
1026struct buffer_head *ext4_bread(handle_t *, struct inode *,
1027 ext4_lblk_t, int, int *);
1028int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
1029 ext4_lblk_t iblock, unsigned long maxblocks,
1030 struct buffer_head *bh_result,
1031 int create, int extend_disksize);
1032
1033extern struct inode *ext4_iget(struct super_block *, unsigned long);
1034extern int ext4_write_inode (struct inode *, int);
1035extern int ext4_setattr (struct dentry *, struct iattr *);
1036extern void ext4_delete_inode (struct inode *);
1037extern int ext4_sync_inode (handle_t *, struct inode *);
1038extern void ext4_discard_reservation (struct inode *);
1039extern void ext4_dirty_inode(struct inode *);
1040extern int ext4_change_inode_journal_flag(struct inode *, int);
1041extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
1042extern void ext4_truncate (struct inode *);
1043extern void ext4_set_inode_flags(struct inode *);
1044extern void ext4_get_inode_flags(struct ext4_inode_info *);
1045extern void ext4_set_aops(struct inode *inode);
1046extern int ext4_writepage_trans_blocks(struct inode *);
1047extern int ext4_block_truncate_page(handle_t *handle, struct page *page,
1048 struct address_space *mapping, loff_t from);
1049
1050/* ioctl.c */
1051extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
1052extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long);
1053
1054/* migrate.c */
1055extern int ext4_ext_migrate(struct inode *, struct file *, unsigned int,
1056 unsigned long);
1057/* namei.c */
1058extern int ext4_orphan_add(handle_t *, struct inode *);
1059extern int ext4_orphan_del(handle_t *, struct inode *);
1060extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
1061 __u32 start_minor_hash, __u32 *next_hash);
1062
1063/* resize.c */
1064extern int ext4_group_add(struct super_block *sb,
1065 struct ext4_new_group_data *input);
1066extern int ext4_group_extend(struct super_block *sb,
1067 struct ext4_super_block *es,
1068 ext4_fsblk_t n_blocks_count);
1069
1070/* super.c */
1071extern void ext4_error (struct super_block *, const char *, const char *, ...)
1072 __attribute__ ((format (printf, 3, 4)));
1073extern void __ext4_std_error (struct super_block *, const char *, int);
1074extern void ext4_abort (struct super_block *, const char *, const char *, ...)
1075 __attribute__ ((format (printf, 3, 4)));
1076extern void ext4_warning (struct super_block *, const char *, const char *, ...)
1077 __attribute__ ((format (printf, 3, 4)));
1078extern void ext4_update_dynamic_rev (struct super_block *sb);
1079extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
1080 __u32 compat);
1081extern int ext4_update_rocompat_feature(handle_t *handle,
1082 struct super_block *sb, __u32 rocompat);
1083extern int ext4_update_incompat_feature(handle_t *handle,
1084 struct super_block *sb, __u32 incompat);
1085extern ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
1086 struct ext4_group_desc *bg);
1087extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
1088 struct ext4_group_desc *bg);
1089extern ext4_fsblk_t ext4_inode_table(struct super_block *sb,
1090 struct ext4_group_desc *bg);
1091extern void ext4_block_bitmap_set(struct super_block *sb,
1092 struct ext4_group_desc *bg, ext4_fsblk_t blk);
1093extern void ext4_inode_bitmap_set(struct super_block *sb,
1094 struct ext4_group_desc *bg, ext4_fsblk_t blk);
1095extern void ext4_inode_table_set(struct super_block *sb,
1096 struct ext4_group_desc *bg, ext4_fsblk_t blk);
1097
1098static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
1099{
1100 return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) |
1101 le32_to_cpu(es->s_blocks_count_lo);
1102}
1103
1104static inline ext4_fsblk_t ext4_r_blocks_count(struct ext4_super_block *es)
1105{
1106 return ((ext4_fsblk_t)le32_to_cpu(es->s_r_blocks_count_hi) << 32) |
1107 le32_to_cpu(es->s_r_blocks_count_lo);
1108}
1109
1110static inline ext4_fsblk_t ext4_free_blocks_count(struct ext4_super_block *es)
1111{
1112 return ((ext4_fsblk_t)le32_to_cpu(es->s_free_blocks_count_hi) << 32) |
1113 le32_to_cpu(es->s_free_blocks_count_lo);
1114}
1115
1116static inline void ext4_blocks_count_set(struct ext4_super_block *es,
1117 ext4_fsblk_t blk)
1118{
1119 es->s_blocks_count_lo = cpu_to_le32((u32)blk);
1120 es->s_blocks_count_hi = cpu_to_le32(blk >> 32);
1121}
1122
1123static inline void ext4_free_blocks_count_set(struct ext4_super_block *es,
1124 ext4_fsblk_t blk)
1125{
1126 es->s_free_blocks_count_lo = cpu_to_le32((u32)blk);
1127 es->s_free_blocks_count_hi = cpu_to_le32(blk >> 32);
1128}
1129
1130static inline void ext4_r_blocks_count_set(struct ext4_super_block *es,
1131 ext4_fsblk_t blk)
1132{
1133 es->s_r_blocks_count_lo = cpu_to_le32((u32)blk);
1134 es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32);
1135}
1136
1137static inline loff_t ext4_isize(struct ext4_inode *raw_inode)
1138{
1139 return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) |
1140 le32_to_cpu(raw_inode->i_size_lo);
1141}
1142
1143static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size)
1144{
1145 raw_inode->i_size_lo = cpu_to_le32(i_size);
1146 raw_inode->i_size_high = cpu_to_le32(i_size >> 32);
1147}
1148
1149static inline
1150struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
1151 ext4_group_t group)
1152{
1153 struct ext4_group_info ***grp_info;
1154 long indexv, indexh;
1155 grp_info = EXT4_SB(sb)->s_group_info;
1156 indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb));
1157 indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1);
1158 return grp_info[indexv][indexh];
1159}
1160
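For illustration, a worked example of the two-level lookup above (hypothetical numbers, assuming 4 KiB blocks and the default 32-byte group descriptors, so EXT4_DESC_PER_BLOCK(sb) is 128 and EXT4_DESC_PER_BLOCK_BITS(sb) is 7):

	/* hypothetical: group 300 resolves to s_group_info[2][44] */
	indexv = 300 >> 7;	/* 300 / 128 = 2 */
	indexh = 300 & 127;	/* 300 % 128 = 44 */
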
1161
1162#define ext4_std_error(sb, errno) \
1163do { \
1164 if ((errno)) \
1165 __ext4_std_error((sb), __FUNCTION__, (errno)); \
1166} while (0)
1167
1168/*
1169 * Inodes and files operations
1170 */
1171
1172/* dir.c */
1173extern const struct file_operations ext4_dir_operations;
1174
1175/* file.c */
1176extern const struct inode_operations ext4_file_inode_operations;
1177extern const struct file_operations ext4_file_operations;
1178
1179/* namei.c */
1180extern const struct inode_operations ext4_dir_inode_operations;
1181extern const struct inode_operations ext4_special_inode_operations;
1182
1183/* symlink.c */
1184extern const struct inode_operations ext4_symlink_inode_operations;
1185extern const struct inode_operations ext4_fast_symlink_inode_operations;
1186
1187/* extents.c */
1188extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
1189extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
1190extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
1191 ext4_lblk_t iblock,
1192 unsigned long max_blocks, struct buffer_head *bh_result,
1193 int create, int extend_disksize);
1194extern void ext4_ext_truncate(struct inode *, struct page *);
1195extern void ext4_ext_init(struct super_block *);
1196extern void ext4_ext_release(struct super_block *);
1197extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
1198 loff_t len);
1199extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode,
1200 sector_t block, unsigned long max_blocks,
1201 struct buffer_head *bh, int create,
1202 int extend_disksize);
1203#endif /* __KERNEL__ */
1204
1205#endif /* _EXT4_H */
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
new file mode 100644
index 000000000000..75333b595fab
--- /dev/null
+++ b/fs/ext4/ext4_extents.h
@@ -0,0 +1,232 @@
1/*
2 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
3 * Written by Alex Tomas <alex@clusterfs.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
17 */
18
19#ifndef _EXT4_EXTENTS
20#define _EXT4_EXTENTS
21
22#include "ext4.h"
23
24/*
25 * With AGGRESSIVE_TEST defined, the capacity of index/leaf blocks
26 * becomes very small, so index split, in-depth growing and
27 * other hard changes happen much more often.
28 * This is for debug purposes only.
29 */
30#define AGGRESSIVE_TEST_
31
32/*
33 * With EXTENTS_STATS defined, the number of blocks and extents
34 * are collected in the truncate path. They'll be shown at
35 * umount time.
36 */
37#define EXTENTS_STATS__
38
39/*
40 * If CHECK_BINSEARCH is defined, then the results of the binary search
41 * will also be checked by linear search.
42 */
43#define CHECK_BINSEARCH__
44
45/*
46 * If EXT_DEBUG is defined you can use the 'extdebug' mount option
47 * to get lots of info about what's going on.
48 */
49#define EXT_DEBUG__
50#ifdef EXT_DEBUG
51#define ext_debug(a...) printk(a)
52#else
53#define ext_debug(a...)
54#endif
55
56/*
57 * If EXT_STATS is defined then stats numbers are collected.
58 * These number will be displayed at umount time.
59 */
60#define EXT_STATS_
61
62
63/*
64 * ext4_inode has i_block array (60 bytes total).
65 * The first 12 bytes store ext4_extent_header;
66 * the remainder stores an array of ext4_extent.
67 */
68
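A quick worked check of the layout just described: struct ext4_extent_header and struct ext4_extent (both defined below) are 12 bytes each, so the in-inode root holds (60 - 12) / 12 = 4 entries, while a 4 KiB tree block holds (4096 - 12) / 12 = 340.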
69/*
70 * This is the extent on-disk structure.
71 * It's used at the bottom of the tree.
72 */
73struct ext4_extent {
74 __le32 ee_block; /* first logical block extent covers */
75 __le16 ee_len; /* number of blocks covered by extent */
76 __le16 ee_start_hi; /* high 16 bits of physical block */
77 __le32 ee_start_lo; /* low 32 bits of physical block */
78};
79
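A minimal sketch of how the two halves of the physical block number recombine (illustrative expression only; the real helpers live alongside idx_pblock()/ext4_ext_store_pblock() declared further down and are implemented in the .c files):

	/* illustration: reassemble the 48-bit start block of extent 'ex' */
	ext4_fsblk_t start = le32_to_cpu(ex->ee_start_lo) |
			     ((ext4_fsblk_t)le16_to_cpu(ex->ee_start_hi) << 32);
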
80/*
81 * This is the index on-disk structure.
82 * It's used at all the levels except the bottom.
83 */
84struct ext4_extent_idx {
85 __le32 ei_block; /* index covers logical blocks from 'block' */
86 __le32 ei_leaf_lo; /* pointer to the physical block of the next *
87 * level. leaf or next index could be there */
88 __le16 ei_leaf_hi; /* high 16 bits of physical block */
89 __u16 ei_unused;
90};
91
92/*
93 * Each block (leaves and indexes), even the inode-stored one, has a header.
94 */
95struct ext4_extent_header {
96 __le16 eh_magic; /* probably will support different formats */
97 __le16 eh_entries; /* number of valid entries */
98 __le16 eh_max; /* capacity of store in entries */
99 __le16 eh_depth; /* does the tree have real underlying blocks? */
100 __le32 eh_generation; /* generation of the tree */
101};
102
103#define EXT4_EXT_MAGIC cpu_to_le16(0xf30a)
104
105/*
106 * Array of ext4_ext_path contains path to some extent.
107 * Creation/lookup routines use it for traversal/splitting/etc.
108 * Truncate uses it to simulate recursive walking.
109 */
110struct ext4_ext_path {
111 ext4_fsblk_t p_block;
112 __u16 p_depth;
113 struct ext4_extent *p_ext;
114 struct ext4_extent_idx *p_idx;
115 struct ext4_extent_header *p_hdr;
116 struct buffer_head *p_bh;
117};
118
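A hedged sketch of the usual lookup/teardown pattern built on this path array (illustrative fragment; error handling trimmed):

	struct ext4_ext_path *path;
	int depth;

	path = ext4_ext_find_extent(inode, iblock, NULL);
	if (IS_ERR(path))
		return PTR_ERR(path);
	depth = ext_depth(inode);
	/* path[depth].p_ext is the leaf-level extent (if any) around iblock */
	ext4_ext_drop_refs(path);	/* drop the buffer_heads pinned in the path */
	kfree(path);
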
119/*
120 * structure for external API
121 */
122
123#define EXT4_EXT_CACHE_NO 0
124#define EXT4_EXT_CACHE_GAP 1
125#define EXT4_EXT_CACHE_EXTENT 2
126
127
128#define EXT_MAX_BLOCK 0xffffffff
129
130/*
131 * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an
132 * initialized extent. This is 2^15 and not (2^16 - 1), since we use the
133 * MSB of the ee_len field in the extent data structure to signify whether this
134 * particular extent is an initialized extent or an uninitialized (i.e.
135 * preallocated) one.
136 * EXT_UNINIT_MAX_LEN is the maximum number of blocks we can have in an
137 * uninitialized extent.
138 * If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an
139 * uninitialized one. In other words, if MSB of ee_len is set, it is an
140 * uninitialized extent with only one special scenario when ee_len = 0x8000.
141 * In this case we can not have an uninitialized extent of zero length and
142 * thus we treat it as a special case of an initialized extent with 0x8000 length.
143 * This way we get better extent-to-group alignment for initialized extents.
144 * Hence, the maximum number of blocks we can have in an *initialized*
145 * extent is 2^15 (32768) and in an *uninitialized* extent is 2^15-1 (32767).
146 */
147#define EXT_INIT_MAX_LEN (1UL << 15)
148#define EXT_UNINIT_MAX_LEN (EXT_INIT_MAX_LEN - 1)
149
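Worked examples of the ee_len encoding described above (hypothetical values, decoded exactly as by ext4_ext_is_uninitialized()/ext4_ext_get_actual_len() below):

	ee_len = 0x0005  ->  initialized,    5 blocks
	ee_len = 0x8005  ->  uninitialized,  5 blocks  (0x8005 - 0x8000)
	ee_len = 0x8000  ->  initialized,    32768 blocks (the special case)
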
150
151#define EXT_FIRST_EXTENT(__hdr__) \
152 ((struct ext4_extent *) (((char *) (__hdr__)) + \
153 sizeof(struct ext4_extent_header)))
154#define EXT_FIRST_INDEX(__hdr__) \
155 ((struct ext4_extent_idx *) (((char *) (__hdr__)) + \
156 sizeof(struct ext4_extent_header)))
157#define EXT_HAS_FREE_INDEX(__path__) \
158 (le16_to_cpu((__path__)->p_hdr->eh_entries) \
159 < le16_to_cpu((__path__)->p_hdr->eh_max))
160#define EXT_LAST_EXTENT(__hdr__) \
161 (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1)
162#define EXT_LAST_INDEX(__hdr__) \
163 (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1)
164#define EXT_MAX_EXTENT(__hdr__) \
165 (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)
166#define EXT_MAX_INDEX(__hdr__) \
167 (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)
168
169static inline struct ext4_extent_header *ext_inode_hdr(struct inode *inode)
170{
171 return (struct ext4_extent_header *) EXT4_I(inode)->i_data;
172}
173
174static inline struct ext4_extent_header *ext_block_hdr(struct buffer_head *bh)
175{
176 return (struct ext4_extent_header *) bh->b_data;
177}
178
179static inline unsigned short ext_depth(struct inode *inode)
180{
181 return le16_to_cpu(ext_inode_hdr(inode)->eh_depth);
182}
183
184static inline void ext4_ext_tree_changed(struct inode *inode)
185{
186 EXT4_I(inode)->i_ext_generation++;
187}
188
189static inline void
190ext4_ext_invalidate_cache(struct inode *inode)
191{
192 EXT4_I(inode)->i_cached_extent.ec_type = EXT4_EXT_CACHE_NO;
193}
194
195static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext)
196{
197 /* We can not have an uninitialized extent of zero length! */
198 BUG_ON((le16_to_cpu(ext->ee_len) & ~EXT_INIT_MAX_LEN) == 0);
199 ext->ee_len |= cpu_to_le16(EXT_INIT_MAX_LEN);
200}
201
202static inline int ext4_ext_is_uninitialized(struct ext4_extent *ext)
203{
204 /* Extent with ee_len of 0x8000 is treated as an initialized extent */
205 return (le16_to_cpu(ext->ee_len) > EXT_INIT_MAX_LEN);
206}
207
208static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
209{
210 return (le16_to_cpu(ext->ee_len) <= EXT_INIT_MAX_LEN ?
211 le16_to_cpu(ext->ee_len) :
212 (le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN));
213}
214
215extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
216extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
217extern int ext4_extent_tree_init(handle_t *, struct inode *);
218extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *);
219extern int ext4_ext_try_to_merge(struct inode *inode,
220 struct ext4_ext_path *path,
221 struct ext4_extent *);
222extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
223extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *);
224extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
225 struct ext4_ext_path *);
226extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *,
227 ext4_lblk_t *, ext4_fsblk_t *);
228extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *,
229 ext4_lblk_t *, ext4_fsblk_t *);
230extern void ext4_ext_drop_refs(struct ext4_ext_path *);
231#endif /* _EXT4_EXTENTS */
232
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h
new file mode 100644
index 000000000000..26a4ae255d79
--- /dev/null
+++ b/fs/ext4/ext4_i.h
@@ -0,0 +1,167 @@
1/*
2 * ext4_i.h
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 * from
10 *
11 * linux/include/linux/minix_fs_i.h
12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds
14 */
15
16#ifndef _EXT4_I
17#define _EXT4_I
18
19#include <linux/rwsem.h>
20#include <linux/rbtree.h>
21#include <linux/seqlock.h>
22#include <linux/mutex.h>
23
24/* data type for block offset of block group */
25typedef int ext4_grpblk_t;
26
27/* data type for filesystem-wide blocks number */
28typedef unsigned long long ext4_fsblk_t;
29
30/* data type for file logical block number */
31typedef __u32 ext4_lblk_t;
32
33/* data type for block group number */
34typedef unsigned long ext4_group_t;
35
36struct ext4_reserve_window {
37 ext4_fsblk_t _rsv_start; /* First byte reserved */
38 ext4_fsblk_t _rsv_end; /* Last byte reserved or 0 */
39};
40
41struct ext4_reserve_window_node {
42 struct rb_node rsv_node;
43 __u32 rsv_goal_size;
44 __u32 rsv_alloc_hit;
45 struct ext4_reserve_window rsv_window;
46};
47
48struct ext4_block_alloc_info {
49 /* information about reservation window */
50 struct ext4_reserve_window_node rsv_window_node;
51 /*
52 * was i_next_alloc_block in ext4_inode_info
53 * is the logical (file-relative) number of the
54 * most-recently-allocated block in this file.
55 * We use this for detecting linearly ascending allocation requests.
56 */
57 ext4_lblk_t last_alloc_logical_block;
58 /*
59 * Was i_next_alloc_goal in ext4_inode_info
60 * is the *physical* companion to i_next_alloc_block.
61 * it is the physical block number of the block which was most-recently
62 * allocated to this file. This gives us the goal (target) for the next
63 * allocation when we detect linearly ascending requests.
64 */
65 ext4_fsblk_t last_alloc_physical_block;
66};
67
68#define rsv_start rsv_window._rsv_start
69#define rsv_end rsv_window._rsv_end
70
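A minimal sketch of how the two last_alloc_* fields are typically consulted when choosing an allocation goal (hypothetical fragment; iblock and goal are placeholders, the real logic lives in the block allocators):

	/* detect a linearly ascending write and try to stay contiguous */
	struct ext4_block_alloc_info *bai = EXT4_I(inode)->i_block_alloc_info;

	if (bai && iblock == bai->last_alloc_logical_block + 1)
		goal = bai->last_alloc_physical_block + 1;
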
71/*
72 * storage for cached extent
73 */
74struct ext4_ext_cache {
75 ext4_fsblk_t ec_start;
76 ext4_lblk_t ec_block;
77 __u32 ec_len; /* must be 32bit to return holes */
78 __u32 ec_type;
79};
80
81/*
82 * fourth extended file system inode data in memory
83 */
84struct ext4_inode_info {
85 __le32 i_data[15]; /* unconverted */
86 __u32 i_flags;
87 ext4_fsblk_t i_file_acl;
88 __u32 i_dtime;
89
90 /*
91 * i_block_group is the number of the block group which contains
92 * this file's inode. Constant across the lifetime of the inode,
93 * it is used for making block allocation decisions - we try to
94 * place a file's data blocks near its inode block, and new inodes
95 * near to their parent directory's inode.
96 */
97 ext4_group_t i_block_group;
98 __u32 i_state; /* Dynamic state flags for ext4 */
99
100 /* block reservation info */
101 struct ext4_block_alloc_info *i_block_alloc_info;
102
103 ext4_lblk_t i_dir_start_lookup;
104#ifdef CONFIG_EXT4DEV_FS_XATTR
105 /*
106 * Extended attributes can be read independently of the main file
107 * data. Taking i_mutex even when reading would cause contention
108 * between readers of EAs and writers of regular file data, so
109 * instead we synchronize on xattr_sem when reading or changing
110 * EAs.
111 */
112 struct rw_semaphore xattr_sem;
113#endif
114#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
115 struct posix_acl *i_acl;
116 struct posix_acl *i_default_acl;
117#endif
118
119 struct list_head i_orphan; /* unlinked but open inodes */
120
121 /*
122 * i_disksize keeps track of what the inode size is ON DISK, not
123 * in memory. During truncate, i_size is set to the new size by
124 * the VFS prior to calling ext4_truncate(), but the filesystem won't
125 * set i_disksize to 0 until the truncate is actually under way.
126 *
127 * The intent is that i_disksize always represents the blocks which
128 * are used by this file. This allows recovery to restart truncate
129 * on orphans if we crash during truncate. We actually write i_disksize
130 * into the on-disk inode when writing inodes out, instead of i_size.
131 *
132 * The only time when i_disksize and i_size may be different is when
133 * a truncate is in progress. The only things which change i_disksize
134 * are ext4_get_block (growth) and ext4_truncate (shrinkth).
135 */
136 loff_t i_disksize;
137
138 /* on-disk additional length */
139 __u16 i_extra_isize;
140
141 /*
142 * i_data_sem is for serialising ext4_truncate() against
143 * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's
144 * data tree are chopped off during truncate. We can't do that in
145 * ext4 because whenever we perform intermediate commits during
146 * truncate, the inode and all the metadata blocks *must* be in a
147 * consistent state which allows truncation of the orphans to restart
148 * during recovery. Hence we must fix the get_block-vs-truncate race
149 * by other means, so we have i_data_sem.
150 */
151 struct rw_semaphore i_data_sem;
152 struct inode vfs_inode;
153
154 unsigned long i_ext_generation;
155 struct ext4_ext_cache i_cached_extent;
156 /*
157 * File creation time. Its function is same as that of
158 * struct timespec i_{a,c,m}time in the generic inode.
159 */
160 struct timespec i_crtime;
161
162 /* mballoc */
163 struct list_head i_prealloc_list;
164 spinlock_t i_prealloc_lock;
165};
166
167#endif /* _EXT4_I */
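As a concrete illustration of the i_disksize rule documented above (hypothetical scenario, not text from the file): truncating a 10 MB file to 1 MB makes the VFS set i_size to 1 MB first, while i_disksize stays at 10 MB until ext4_truncate() is actually under way; the orphan-list entry then lets recovery finish the truncate after a crash instead of exposing stale blocks.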
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index d6afe4e27340..c75384b34f2c 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -2,14 +2,14 @@
2 * Interface between ext4 and JBD 2 * Interface between ext4 and JBD
3 */ 3 */
4 4
5#include <linux/ext4_jbd2.h> 5#include "ext4_jbd2.h"
6 6
7int __ext4_journal_get_undo_access(const char *where, handle_t *handle, 7int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
8 struct buffer_head *bh) 8 struct buffer_head *bh)
9{ 9{
10 int err = jbd2_journal_get_undo_access(handle, bh); 10 int err = jbd2_journal_get_undo_access(handle, bh);
11 if (err) 11 if (err)
12 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 12 ext4_journal_abort_handle(where, __func__, bh, handle, err);
13 return err; 13 return err;
14} 14}
15 15
@@ -18,7 +18,7 @@ int __ext4_journal_get_write_access(const char *where, handle_t *handle,
18{ 18{
19 int err = jbd2_journal_get_write_access(handle, bh); 19 int err = jbd2_journal_get_write_access(handle, bh);
20 if (err) 20 if (err)
21 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 21 ext4_journal_abort_handle(where, __func__, bh, handle, err);
22 return err; 22 return err;
23} 23}
24 24
@@ -27,7 +27,7 @@ int __ext4_journal_forget(const char *where, handle_t *handle,
27{ 27{
28 int err = jbd2_journal_forget(handle, bh); 28 int err = jbd2_journal_forget(handle, bh);
29 if (err) 29 if (err)
30 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 30 ext4_journal_abort_handle(where, __func__, bh, handle, err);
31 return err; 31 return err;
32} 32}
33 33
@@ -36,7 +36,7 @@ int __ext4_journal_revoke(const char *where, handle_t *handle,
36{ 36{
37 int err = jbd2_journal_revoke(handle, blocknr, bh); 37 int err = jbd2_journal_revoke(handle, blocknr, bh);
38 if (err) 38 if (err)
39 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 39 ext4_journal_abort_handle(where, __func__, bh, handle, err);
40 return err; 40 return err;
41} 41}
42 42
@@ -45,7 +45,7 @@ int __ext4_journal_get_create_access(const char *where,
45{ 45{
46 int err = jbd2_journal_get_create_access(handle, bh); 46 int err = jbd2_journal_get_create_access(handle, bh);
47 if (err) 47 if (err)
48 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 48 ext4_journal_abort_handle(where, __func__, bh, handle, err);
49 return err; 49 return err;
50} 50}
51 51
@@ -54,6 +54,6 @@ int __ext4_journal_dirty_metadata(const char *where,
54{ 54{
55 int err = jbd2_journal_dirty_metadata(handle, bh); 55 int err = jbd2_journal_dirty_metadata(handle, bh);
56 if (err) 56 if (err)
57 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 57 ext4_journal_abort_handle(where, __func__, bh, handle, err);
58 return err; 58 return err;
59} 59}
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
new file mode 100644
index 000000000000..9255a7d28b24
--- /dev/null
+++ b/fs/ext4/ext4_jbd2.h
@@ -0,0 +1,231 @@
1/*
2 * ext4_jbd2.h
3 *
4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
5 *
6 * Copyright 1998--1999 Red Hat corp --- All Rights Reserved
7 *
8 * This file is part of the Linux kernel and is made available under
9 * the terms of the GNU General Public License, version 2, or at your
10 * option, any later version, incorporated herein by reference.
11 *
12 * Ext4-specific journaling extensions.
13 */
14
15#ifndef _EXT4_JBD2_H
16#define _EXT4_JBD2_H
17
18#include <linux/fs.h>
19#include <linux/jbd2.h>
20#include "ext4.h"
21
22#define EXT4_JOURNAL(inode) (EXT4_SB((inode)->i_sb)->s_journal)
23
24/* Define the number of blocks we need to account to a transaction to
25 * modify one block of data.
26 *
27 * We may have to touch one inode, one bitmap buffer, up to three
28 * indirection blocks, the group and superblock summaries, and the data
29 * block to complete the transaction.
30 *
31 * For extents-enabled fs we may have to allocate and modify up to
32 * 5 levels of tree + root which are stored in the inode. */
33
34#define EXT4_SINGLEDATA_TRANS_BLOCKS(sb) \
35 (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS) \
36 || test_opt(sb, EXTENTS) ? 27U : 8U)
37
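The 8U in the non-extents case can be read straight off the comment above as a worked sum: 1 inode + 1 bitmap buffer + 3 indirection blocks + 1 group descriptor + 1 superblock + 1 data block = 8 credits; the 27U figure is the same accounting sized for an extent tree of up to 5 levels plus the in-inode root.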
38/* Extended attribute operations touch at most two data buffers,
39 * two bitmap buffers, and two group summaries, in addition to the inode
40 * and the superblock, which are already accounted for. */
41
42#define EXT4_XATTR_TRANS_BLOCKS 6U
43
44/* Define the minimum size for a transaction which modifies data. This
45 * needs to take into account the fact that we may end up modifying two
46 * quota files too (one for the group, one for the user quota). The
47 * superblock only gets updated once, of course, so don't bother
48 * counting that again for the quota updates. */
49
50#define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \
51 EXT4_XATTR_TRANS_BLOCKS - 2 + \
52 2*EXT4_QUOTA_TRANS_BLOCKS(sb))
53
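Plugging the macros in gives a quick worked figure (assuming a non-extents filesystem): with journalled quota enabled this is 8 + 6 - 2 + 2*2 = 16 credits per data-modifying transaction, and 8 + 6 - 2 + 0 = 12 without quota.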
54/* Delete operations potentially hit one directory's namespace plus an
55 * entire inode, plus arbitrary amounts of bitmap/indirection data. Be
56 * generous. We can grow the delete transaction later if necessary. */
57
58#define EXT4_DELETE_TRANS_BLOCKS(sb) (2 * EXT4_DATA_TRANS_BLOCKS(sb) + 64)
59
60/* Define an arbitrary limit for the amount of data we will anticipate
61 * writing to any given transaction. For unbounded transactions such as
62 * write(2) and truncate(2) we can write more than this, but we always
63 * start off at the maximum transaction size and grow the transaction
64 * optimistically as we go. */
65
66#define EXT4_MAX_TRANS_DATA 64U
67
68/* We break up a large truncate or write transaction once the handle's
69 * buffer credits get this low; we then need either to extend the
70 * transaction or to start a new one. Reserve enough space here for
71 * inode, bitmap, superblock, group and indirection updates for at least
72 * one block, plus two quota updates. Quota allocations are not
73 * needed. */
74
75#define EXT4_RESERVE_TRANS_BLOCKS 12U
76
77#define EXT4_INDEX_EXTRA_TRANS_BLOCKS 8
78
79#ifdef CONFIG_QUOTA
80/* Amount of blocks needed for quota update - we know that the structure was
81 * allocated so we need to update only inode+data */
82#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0)
83/* Amount of blocks needed for quota insert/delete - we do some block writes
84 * but inode, sb and group updates are done only once */
85#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
86 (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0)
87#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
88 (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0)
89#else
90#define EXT4_QUOTA_TRANS_BLOCKS(sb) 0
91#define EXT4_QUOTA_INIT_BLOCKS(sb) 0
92#define EXT4_QUOTA_DEL_BLOCKS(sb) 0
93#endif
94
95int
96ext4_mark_iloc_dirty(handle_t *handle,
97 struct inode *inode,
98 struct ext4_iloc *iloc);
99
100/*
101 * On success, we end up with an outstanding reference count against
102 * iloc->bh. This _must_ be cleaned up later.
103 */
104
105int ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
106 struct ext4_iloc *iloc);
107
108int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode);
109
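A hedged sketch of the usual calling pattern for the two helpers above (illustrative only; as used elsewhere in ext4, ext4_mark_iloc_dirty() is expected to drop the iloc.bh reference taken by ext4_reserve_inode_write()):

	struct ext4_iloc iloc;
	int err;

	err = ext4_reserve_inode_write(handle, inode, &iloc);
	if (err)
		return err;
	/* ... update the raw inode through iloc here ... */
	return ext4_mark_iloc_dirty(handle, inode, &iloc);	/* releases iloc.bh */
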
110/*
111 * Wrapper functions with which ext4 calls into JBD. The intent here is
112 * to allow these to be turned into appropriate stubs so ext4 can control
113 * ext2 filesystems, so ext2+ext4 systems only need one fs. This work hasn't
114 * been done yet.
115 */
116
117static inline void ext4_journal_release_buffer(handle_t *handle,
118 struct buffer_head *bh)
119{
120 jbd2_journal_release_buffer(handle, bh);
121}
122
123void ext4_journal_abort_handle(const char *caller, const char *err_fn,
124 struct buffer_head *bh, handle_t *handle, int err);
125
126int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
127 struct buffer_head *bh);
128
129int __ext4_journal_get_write_access(const char *where, handle_t *handle,
130 struct buffer_head *bh);
131
132int __ext4_journal_forget(const char *where, handle_t *handle,
133 struct buffer_head *bh);
134
135int __ext4_journal_revoke(const char *where, handle_t *handle,
136 ext4_fsblk_t blocknr, struct buffer_head *bh);
137
138int __ext4_journal_get_create_access(const char *where,
139 handle_t *handle, struct buffer_head *bh);
140
141int __ext4_journal_dirty_metadata(const char *where,
142 handle_t *handle, struct buffer_head *bh);
143
144#define ext4_journal_get_undo_access(handle, bh) \
145 __ext4_journal_get_undo_access(__FUNCTION__, (handle), (bh))
146#define ext4_journal_get_write_access(handle, bh) \
147 __ext4_journal_get_write_access(__FUNCTION__, (handle), (bh))
148#define ext4_journal_revoke(handle, blocknr, bh) \
149 __ext4_journal_revoke(__FUNCTION__, (handle), (blocknr), (bh))
150#define ext4_journal_get_create_access(handle, bh) \
151 __ext4_journal_get_create_access(__FUNCTION__, (handle), (bh))
152#define ext4_journal_dirty_metadata(handle, bh) \
153 __ext4_journal_dirty_metadata(__FUNCTION__, (handle), (bh))
154#define ext4_journal_forget(handle, bh) \
155 __ext4_journal_forget(__FUNCTION__, (handle), (bh))
156
157int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh);
158
159handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks);
160int __ext4_journal_stop(const char *where, handle_t *handle);
161
162static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks)
163{
164 return ext4_journal_start_sb(inode->i_sb, nblocks);
165}
166
167#define ext4_journal_stop(handle) \
168 __ext4_journal_stop(__FUNCTION__, (handle))
169
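A minimal usage sketch of the start/stop wrappers above (hypothetical fragment; the credit count is only an example):

	handle_t *handle;
	int err;

	handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb));
	if (IS_ERR(handle))
		return PTR_ERR(handle);
	/* ... get write access to buffers, modify them, dirty the metadata ... */
	err = ext4_journal_stop(handle);
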
170static inline handle_t *ext4_journal_current_handle(void)
171{
172 return journal_current_handle();
173}
174
175static inline int ext4_journal_extend(handle_t *handle, int nblocks)
176{
177 return jbd2_journal_extend(handle, nblocks);
178}
179
180static inline int ext4_journal_restart(handle_t *handle, int nblocks)
181{
182 return jbd2_journal_restart(handle, nblocks);
183}
184
185static inline int ext4_journal_blocks_per_page(struct inode *inode)
186{
187 return jbd2_journal_blocks_per_page(inode);
188}
189
190static inline int ext4_journal_force_commit(journal_t *journal)
191{
192 return jbd2_journal_force_commit(journal);
193}
194
195/* super.c */
196int ext4_force_commit(struct super_block *sb);
197
198static inline int ext4_should_journal_data(struct inode *inode)
199{
200 if (!S_ISREG(inode->i_mode))
201 return 1;
202 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
203 return 1;
204 if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
205 return 1;
206 return 0;
207}
208
209static inline int ext4_should_order_data(struct inode *inode)
210{
211 if (!S_ISREG(inode->i_mode))
212 return 0;
213 if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
214 return 0;
215 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
216 return 1;
217 return 0;
218}
219
220static inline int ext4_should_writeback_data(struct inode *inode)
221{
222 if (!S_ISREG(inode->i_mode))
223 return 0;
224 if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
225 return 0;
226 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
227 return 1;
228 return 0;
229}
230
231#endif /* _EXT4_JBD2_H */
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
new file mode 100644
index 000000000000..5802e69f2191
--- /dev/null
+++ b/fs/ext4/ext4_sb.h
@@ -0,0 +1,148 @@
1/*
2 * ext4_sb.h
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 * from
10 *
11 * linux/include/linux/minix_fs_sb.h
12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds
14 */
15
16#ifndef _EXT4_SB
17#define _EXT4_SB
18
19#ifdef __KERNEL__
20#include <linux/timer.h>
21#include <linux/wait.h>
22#include <linux/blockgroup_lock.h>
23#include <linux/percpu_counter.h>
24#endif
25#include <linux/rbtree.h>
26
27/*
28 * fourth extended-fs super-block data in memory
29 */
30struct ext4_sb_info {
31 unsigned long s_desc_size; /* Size of a group descriptor in bytes */
32 unsigned long s_inodes_per_block;/* Number of inodes per block */
33 unsigned long s_blocks_per_group;/* Number of blocks in a group */
34 unsigned long s_inodes_per_group;/* Number of inodes in a group */
35 unsigned long s_itb_per_group; /* Number of inode table blocks per group */
36 unsigned long s_gdb_count; /* Number of group descriptor blocks */
37 unsigned long s_desc_per_block; /* Number of group descriptors per block */
38 ext4_group_t s_groups_count; /* Number of groups in the fs */
39 unsigned long s_overhead_last; /* Last calculated overhead */
40 unsigned long s_blocks_last; /* Last seen block count */
41 loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
42 struct buffer_head * s_sbh; /* Buffer containing the super block */
43 struct ext4_super_block * s_es; /* Pointer to the super block in the buffer */
44 struct buffer_head ** s_group_desc;
45 unsigned long s_mount_opt;
46 ext4_fsblk_t s_sb_block;
47 uid_t s_resuid;
48 gid_t s_resgid;
49 unsigned short s_mount_state;
50 unsigned short s_pad;
51 int s_addr_per_block_bits;
52 int s_desc_per_block_bits;
53 int s_inode_size;
54 int s_first_ino;
55 spinlock_t s_next_gen_lock;
56 u32 s_next_generation;
57 u32 s_hash_seed[4];
58 int s_def_hash_version;
59 struct percpu_counter s_freeblocks_counter;
60 struct percpu_counter s_freeinodes_counter;
61 struct percpu_counter s_dirs_counter;
62 struct blockgroup_lock s_blockgroup_lock;
63
64 /* root of the per fs reservation window tree */
65 spinlock_t s_rsv_window_lock;
66 struct rb_root s_rsv_window_root;
67 struct ext4_reserve_window_node s_rsv_window_head;
68
69 /* Journaling */
70 struct inode * s_journal_inode;
71 struct journal_s * s_journal;
72 struct list_head s_orphan;
73 unsigned long s_commit_interval;
74 struct block_device *journal_bdev;
75#ifdef CONFIG_JBD2_DEBUG
76 struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
77 wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
78#endif
79#ifdef CONFIG_QUOTA
80 char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
81 int s_jquota_fmt; /* Format of quota to use */
82#endif
83 unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
84
85#ifdef EXTENTS_STATS
86 /* ext4 extents stats */
87 unsigned long s_ext_min;
88 unsigned long s_ext_max;
89 unsigned long s_depth_max;
90 spinlock_t s_ext_stats_lock;
91 unsigned long s_ext_blocks;
92 unsigned long s_ext_extents;
93#endif
94
95 /* for buddy allocator */
96 struct ext4_group_info ***s_group_info;
97 struct inode *s_buddy_cache;
98 long s_blocks_reserved;
99 spinlock_t s_reserve_lock;
100 struct list_head s_active_transaction;
101 struct list_head s_closed_transaction;
102 struct list_head s_committed_transaction;
103 spinlock_t s_md_lock;
104 tid_t s_last_transaction;
105 unsigned short *s_mb_offsets, *s_mb_maxs;
106
107 /* tunables */
108 unsigned long s_stripe;
109 unsigned long s_mb_stream_request;
110 unsigned long s_mb_max_to_scan;
111 unsigned long s_mb_min_to_scan;
112 unsigned long s_mb_stats;
113 unsigned long s_mb_order2_reqs;
114 unsigned long s_mb_group_prealloc;
115 /* where last allocation was done - for stream allocation */
116 unsigned long s_mb_last_group;
117 unsigned long s_mb_last_start;
118
119 /* history to debug policy */
120 struct ext4_mb_history *s_mb_history;
121 int s_mb_history_cur;
122 int s_mb_history_max;
123 int s_mb_history_num;
124 struct proc_dir_entry *s_mb_proc;
125 spinlock_t s_mb_history_lock;
126 int s_mb_history_filter;
127
128 /* stats for buddy allocator */
129 spinlock_t s_mb_pa_lock;
130 atomic_t s_bal_reqs; /* number of reqs with len > 1 */
131 atomic_t s_bal_success; /* we found long enough chunks */
132 atomic_t s_bal_allocated; /* in blocks */
133 atomic_t s_bal_ex_scanned; /* total extents scanned */
134 atomic_t s_bal_goals; /* goal hits */
135 atomic_t s_bal_breaks; /* too long searches */
136 atomic_t s_bal_2orders; /* 2^order hits */
137 spinlock_t s_bal_lock;
138 unsigned long s_mb_buddies_generated;
139 unsigned long long s_mb_generation_time;
140 atomic_t s_mb_lost_chunks;
141 atomic_t s_mb_preallocated;
142 atomic_t s_mb_discarded;
143
144 /* locality groups */
145 struct ext4_locality_group *s_locality_groups;
146};
147
148#endif /* _EXT4_SB */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 9ae6e67090cd..47929c4e3dae 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -32,7 +32,6 @@
32#include <linux/module.h> 32#include <linux/module.h>
33#include <linux/fs.h> 33#include <linux/fs.h>
34#include <linux/time.h> 34#include <linux/time.h>
35#include <linux/ext4_jbd2.h>
36#include <linux/jbd2.h> 35#include <linux/jbd2.h>
37#include <linux/highuid.h> 36#include <linux/highuid.h>
38#include <linux/pagemap.h> 37#include <linux/pagemap.h>
@@ -40,8 +39,9 @@
40#include <linux/string.h> 39#include <linux/string.h>
41#include <linux/slab.h> 40#include <linux/slab.h>
42#include <linux/falloc.h> 41#include <linux/falloc.h>
43#include <linux/ext4_fs_extents.h>
44#include <asm/uaccess.h> 42#include <asm/uaccess.h>
43#include "ext4_jbd2.h"
44#include "ext4_extents.h"
45 45
46 46
47/* 47/*
@@ -308,7 +308,7 @@ corrupted:
308} 308}
309 309
310#define ext4_ext_check_header(inode, eh, depth) \ 310#define ext4_ext_check_header(inode, eh, depth) \
311 __ext4_ext_check_header(__FUNCTION__, inode, eh, depth) 311 __ext4_ext_check_header(__func__, inode, eh, depth)
312 312
313#ifdef EXT_DEBUG 313#ifdef EXT_DEBUG
314static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) 314static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
@@ -614,7 +614,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
614 614
615 ix->ei_block = cpu_to_le32(logical); 615 ix->ei_block = cpu_to_le32(logical);
616 ext4_idx_store_pblock(ix, ptr); 616 ext4_idx_store_pblock(ix, ptr);
617 curp->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(curp->p_hdr->eh_entries)+1); 617 le16_add_cpu(&curp->p_hdr->eh_entries, 1);
618 618
619 BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries) 619 BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries)
620 > le16_to_cpu(curp->p_hdr->eh_max)); 620 > le16_to_cpu(curp->p_hdr->eh_max));
@@ -736,7 +736,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
736 } 736 }
737 if (m) { 737 if (m) {
738 memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m); 738 memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m);
739 neh->eh_entries = cpu_to_le16(le16_to_cpu(neh->eh_entries)+m); 739 le16_add_cpu(&neh->eh_entries, m);
740 } 740 }
741 741
742 set_buffer_uptodate(bh); 742 set_buffer_uptodate(bh);
@@ -753,8 +753,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
753 err = ext4_ext_get_access(handle, inode, path + depth); 753 err = ext4_ext_get_access(handle, inode, path + depth);
754 if (err) 754 if (err)
755 goto cleanup; 755 goto cleanup;
756 path[depth].p_hdr->eh_entries = 756 le16_add_cpu(&path[depth].p_hdr->eh_entries, -m);
757 cpu_to_le16(le16_to_cpu(path[depth].p_hdr->eh_entries)-m);
758 err = ext4_ext_dirty(handle, inode, path + depth); 757 err = ext4_ext_dirty(handle, inode, path + depth);
759 if (err) 758 if (err)
760 goto cleanup; 759 goto cleanup;
@@ -817,8 +816,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
817 if (m) { 816 if (m) {
818 memmove(++fidx, path[i].p_idx - m, 817 memmove(++fidx, path[i].p_idx - m,
819 sizeof(struct ext4_extent_idx) * m); 818 sizeof(struct ext4_extent_idx) * m);
820 neh->eh_entries = 819 le16_add_cpu(&neh->eh_entries, m);
821 cpu_to_le16(le16_to_cpu(neh->eh_entries) + m);
822 } 820 }
823 set_buffer_uptodate(bh); 821 set_buffer_uptodate(bh);
824 unlock_buffer(bh); 822 unlock_buffer(bh);
@@ -834,7 +832,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
834 err = ext4_ext_get_access(handle, inode, path + i); 832 err = ext4_ext_get_access(handle, inode, path + i);
835 if (err) 833 if (err)
836 goto cleanup; 834 goto cleanup;
837 path[i].p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path[i].p_hdr->eh_entries)-m); 835 le16_add_cpu(&path[i].p_hdr->eh_entries, -m);
838 err = ext4_ext_dirty(handle, inode, path + i); 836 err = ext4_ext_dirty(handle, inode, path + i);
839 if (err) 837 if (err)
840 goto cleanup; 838 goto cleanup;
@@ -1369,7 +1367,7 @@ int ext4_ext_try_to_merge(struct inode *inode,
1369 * sizeof(struct ext4_extent); 1367 * sizeof(struct ext4_extent);
1370 memmove(ex + 1, ex + 2, len); 1368 memmove(ex + 1, ex + 2, len);
1371 } 1369 }
1372 eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries) - 1); 1370 le16_add_cpu(&eh->eh_entries, -1);
1373 merge_done = 1; 1371 merge_done = 1;
1374 WARN_ON(eh->eh_entries == 0); 1372 WARN_ON(eh->eh_entries == 0);
1375 if (!eh->eh_entries) 1373 if (!eh->eh_entries)
@@ -1560,7 +1558,7 @@ has_space:
1560 path[depth].p_ext = nearex; 1558 path[depth].p_ext = nearex;
1561 } 1559 }
1562 1560
1563 eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)+1); 1561 le16_add_cpu(&eh->eh_entries, 1);
1564 nearex = path[depth].p_ext; 1562 nearex = path[depth].p_ext;
1565 nearex->ee_block = newext->ee_block; 1563 nearex->ee_block = newext->ee_block;
1566 ext4_ext_store_pblock(nearex, ext_pblock(newext)); 1564 ext4_ext_store_pblock(nearex, ext_pblock(newext));
@@ -1699,7 +1697,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
1699 err = ext4_ext_get_access(handle, inode, path); 1697 err = ext4_ext_get_access(handle, inode, path);
1700 if (err) 1698 if (err)
1701 return err; 1699 return err;
1702 path->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path->p_hdr->eh_entries)-1); 1700 le16_add_cpu(&path->p_hdr->eh_entries, -1);
1703 err = ext4_ext_dirty(handle, inode, path); 1701 err = ext4_ext_dirty(handle, inode, path);
1704 if (err) 1702 if (err)
1705 return err; 1703 return err;
@@ -1902,7 +1900,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
1902 if (num == 0) { 1900 if (num == 0) {
1903 /* this extent is removed; mark slot entirely unused */ 1901 /* this extent is removed; mark slot entirely unused */
1904 ext4_ext_store_pblock(ex, 0); 1902 ext4_ext_store_pblock(ex, 0);
1905 eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1); 1903 le16_add_cpu(&eh->eh_entries, -1);
1906 } 1904 }
1907 1905
1908 ex->ee_block = cpu_to_le32(block); 1906 ex->ee_block = cpu_to_le32(block);
@@ -1979,7 +1977,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
1979 * We start scanning from right side, freeing all the blocks 1977 * We start scanning from right side, freeing all the blocks
1980 * after i_size and walking into the tree depth-wise. 1978 * after i_size and walking into the tree depth-wise.
1981 */ 1979 */
1982 path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_KERNEL); 1980 path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS);
1983 if (path == NULL) { 1981 if (path == NULL) {
1984 ext4_journal_stop(handle); 1982 ext4_journal_stop(handle);
1985 return -ENOMEM; 1983 return -ENOMEM;
@@ -2138,6 +2136,82 @@ void ext4_ext_release(struct super_block *sb)
2138#endif 2136#endif
2139} 2137}
2140 2138
2139static void bi_complete(struct bio *bio, int error)
2140{
2141 complete((struct completion *)bio->bi_private);
2142}
2143
2144/* FIXME!! we need to try to merge to left or right after zero-out */
2145static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
2146{
2147 int ret = -EIO;
2148 struct bio *bio;
2149 int blkbits, blocksize;
2150 sector_t ee_pblock;
2151 struct completion event;
2152 unsigned int ee_len, len, done, offset;
2153
2154
2155 blkbits = inode->i_blkbits;
2156 blocksize = inode->i_sb->s_blocksize;
2157 ee_len = ext4_ext_get_actual_len(ex);
2158 ee_pblock = ext_pblock(ex);
2159
2160 /* convert ee_pblock to 512 byte sectors */
2161 ee_pblock = ee_pblock << (blkbits - 9);
2162
2163 while (ee_len > 0) {
2164
2165 if (ee_len > BIO_MAX_PAGES)
2166 len = BIO_MAX_PAGES;
2167 else
2168 len = ee_len;
2169
2170 bio = bio_alloc(GFP_NOIO, len);
2171 if (!bio)
2172 return -ENOMEM;
2173 bio->bi_sector = ee_pblock;
2174 bio->bi_bdev = inode->i_sb->s_bdev;
2175
2176 done = 0;
2177 offset = 0;
2178 while (done < len) {
2179 ret = bio_add_page(bio, ZERO_PAGE(0),
2180 blocksize, offset);
2181 if (ret != blocksize) {
2182 /*
2183 * We can't add any more pages because of
2184 * hardware limitations. Start a new bio.
2185 */
2186 break;
2187 }
2188 done++;
2189 offset += blocksize;
2190 if (offset >= PAGE_CACHE_SIZE)
2191 offset = 0;
2192 }
2193
2194 init_completion(&event);
2195 bio->bi_private = &event;
2196 bio->bi_end_io = bi_complete;
2197 submit_bio(WRITE, bio);
2198 wait_for_completion(&event);
2199
2200 if (test_bit(BIO_UPTODATE, &bio->bi_flags))
2201 ret = 0;
2202 else {
2203 ret = -EIO;
2204 break;
2205 }
2206 bio_put(bio);
2207 ee_len -= done;
2208 ee_pblock += done << (blkbits - 9);
2209 }
2210 return ret;
2211}
2212
2213#define EXT4_EXT_ZERO_LEN 7
2214
2141/* 2215/*
2142 * This function is called by ext4_ext_get_blocks() if someone tries to write 2216 * This function is called by ext4_ext_get_blocks() if someone tries to write
2143 * to an uninitialized extent. It may result in splitting the uninitialized 2217 * to an uninitialized extent. It may result in splitting the uninitialized
@@ -2154,7 +2228,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2154 ext4_lblk_t iblock, 2228 ext4_lblk_t iblock,
2155 unsigned long max_blocks) 2229 unsigned long max_blocks)
2156{ 2230{
2157 struct ext4_extent *ex, newex; 2231 struct ext4_extent *ex, newex, orig_ex;
2158 struct ext4_extent *ex1 = NULL; 2232 struct ext4_extent *ex1 = NULL;
2159 struct ext4_extent *ex2 = NULL; 2233 struct ext4_extent *ex2 = NULL;
2160 struct ext4_extent *ex3 = NULL; 2234 struct ext4_extent *ex3 = NULL;
@@ -2173,10 +2247,26 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2173 allocated = ee_len - (iblock - ee_block); 2247 allocated = ee_len - (iblock - ee_block);
2174 newblock = iblock - ee_block + ext_pblock(ex); 2248 newblock = iblock - ee_block + ext_pblock(ex);
2175 ex2 = ex; 2249 ex2 = ex;
2250 orig_ex.ee_block = ex->ee_block;
2251 orig_ex.ee_len = cpu_to_le16(ee_len);
2252 ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
2176 2253
2177 err = ext4_ext_get_access(handle, inode, path + depth); 2254 err = ext4_ext_get_access(handle, inode, path + depth);
2178 if (err) 2255 if (err)
2179 goto out; 2256 goto out;
2257 /* If the extent has less than 2*EXT4_EXT_ZERO_LEN blocks, zero out directly */
2258 if (ee_len <= 2*EXT4_EXT_ZERO_LEN) {
2259 err = ext4_ext_zeroout(inode, &orig_ex);
2260 if (err)
2261 goto fix_extent_len;
2262 /* update the extent length and mark as initialized */
2263 ex->ee_block = orig_ex.ee_block;
2264 ex->ee_len = orig_ex.ee_len;
2265 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2266 ext4_ext_dirty(handle, inode, path + depth);
2267 /* zeroed the full extent */
2268 return allocated;
2269 }
2180 2270
2181 /* ex1: ee_block to iblock - 1 : uninitialized */ 2271 /* ex1: ee_block to iblock - 1 : uninitialized */
2182 if (iblock > ee_block) { 2272 if (iblock > ee_block) {
@@ -2195,19 +2285,103 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2195 /* ex3: to ee_block + ee_len : uninitialised */ 2285 /* ex3: to ee_block + ee_len : uninitialised */
2196 if (allocated > max_blocks) { 2286 if (allocated > max_blocks) {
2197 unsigned int newdepth; 2287 unsigned int newdepth;
2288 /* If the extent has less than EXT4_EXT_ZERO_LEN blocks, zero out directly */
2289 if (allocated <= EXT4_EXT_ZERO_LEN) {
2290 /* Mark first half uninitialized.
2291 * Mark second half initialized and zero out the
2292 * initialized extent
2293 */
2294 ex->ee_block = orig_ex.ee_block;
2295 ex->ee_len = cpu_to_le16(ee_len - allocated);
2296 ext4_ext_mark_uninitialized(ex);
2297 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2298 ext4_ext_dirty(handle, inode, path + depth);
2299
2300 ex3 = &newex;
2301 ex3->ee_block = cpu_to_le32(iblock);
2302 ext4_ext_store_pblock(ex3, newblock);
2303 ex3->ee_len = cpu_to_le16(allocated);
2304 err = ext4_ext_insert_extent(handle, inode, path, ex3);
2305 if (err == -ENOSPC) {
2306 err = ext4_ext_zeroout(inode, &orig_ex);
2307 if (err)
2308 goto fix_extent_len;
2309 ex->ee_block = orig_ex.ee_block;
2310 ex->ee_len = orig_ex.ee_len;
2311 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2312 ext4_ext_dirty(handle, inode, path + depth);
2313 /* zeroed the full extent */
2314 return allocated;
2315
2316 } else if (err)
2317 goto fix_extent_len;
2318
2319 /*
2320 * We need to zero out the second half because
2321 * a fallocate request can update file size and
2322 * converting the second half to initialized extent
2323 * implies that we can leak some junk data to user
2324 * space.
2325 */
2326 err = ext4_ext_zeroout(inode, ex3);
2327 if (err) {
2328 /*
2329 * We should actually mark the
2330 * second half as uninit and return error
2331 * Insert would have changed the extent
2332 */
2333 depth = ext_depth(inode);
2334 ext4_ext_drop_refs(path);
2335 path = ext4_ext_find_extent(inode,
2336 iblock, path);
2337 if (IS_ERR(path)) {
2338 err = PTR_ERR(path);
2339 return err;
2340 }
2341 ex = path[depth].p_ext;
2342 err = ext4_ext_get_access(handle, inode,
2343 path + depth);
2344 if (err)
2345 return err;
2346 ext4_ext_mark_uninitialized(ex);
2347 ext4_ext_dirty(handle, inode, path + depth);
2348 return err;
2349 }
2350
2351 /* zeroed the second half */
2352 return allocated;
2353 }
2198 ex3 = &newex; 2354 ex3 = &newex;
2199 ex3->ee_block = cpu_to_le32(iblock + max_blocks); 2355 ex3->ee_block = cpu_to_le32(iblock + max_blocks);
2200 ext4_ext_store_pblock(ex3, newblock + max_blocks); 2356 ext4_ext_store_pblock(ex3, newblock + max_blocks);
2201 ex3->ee_len = cpu_to_le16(allocated - max_blocks); 2357 ex3->ee_len = cpu_to_le16(allocated - max_blocks);
2202 ext4_ext_mark_uninitialized(ex3); 2358 ext4_ext_mark_uninitialized(ex3);
2203 err = ext4_ext_insert_extent(handle, inode, path, ex3); 2359 err = ext4_ext_insert_extent(handle, inode, path, ex3);
2204 if (err) 2360 if (err == -ENOSPC) {
2205 goto out; 2361 err = ext4_ext_zeroout(inode, &orig_ex);
2362 if (err)
2363 goto fix_extent_len;
2364 /* update the extent length and mark as initialized */
2365 ex->ee_block = orig_ex.ee_block;
2366 ex->ee_len = orig_ex.ee_len;
2367 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2368 ext4_ext_dirty(handle, inode, path + depth);
2369 /* zeroed the full extent */
2370 return allocated;
2371
2372 } else if (err)
2373 goto fix_extent_len;
2206 /* 2374 /*
2207 * The depth, and hence eh & ex might change 2375 * The depth, and hence eh & ex might change
2208 * as part of the insert above. 2376 * as part of the insert above.
2209 */ 2377 */
2210 newdepth = ext_depth(inode); 2378 newdepth = ext_depth(inode);
2379 /*
2380 * update the extent length after successful insert of the
2381 * split extent
2382 */
2383 orig_ex.ee_len = cpu_to_le16(ee_len -
2384 ext4_ext_get_actual_len(ex3));
2211 if (newdepth != depth) { 2385 if (newdepth != depth) {
2212 depth = newdepth; 2386 depth = newdepth;
2213 ext4_ext_drop_refs(path); 2387 ext4_ext_drop_refs(path);
@@ -2226,6 +2400,24 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2226 goto out; 2400 goto out;
2227 } 2401 }
2228 allocated = max_blocks; 2402 allocated = max_blocks;
2403
2404 /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying
2405 * to insert an extent in the middle, zero out directly;
2406 * otherwise give the extent a chance to merge to the left
2407 */
2408 if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN &&
2409 iblock != ee_block) {
2410 err = ext4_ext_zeroout(inode, &orig_ex);
2411 if (err)
2412 goto fix_extent_len;
2413 /* update the extent length and mark as initialized */
2414 ex->ee_block = orig_ex.ee_block;
2415 ex->ee_len = orig_ex.ee_len;
2416 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2417 ext4_ext_dirty(handle, inode, path + depth);
2418 /* zero out the first half */
2419 return allocated;
2420 }
2229 } 2421 }
2230 /* 2422 /*
2231 * If there was a change of depth as part of the 2423 * If there was a change of depth as part of the
@@ -2282,8 +2474,29 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2282 goto out; 2474 goto out;
2283insert: 2475insert:
2284 err = ext4_ext_insert_extent(handle, inode, path, &newex); 2476 err = ext4_ext_insert_extent(handle, inode, path, &newex);
2477 if (err == -ENOSPC) {
2478 err = ext4_ext_zeroout(inode, &orig_ex);
2479 if (err)
2480 goto fix_extent_len;
2481 /* update the extent length and mark as initialized */
2482 ex->ee_block = orig_ex.ee_block;
2483 ex->ee_len = orig_ex.ee_len;
2484 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2485 ext4_ext_dirty(handle, inode, path + depth);
2486 /* zero out the first half */
2487 return allocated;
2488 } else if (err)
2489 goto fix_extent_len;
2285out: 2490out:
2286 return err ? err : allocated; 2491 return err ? err : allocated;
2492
2493fix_extent_len:
2494 ex->ee_block = orig_ex.ee_block;
2495 ex->ee_len = orig_ex.ee_len;
2496 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2497 ext4_ext_mark_uninitialized(ex);
2498 ext4_ext_dirty(handle, inode, path + depth);
2499 return err;
2287} 2500}
2288 2501
2289/* 2502/*
@@ -2393,8 +2606,20 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2393 } 2606 }
2394 if (create == EXT4_CREATE_UNINITIALIZED_EXT) 2607 if (create == EXT4_CREATE_UNINITIALIZED_EXT)
2395 goto out; 2608 goto out;
2396 if (!create) 2609 if (!create) {
2610 /*
2611 * We have blocks reserved already. We
2612 * return allocated blocks so that delalloc
2613 * won't do block reservation for us. But
2614 * the buffer head will be unmapped so that
2615 * a read from the block returns 0s.
2616 */
2617 if (allocated > max_blocks)
2618 allocated = max_blocks;
2619 /* mark the buffer unwritten */
2620 __set_bit(BH_Unwritten, &bh_result->b_state);
2397 goto out2; 2621 goto out2;
2622 }
2398 2623
2399 ret = ext4_ext_convert_to_initialized(handle, inode, 2624 ret = ext4_ext_convert_to_initialized(handle, inode,
2400 path, iblock, 2625 path, iblock,
@@ -2584,6 +2809,8 @@ out_stop:
2584 ext4_orphan_del(handle, inode); 2809 ext4_orphan_del(handle, inode);
2585 2810
2586 up_write(&EXT4_I(inode)->i_data_sem); 2811 up_write(&EXT4_I(inode)->i_data_sem);
2812 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
2813 ext4_mark_inode_dirty(handle, inode);
2587 ext4_journal_stop(handle); 2814 ext4_journal_stop(handle);
2588} 2815}
2589 2816
@@ -2608,6 +2835,28 @@ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
2608 return needed; 2835 return needed;
2609} 2836}
2610 2837
2838static void ext4_falloc_update_inode(struct inode *inode,
2839 int mode, loff_t new_size, int update_ctime)
2840{
2841 struct timespec now;
2842
2843 if (update_ctime) {
2844 now = current_fs_time(inode->i_sb);
2845 if (!timespec_equal(&inode->i_ctime, &now))
2846 inode->i_ctime = now;
2847 }
2848 /*
2849 * Update only when preallocation was requested beyond
2850 * the file size.
2851 */
2852 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
2853 new_size > i_size_read(inode)) {
2854 i_size_write(inode, new_size);
2855 EXT4_I(inode)->i_disksize = new_size;
2856 }
2857
2858}
2859
2611/* 2860/*
2612 * preallocate space for a file. This implements ext4's fallocate inode 2861 * preallocate space for a file. This implements ext4's fallocate inode
2613 * operation, which gets called from sys_fallocate system call. 2862 * operation, which gets called from sys_fallocate system call.
@@ -2619,8 +2868,8 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
2619{ 2868{
2620 handle_t *handle; 2869 handle_t *handle;
2621 ext4_lblk_t block; 2870 ext4_lblk_t block;
2871 loff_t new_size;
2622 unsigned long max_blocks; 2872 unsigned long max_blocks;
2623 ext4_fsblk_t nblocks = 0;
2624 int ret = 0; 2873 int ret = 0;
2625 int ret2 = 0; 2874 int ret2 = 0;
2626 int retries = 0; 2875 int retries = 0;
@@ -2639,9 +2888,12 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
2639 return -ENODEV; 2888 return -ENODEV;
2640 2889
2641 block = offset >> blkbits; 2890 block = offset >> blkbits;
2891 /*
2892 * We can't just convert len to max_blocks because
2893 * If blocksize = 4096 offset = 3072 and len = 2048
2894 */
2642 max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) 2895 max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
2643 - block; 2896 - block;
2644
2645 /* 2897 /*
2646 * credits to insert 1 extent into extent tree + buffers to be able to 2898 * credits to insert 1 extent into extent tree + buffers to be able to
2647 * modify 1 super block, 1 block bitmap and 1 group descriptor. 2899 * modify 1 super block, 1 block bitmap and 1 group descriptor.
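To make the new comment above concrete (worked example, not part of the patch): with a 4 KiB block size (blkbits = 12), offset = 3072 and len = 2048 cover bytes 3072..5119, i.e. logical blocks 0 and 1; block = 3072 >> 12 = 0 and max_blocks = (EXT4_BLOCK_ALIGN(5120, 12) >> 12) - 0 = 2, whereas a naive len >> blkbits would have given 0.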
@@ -2657,7 +2909,6 @@ retry:
2657 ret = PTR_ERR(handle); 2909 ret = PTR_ERR(handle);
2658 break; 2910 break;
2659 } 2911 }
2660
2661 ret = ext4_get_blocks_wrap(handle, inode, block, 2912 ret = ext4_get_blocks_wrap(handle, inode, block,
2662 max_blocks, &map_bh, 2913 max_blocks, &map_bh,
2663 EXT4_CREATE_UNINITIALIZED_EXT, 0); 2914 EXT4_CREATE_UNINITIALIZED_EXT, 0);
@@ -2673,61 +2924,24 @@ retry:
2673 ret2 = ext4_journal_stop(handle); 2924 ret2 = ext4_journal_stop(handle);
2674 break; 2925 break;
2675 } 2926 }
2676 if (ret > 0) { 2927 if ((block + ret) >= (EXT4_BLOCK_ALIGN(offset + len,
2677 /* check wrap through sign-bit/zero here */ 2928 blkbits) >> blkbits))
2678 if ((block + ret) < 0 || (block + ret) < block) { 2929 new_size = offset + len;
2679 ret = -EIO; 2930 else
2680 ext4_mark_inode_dirty(handle, inode); 2931 new_size = (block + ret) << blkbits;
2681 ret2 = ext4_journal_stop(handle);
2682 break;
2683 }
2684 if (buffer_new(&map_bh) && ((block + ret) >
2685 (EXT4_BLOCK_ALIGN(i_size_read(inode), blkbits)
2686 >> blkbits)))
2687 nblocks = nblocks + ret;
2688 }
2689
2690 /* Update ctime if new blocks get allocated */
2691 if (nblocks) {
2692 struct timespec now;
2693
2694 now = current_fs_time(inode->i_sb);
2695 if (!timespec_equal(&inode->i_ctime, &now))
2696 inode->i_ctime = now;
2697 }
2698 2932
2933 ext4_falloc_update_inode(inode, mode, new_size,
2934 buffer_new(&map_bh));
2699 ext4_mark_inode_dirty(handle, inode); 2935 ext4_mark_inode_dirty(handle, inode);
2700 ret2 = ext4_journal_stop(handle); 2936 ret2 = ext4_journal_stop(handle);
2701 if (ret2) 2937 if (ret2)
2702 break; 2938 break;
2703 } 2939 }
2704 2940 if (ret == -ENOSPC &&
2705 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 2941 ext4_should_retry_alloc(inode->i_sb, &retries)) {
2942 ret = 0;
2706 goto retry; 2943 goto retry;
2707
2708 /*
2709 * Time to update the file size.
2710 * Update only when preallocation was requested beyond the file size.
2711 */
2712 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
2713 (offset + len) > i_size_read(inode)) {
2714 if (ret > 0) {
2715 /*
2716 * if no error, we assume preallocation succeeded
2717 * completely
2718 */
2719 i_size_write(inode, offset + len);
2720 EXT4_I(inode)->i_disksize = i_size_read(inode);
2721 } else if (ret < 0 && nblocks) {
2722 /* Handle partial allocation scenario */
2723 loff_t newsize;
2724
2725 newsize = (nblocks << blkbits) + i_size_read(inode);
2726 i_size_write(inode, EXT4_BLOCK_ALIGN(newsize, blkbits));
2727 EXT4_I(inode)->i_disksize = i_size_read(inode);
2728 }
2729 } 2944 }
2730
2731 mutex_unlock(&inode->i_mutex); 2945 mutex_unlock(&inode->i_mutex);
2732 return ret > 0 ? ret2 : ret; 2946 return ret > 0 ? ret2 : ret;
2733} 2947}
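The comment added above the EXT4_BLOCK_ALIGN() call is the heart of the max_blocks computation: a request can straddle block boundaries, so len cannot simply be shifted down by blkbits. A minimal userspace sketch of the same arithmetic; block_align() below is an assumed stand-in for EXT4_BLOCK_ALIGN(), not the kernel macro itself:

    #include <stdio.h>

    /* assumed equivalent of EXT4_BLOCK_ALIGN(): round a byte count up to a block boundary */
    static unsigned long long block_align(unsigned long long size, unsigned blkbits)
    {
        return (size + (1ULL << blkbits) - 1) & ~((1ULL << blkbits) - 1);
    }

    int main(void)
    {
        unsigned blkbits = 12;                         /* blocksize = 4096 */
        unsigned long long offset = 3072, len = 2048;  /* the example from the comment */

        unsigned long long block = offset >> blkbits;  /* 0 */
        unsigned long long max_blocks =
            (block_align(len + offset, blkbits) >> blkbits) - block;

        /* len >> blkbits would give 0, yet the request touches blocks 0 and 1 */
        printf("block=%llu max_blocks=%llu naive=%llu\n",
               block, max_blocks, len >> blkbits);
        return 0;
    }

With the values from the comment this prints block=0 max_blocks=2 naive=0, which is why the aligned end minus the starting block is used instead of a plain shift of len.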
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index ac35ec58db55..4159be6366ab 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -21,8 +21,8 @@
21#include <linux/time.h> 21#include <linux/time.h>
22#include <linux/fs.h> 22#include <linux/fs.h>
23#include <linux/jbd2.h> 23#include <linux/jbd2.h>
24#include <linux/ext4_fs.h> 24#include "ext4.h"
25#include <linux/ext4_jbd2.h> 25#include "ext4_jbd2.h"
26#include "xattr.h" 26#include "xattr.h"
27#include "acl.h" 27#include "acl.h"
28 28
@@ -129,7 +129,7 @@ const struct file_operations ext4_file_operations = {
129 .write = do_sync_write, 129 .write = do_sync_write,
130 .aio_read = generic_file_aio_read, 130 .aio_read = generic_file_aio_read,
131 .aio_write = ext4_file_write, 131 .aio_write = ext4_file_write,
132 .ioctl = ext4_ioctl, 132 .unlocked_ioctl = ext4_ioctl,
133#ifdef CONFIG_COMPAT 133#ifdef CONFIG_COMPAT
134 .compat_ioctl = ext4_compat_ioctl, 134 .compat_ioctl = ext4_compat_ioctl,
135#endif 135#endif
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 8d50879d1c2c..1c8ba48d4f8d 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -27,8 +27,8 @@
27#include <linux/sched.h> 27#include <linux/sched.h>
28#include <linux/writeback.h> 28#include <linux/writeback.h>
29#include <linux/jbd2.h> 29#include <linux/jbd2.h>
30#include <linux/ext4_fs.h> 30#include "ext4.h"
31#include <linux/ext4_jbd2.h> 31#include "ext4_jbd2.h"
32 32
33/* 33/*
34 * akpm: A new design for ext4_sync_file(). 34 * akpm: A new design for ext4_sync_file().
@@ -72,6 +72,9 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync)
72 goto out; 72 goto out;
73 } 73 }
74 74
75 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
76 goto out;
77
75 /* 78 /*
76 * The VFS has written the file data. If the inode is unaltered 79 * The VFS has written the file data. If the inode is unaltered
77 * then we need not start a commit. 80 * then we need not start a commit.
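The three added lines give fdatasync() a fast path: when only data integrity was requested and the inode carries no datasync-critical dirty state, no journal commit is forced. A small userspace model of that decision; the flag value and helper below are illustrative stand-ins, not the kernel definitions:

    #include <stdio.h>

    #define I_DIRTY_DATASYNC 0x2   /* stand-in: metadata that even fdatasync() must flush */

    /* model of the new check: a pure datasync with no datasync-critical
     * metadata pending can return without starting a commit */
    static int needs_commit(unsigned int i_state, int datasync)
    {
        if (datasync && !(i_state & I_DIRTY_DATASYNC))
            return 0;
        return 1;
    }

    int main(void)
    {
        printf("datasync, only atime dirty -> commit? %d\n", needs_commit(0x0, 1));
        printf("datasync, i_size changed   -> commit? %d\n",
               needs_commit(I_DIRTY_DATASYNC, 1));
        return 0;
    }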
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index 1555024e3b36..1d6329dbe390 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -11,8 +11,8 @@
11 11
12#include <linux/fs.h> 12#include <linux/fs.h>
13#include <linux/jbd2.h> 13#include <linux/jbd2.h>
14#include <linux/ext4_fs.h>
15#include <linux/cryptohash.h> 14#include <linux/cryptohash.h>
15#include "ext4.h"
16 16
17#define DELTA 0x9E3779B9 17#define DELTA 0x9E3779B9
18 18
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 486e46a3918d..c6efbab0c801 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -15,8 +15,6 @@
15#include <linux/time.h> 15#include <linux/time.h>
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/jbd2.h> 17#include <linux/jbd2.h>
18#include <linux/ext4_fs.h>
19#include <linux/ext4_jbd2.h>
20#include <linux/stat.h> 18#include <linux/stat.h>
21#include <linux/string.h> 19#include <linux/string.h>
22#include <linux/quotaops.h> 20#include <linux/quotaops.h>
@@ -25,7 +23,8 @@
25#include <linux/bitops.h> 23#include <linux/bitops.h>
26#include <linux/blkdev.h> 24#include <linux/blkdev.h>
27#include <asm/byteorder.h> 25#include <asm/byteorder.h>
28 26#include "ext4.h"
27#include "ext4_jbd2.h"
29#include "xattr.h" 28#include "xattr.h"
30#include "acl.h" 29#include "acl.h"
31#include "group.h" 30#include "group.h"
@@ -75,7 +74,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
75 /* If checksum is bad mark all blocks and inodes use to prevent 74 /* If checksum is bad mark all blocks and inodes use to prevent
76 * allocation, essentially implementing a per-group read-only flag. */ 75 * allocation, essentially implementing a per-group read-only flag. */
77 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { 76 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
78 ext4_error(sb, __FUNCTION__, "Checksum bad for group %lu\n", 77 ext4_error(sb, __func__, "Checksum bad for group %lu\n",
79 block_group); 78 block_group);
80 gdp->bg_free_blocks_count = 0; 79 gdp->bg_free_blocks_count = 0;
81 gdp->bg_free_inodes_count = 0; 80 gdp->bg_free_inodes_count = 0;
@@ -223,11 +222,9 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
223 222
224 if (gdp) { 223 if (gdp) {
225 spin_lock(sb_bgl_lock(sbi, block_group)); 224 spin_lock(sb_bgl_lock(sbi, block_group));
226 gdp->bg_free_inodes_count = cpu_to_le16( 225 le16_add_cpu(&gdp->bg_free_inodes_count, 1);
227 le16_to_cpu(gdp->bg_free_inodes_count) + 1);
228 if (is_directory) 226 if (is_directory)
229 gdp->bg_used_dirs_count = cpu_to_le16( 227 le16_add_cpu(&gdp->bg_used_dirs_count, -1);
230 le16_to_cpu(gdp->bg_used_dirs_count) - 1);
231 gdp->bg_checksum = ext4_group_desc_csum(sbi, 228 gdp->bg_checksum = ext4_group_desc_csum(sbi,
232 block_group, gdp); 229 block_group, gdp);
233 spin_unlock(sb_bgl_lock(sbi, block_group)); 230 spin_unlock(sb_bgl_lock(sbi, block_group));
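Replacing the open-coded cpu_to_le16(le16_to_cpu(x) +/- n) sequences with le16_add_cpu() is mechanical, but it is worth seeing what the helper does to the on-disk little-endian counters. A userspace model, using glibc's htole16()/le16toh() as stand-ins for the kernel conversion macros:

    #include <endian.h>    /* htole16()/le16toh() stand in for cpu_to_le16()/le16_to_cpu() */
    #include <stdint.h>
    #include <stdio.h>

    /* model of le16_add_cpu(): add a CPU-order delta to a little-endian field in place */
    static void le16_add_cpu_model(uint16_t *var, int16_t val)
    {
        *var = htole16((uint16_t)(le16toh(*var) + val));
    }

    int main(void)
    {
        uint16_t bg_free_inodes_count = htole16(100);   /* on-disk representation */

        le16_add_cpu_model(&bg_free_inodes_count, 1);   /* an inode was freed */
        le16_add_cpu_model(&bg_free_inodes_count, -1);  /* an inode was allocated */
        printf("free inodes: %u\n", le16toh(bg_free_inodes_count));
        return 0;
    }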
@@ -588,7 +585,7 @@ got:
588 ino++; 585 ino++;
589 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || 586 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
590 ino > EXT4_INODES_PER_GROUP(sb)) { 587 ino > EXT4_INODES_PER_GROUP(sb)) {
591 ext4_error(sb, __FUNCTION__, 588 ext4_error(sb, __func__,
592 "reserved inode or inode > inodes count - " 589 "reserved inode or inode > inodes count - "
593 "block_group = %lu, inode=%lu", group, 590 "block_group = %lu, inode=%lu", group,
594 ino + group * EXT4_INODES_PER_GROUP(sb)); 591 ino + group * EXT4_INODES_PER_GROUP(sb));
@@ -664,11 +661,9 @@ got:
664 cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino); 661 cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino);
665 } 662 }
666 663
667 gdp->bg_free_inodes_count = 664 le16_add_cpu(&gdp->bg_free_inodes_count, -1);
668 cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
669 if (S_ISDIR(mode)) { 665 if (S_ISDIR(mode)) {
670 gdp->bg_used_dirs_count = 666 le16_add_cpu(&gdp->bg_used_dirs_count, 1);
671 cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
672 } 667 }
673 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); 668 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
674 spin_unlock(sb_bgl_lock(sbi, group)); 669 spin_unlock(sb_bgl_lock(sbi, group));
@@ -744,23 +739,24 @@ got:
744 if (err) 739 if (err)
745 goto fail_free_drop; 740 goto fail_free_drop;
746 741
747 err = ext4_mark_inode_dirty(handle, inode);
748 if (err) {
749 ext4_std_error(sb, err);
750 goto fail_free_drop;
751 }
752 if (test_opt(sb, EXTENTS)) { 742 if (test_opt(sb, EXTENTS)) {
 753 /* set extent flag only for directory and file */ 743 /* set extent flag only for directory, file and normal symlink */
754 if (S_ISDIR(mode) || S_ISREG(mode)) { 744 if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) {
755 EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL; 745 EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL;
756 ext4_ext_tree_init(handle, inode); 746 ext4_ext_tree_init(handle, inode);
757 err = ext4_update_incompat_feature(handle, sb, 747 err = ext4_update_incompat_feature(handle, sb,
758 EXT4_FEATURE_INCOMPAT_EXTENTS); 748 EXT4_FEATURE_INCOMPAT_EXTENTS);
759 if (err) 749 if (err)
760 goto fail; 750 goto fail_free_drop;
761 } 751 }
762 } 752 }
763 753
754 err = ext4_mark_inode_dirty(handle, inode);
755 if (err) {
756 ext4_std_error(sb, err);
757 goto fail_free_drop;
758 }
759
764 ext4_debug("allocating inode %lu\n", inode->i_ino); 760 ext4_debug("allocating inode %lu\n", inode->i_ino);
765 goto really_out; 761 goto really_out;
766fail: 762fail:
@@ -796,7 +792,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
796 792
797 /* Error cases - e2fsck has already cleaned up for us */ 793 /* Error cases - e2fsck has already cleaned up for us */
798 if (ino > max_ino) { 794 if (ino > max_ino) {
799 ext4_warning(sb, __FUNCTION__, 795 ext4_warning(sb, __func__,
800 "bad orphan ino %lu! e2fsck was run?", ino); 796 "bad orphan ino %lu! e2fsck was run?", ino);
801 goto error; 797 goto error;
802 } 798 }
@@ -805,7 +801,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
805 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); 801 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
806 bitmap_bh = read_inode_bitmap(sb, block_group); 802 bitmap_bh = read_inode_bitmap(sb, block_group);
807 if (!bitmap_bh) { 803 if (!bitmap_bh) {
808 ext4_warning(sb, __FUNCTION__, 804 ext4_warning(sb, __func__,
809 "inode bitmap error for orphan %lu", ino); 805 "inode bitmap error for orphan %lu", ino);
810 goto error; 806 goto error;
811 } 807 }
@@ -830,7 +826,7 @@ iget_failed:
830 err = PTR_ERR(inode); 826 err = PTR_ERR(inode);
831 inode = NULL; 827 inode = NULL;
832bad_orphan: 828bad_orphan:
833 ext4_warning(sb, __FUNCTION__, 829 ext4_warning(sb, __func__,
834 "bad orphan inode %lu! e2fsck was run?", ino); 830 "bad orphan inode %lu! e2fsck was run?", ino);
835 printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n", 831 printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n",
836 bit, (unsigned long long)bitmap_bh->b_blocknr, 832 bit, (unsigned long long)bitmap_bh->b_blocknr,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8fab233cb05f..8d9707746413 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -25,7 +25,6 @@
25#include <linux/module.h> 25#include <linux/module.h>
26#include <linux/fs.h> 26#include <linux/fs.h>
27#include <linux/time.h> 27#include <linux/time.h>
28#include <linux/ext4_jbd2.h>
29#include <linux/jbd2.h> 28#include <linux/jbd2.h>
30#include <linux/highuid.h> 29#include <linux/highuid.h>
31#include <linux/pagemap.h> 30#include <linux/pagemap.h>
@@ -36,6 +35,7 @@
36#include <linux/mpage.h> 35#include <linux/mpage.h>
37#include <linux/uio.h> 36#include <linux/uio.h>
38#include <linux/bio.h> 37#include <linux/bio.h>
38#include "ext4_jbd2.h"
39#include "xattr.h" 39#include "xattr.h"
40#include "acl.h" 40#include "acl.h"
41 41
@@ -93,7 +93,7 @@ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
93 BUFFER_TRACE(bh, "call ext4_journal_revoke"); 93 BUFFER_TRACE(bh, "call ext4_journal_revoke");
94 err = ext4_journal_revoke(handle, blocknr, bh); 94 err = ext4_journal_revoke(handle, blocknr, bh);
95 if (err) 95 if (err)
96 ext4_abort(inode->i_sb, __FUNCTION__, 96 ext4_abort(inode->i_sb, __func__,
97 "error %d when attempting revoke", err); 97 "error %d when attempting revoke", err);
98 BUFFER_TRACE(bh, "exit"); 98 BUFFER_TRACE(bh, "exit");
99 return err; 99 return err;
@@ -985,6 +985,16 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
985 } else { 985 } else {
986 retval = ext4_get_blocks_handle(handle, inode, block, 986 retval = ext4_get_blocks_handle(handle, inode, block,
987 max_blocks, bh, create, extend_disksize); 987 max_blocks, bh, create, extend_disksize);
988
989 if (retval > 0 && buffer_new(bh)) {
990 /*
991 * We allocated new blocks which will result in
992 * i_data's format changing. Force the migrate
993 * to fail by clearing migrate flags
994 */
995 EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags &
996 ~EXT4_EXT_MIGRATE;
997 }
988 } 998 }
989 up_write((&EXT4_I(inode)->i_data_sem)); 999 up_write((&EXT4_I(inode)->i_data_sem));
990 return retval; 1000 return retval;
@@ -1230,7 +1240,7 @@ int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
1230{ 1240{
1231 int err = jbd2_journal_dirty_data(handle, bh); 1241 int err = jbd2_journal_dirty_data(handle, bh);
1232 if (err) 1242 if (err)
1233 ext4_journal_abort_handle(__FUNCTION__, __FUNCTION__, 1243 ext4_journal_abort_handle(__func__, __func__,
1234 bh, handle, err); 1244 bh, handle, err);
1235 return err; 1245 return err;
1236} 1246}
@@ -1301,10 +1311,11 @@ static int ext4_ordered_write_end(struct file *file,
1301 new_i_size = pos + copied; 1311 new_i_size = pos + copied;
1302 if (new_i_size > EXT4_I(inode)->i_disksize) 1312 if (new_i_size > EXT4_I(inode)->i_disksize)
1303 EXT4_I(inode)->i_disksize = new_i_size; 1313 EXT4_I(inode)->i_disksize = new_i_size;
1304 copied = ext4_generic_write_end(file, mapping, pos, len, copied, 1314 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
1305 page, fsdata); 1315 page, fsdata);
1306 if (copied < 0) 1316 copied = ret2;
1307 ret = copied; 1317 if (ret2 < 0)
1318 ret = ret2;
1308 } 1319 }
1309 ret2 = ext4_journal_stop(handle); 1320 ret2 = ext4_journal_stop(handle);
1310 if (!ret) 1321 if (!ret)
@@ -1329,10 +1340,11 @@ static int ext4_writeback_write_end(struct file *file,
1329 if (new_i_size > EXT4_I(inode)->i_disksize) 1340 if (new_i_size > EXT4_I(inode)->i_disksize)
1330 EXT4_I(inode)->i_disksize = new_i_size; 1341 EXT4_I(inode)->i_disksize = new_i_size;
1331 1342
1332 copied = ext4_generic_write_end(file, mapping, pos, len, copied, 1343 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
1333 page, fsdata); 1344 page, fsdata);
1334 if (copied < 0) 1345 copied = ret2;
1335 ret = copied; 1346 if (ret2 < 0)
1347 ret = ret2;
1336 1348
1337 ret2 = ext4_journal_stop(handle); 1349 ret2 = ext4_journal_stop(handle);
1338 if (!ret) 1350 if (!ret)
@@ -2501,12 +2513,10 @@ out_stop:
2501static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, 2513static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
2502 unsigned long ino, struct ext4_iloc *iloc) 2514 unsigned long ino, struct ext4_iloc *iloc)
2503{ 2515{
2504 unsigned long desc, group_desc;
2505 ext4_group_t block_group; 2516 ext4_group_t block_group;
2506 unsigned long offset; 2517 unsigned long offset;
2507 ext4_fsblk_t block; 2518 ext4_fsblk_t block;
2508 struct buffer_head *bh; 2519 struct ext4_group_desc *gdp;
2509 struct ext4_group_desc * gdp;
2510 2520
2511 if (!ext4_valid_inum(sb, ino)) { 2521 if (!ext4_valid_inum(sb, ino)) {
2512 /* 2522 /*
@@ -2518,22 +2528,10 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
2518 } 2528 }
2519 2529
2520 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); 2530 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
2521 if (block_group >= EXT4_SB(sb)->s_groups_count) { 2531 gdp = ext4_get_group_desc(sb, block_group, NULL);
2522 ext4_error(sb,"ext4_get_inode_block","group >= groups count"); 2532 if (!gdp)
2523 return 0; 2533 return 0;
2524 }
2525 smp_rmb();
2526 group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
2527 desc = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
2528 bh = EXT4_SB(sb)->s_group_desc[group_desc];
2529 if (!bh) {
2530 ext4_error (sb, "ext4_get_inode_block",
2531 "Descriptor not loaded");
2532 return 0;
2533 }
2534 2534
2535 gdp = (struct ext4_group_desc *)((__u8 *)bh->b_data +
2536 desc * EXT4_DESC_SIZE(sb));
2537 /* 2535 /*
2538 * Figure out the offset within the block group inode table 2536 * Figure out the offset within the block group inode table
2539 */ 2537 */
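With the descriptor lookup handed to ext4_get_group_desc(), what remains of ext4_get_inode_block() is the arithmetic that maps an inode number to its slot in the group's inode table. A worked userspace example of that mapping; the geometry (8192 inodes per group, 256-byte inodes, 4 KiB blocks) is illustrative:

    #include <stdio.h>

    int main(void)
    {
        unsigned long inodes_per_group = 8192;
        unsigned long inode_size = 256, blocksize = 4096;
        unsigned long ino = 12345;

        unsigned long block_group = (ino - 1) / inodes_per_group;  /* 1 */
        unsigned long index       = (ino - 1) % inodes_per_group;  /* 4152 */
        unsigned long byte_off    = index * inode_size;
        unsigned long table_block = byte_off / blocksize;  /* block within the inode table */
        unsigned long in_block    = byte_off % blocksize;  /* offset inside that block */

        /* prints: group=1 table_block=259 offset_in_block=2048 */
        printf("group=%lu table_block=%lu offset_in_block=%lu\n",
               block_group, table_block, in_block);
        return 0;
    }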
@@ -2976,7 +2974,8 @@ static int ext4_do_update_inode(handle_t *handle,
2976 if (ext4_inode_blocks_set(handle, raw_inode, ei)) 2974 if (ext4_inode_blocks_set(handle, raw_inode, ei))
2977 goto out_brelse; 2975 goto out_brelse;
2978 raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); 2976 raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
2979 raw_inode->i_flags = cpu_to_le32(ei->i_flags); 2977 /* clear the migrate flag in the raw_inode */
2978 raw_inode->i_flags = cpu_to_le32(ei->i_flags & ~EXT4_EXT_MIGRATE);
2980 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != 2979 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
2981 cpu_to_le32(EXT4_OS_HURD)) 2980 cpu_to_le32(EXT4_OS_HURD))
2982 raw_inode->i_file_acl_high = 2981 raw_inode->i_file_acl_high =
@@ -3374,7 +3373,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
3374 EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; 3373 EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND;
3375 if (mnt_count != 3374 if (mnt_count !=
3376 le16_to_cpu(sbi->s_es->s_mnt_count)) { 3375 le16_to_cpu(sbi->s_es->s_mnt_count)) {
3377 ext4_warning(inode->i_sb, __FUNCTION__, 3376 ext4_warning(inode->i_sb, __func__,
3378 "Unable to expand inode %lu. Delete" 3377 "Unable to expand inode %lu. Delete"
3379 " some EAs or run e2fsck.", 3378 " some EAs or run e2fsck.",
3380 inode->i_ino); 3379 inode->i_ino);
@@ -3415,7 +3414,7 @@ void ext4_dirty_inode(struct inode *inode)
3415 current_handle->h_transaction != handle->h_transaction) { 3414 current_handle->h_transaction != handle->h_transaction) {
3416 /* This task has a transaction open against a different fs */ 3415 /* This task has a transaction open against a different fs */
3417 printk(KERN_EMERG "%s: transactions do not match!\n", 3416 printk(KERN_EMERG "%s: transactions do not match!\n",
3418 __FUNCTION__); 3417 __func__);
3419 } else { 3418 } else {
3420 jbd_debug(5, "marking dirty. outer handle=%p\n", 3419 jbd_debug(5, "marking dirty. outer handle=%p\n",
3421 current_handle); 3420 current_handle);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 25b13ede8086..7a6c2f1faba6 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -10,17 +10,17 @@
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/jbd2.h> 11#include <linux/jbd2.h>
12#include <linux/capability.h> 12#include <linux/capability.h>
13#include <linux/ext4_fs.h>
14#include <linux/ext4_jbd2.h>
15#include <linux/time.h> 13#include <linux/time.h>
16#include <linux/compat.h> 14#include <linux/compat.h>
17#include <linux/smp_lock.h> 15#include <linux/smp_lock.h>
18#include <linux/mount.h> 16#include <linux/mount.h>
19#include <asm/uaccess.h> 17#include <asm/uaccess.h>
18#include "ext4_jbd2.h"
19#include "ext4.h"
20 20
21int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, 21long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
22 unsigned long arg)
23{ 22{
23 struct inode *inode = filp->f_dentry->d_inode;
24 struct ext4_inode_info *ei = EXT4_I(inode); 24 struct ext4_inode_info *ei = EXT4_I(inode);
25 unsigned int flags; 25 unsigned int flags;
26 unsigned short rsv_window_size; 26 unsigned short rsv_window_size;
@@ -277,9 +277,6 @@ setversion_out:
277#ifdef CONFIG_COMPAT 277#ifdef CONFIG_COMPAT
278long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 278long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
279{ 279{
280 struct inode *inode = file->f_path.dentry->d_inode;
281 int ret;
282
283 /* These are just misnamed, they actually get/put from/to user an int */ 280 /* These are just misnamed, they actually get/put from/to user an int */
284 switch (cmd) { 281 switch (cmd) {
285 case EXT4_IOC32_GETFLAGS: 282 case EXT4_IOC32_GETFLAGS:
@@ -319,9 +316,6 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
319 default: 316 default:
320 return -ENOIOCTLCMD; 317 return -ENOIOCTLCMD;
321 } 318 }
322 lock_kernel(); 319 return ext4_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
323 ret = ext4_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
324 unlock_kernel();
325 return ret;
326} 320}
327#endif 321#endif
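The ioctl.c hunks convert ext4_ioctl() from the BKL-protected .ioctl entry point to .unlocked_ioctl: the inode parameter disappears and is recovered from the struct file, and the compat path now just translates the pointer and forwards to the native handler. A hedged sketch of the general shape of such a conversion; the foo_* names and the FOO_IOC_GETSIZE command are placeholders, not ext4 code:

    #include <linux/fs.h>
    #include <linux/compat.h>
    #include <linux/ioctl.h>
    #include <linux/uaccess.h>

    #define FOO_IOC_GETSIZE _IOR('f', 1, loff_t)   /* hypothetical command */

    static long foo_unlocked_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
    {
        struct inode *inode = filp->f_dentry->d_inode;  /* no longer passed as a parameter */

        switch (cmd) {
        case FOO_IOC_GETSIZE:
            /* runs without the BKL; take whatever lock the command needs */
            return put_user(i_size_read(inode), (loff_t __user *)arg);
        default:
            return -ENOTTY;
        }
    }

    #ifdef CONFIG_COMPAT
    static long foo_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
    {
        /* translate 32-bit commands here if they differ, then reuse the native handler */
        return foo_unlocked_ioctl(filp, cmd, (unsigned long) compat_ptr(arg));
    }
    #endif

    static const struct file_operations foo_fops = {
        .unlocked_ioctl = foo_unlocked_ioctl,
    #ifdef CONFIG_COMPAT
        .compat_ioctl   = foo_compat_ioctl,
    #endif
    };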
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index ef97f19c2f9d..873ad9b3418c 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -21,21 +21,7 @@
21 * mballoc.c contains the multiblocks allocation routines 21 * mballoc.c contains the multiblocks allocation routines
22 */ 22 */
23 23
24#include <linux/time.h> 24#include "mballoc.h"
25#include <linux/fs.h>
26#include <linux/namei.h>
27#include <linux/ext4_jbd2.h>
28#include <linux/ext4_fs.h>
29#include <linux/quotaops.h>
30#include <linux/buffer_head.h>
31#include <linux/module.h>
32#include <linux/swap.h>
33#include <linux/proc_fs.h>
34#include <linux/pagemap.h>
35#include <linux/seq_file.h>
36#include <linux/version.h>
37#include "group.h"
38
39/* 25/*
40 * MUSTDO: 26 * MUSTDO:
41 * - test ext4_ext_search_left() and ext4_ext_search_right() 27 * - test ext4_ext_search_left() and ext4_ext_search_right()
@@ -345,288 +331,6 @@
345 * 331 *
346 */ 332 */
347 333
348/*
349 * with AGGRESSIVE_CHECK allocator runs consistency checks over
350 * structures. these checks slow things down a lot
351 */
352#define AGGRESSIVE_CHECK__
353
354/*
355 * with DOUBLE_CHECK defined mballoc creates persistent in-core
356 * bitmaps, maintains and uses them to check for double allocations
357 */
358#define DOUBLE_CHECK__
359
360/*
361 */
362#define MB_DEBUG__
363#ifdef MB_DEBUG
364#define mb_debug(fmt, a...) printk(fmt, ##a)
365#else
366#define mb_debug(fmt, a...)
367#endif
368
369/*
370 * with EXT4_MB_HISTORY mballoc stores last N allocations in memory
371 * and you can monitor it in /proc/fs/ext4/<dev>/mb_history
372 */
373#define EXT4_MB_HISTORY
374#define EXT4_MB_HISTORY_ALLOC 1 /* allocation */
375#define EXT4_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */
376#define EXT4_MB_HISTORY_DISCARD 4 /* preallocation discarded */
377#define EXT4_MB_HISTORY_FREE 8 /* free */
378
379#define EXT4_MB_HISTORY_DEFAULT (EXT4_MB_HISTORY_ALLOC | \
380 EXT4_MB_HISTORY_PREALLOC)
381
382/*
383 * How long mballoc can look for a best extent (in found extents)
384 */
385#define MB_DEFAULT_MAX_TO_SCAN 200
386
387/*
388 * How long mballoc must look for a best extent
389 */
390#define MB_DEFAULT_MIN_TO_SCAN 10
391
392/*
393 * How many groups mballoc will scan looking for the best chunk
394 */
395#define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5
396
397/*
398 * with 'ext4_mb_stats' allocator will collect stats that will be
399 * shown at umount. The collecting costs though!
400 */
401#define MB_DEFAULT_STATS 1
402
403/*
404 * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served
405 * by the stream allocator, which purpose is to pack requests
406 * as close each to other as possible to produce smooth I/O traffic
407 * We use locality group prealloc space for stream request.
408 * We can tune the same via /proc/fs/ext4/<parition>/stream_req
409 */
410#define MB_DEFAULT_STREAM_THRESHOLD 16 /* 64K */
411
412/*
413 * for which requests use 2^N search using buddies
414 */
415#define MB_DEFAULT_ORDER2_REQS 2
416
417/*
418 * default group prealloc size 512 blocks
419 */
420#define MB_DEFAULT_GROUP_PREALLOC 512
421
422static struct kmem_cache *ext4_pspace_cachep;
423static struct kmem_cache *ext4_ac_cachep;
424
425#ifdef EXT4_BB_MAX_BLOCKS
426#undef EXT4_BB_MAX_BLOCKS
427#endif
428#define EXT4_BB_MAX_BLOCKS 30
429
430struct ext4_free_metadata {
431 ext4_group_t group;
432 unsigned short num;
433 ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS];
434 struct list_head list;
435};
436
437struct ext4_group_info {
438 unsigned long bb_state;
439 unsigned long bb_tid;
440 struct ext4_free_metadata *bb_md_cur;
441 unsigned short bb_first_free;
442 unsigned short bb_free;
443 unsigned short bb_fragments;
444 struct list_head bb_prealloc_list;
445#ifdef DOUBLE_CHECK
446 void *bb_bitmap;
447#endif
448 unsigned short bb_counters[];
449};
450
451#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
452#define EXT4_GROUP_INFO_LOCKED_BIT 1
453
454#define EXT4_MB_GRP_NEED_INIT(grp) \
455 (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
456
457
458struct ext4_prealloc_space {
459 struct list_head pa_inode_list;
460 struct list_head pa_group_list;
461 union {
462 struct list_head pa_tmp_list;
463 struct rcu_head pa_rcu;
464 } u;
465 spinlock_t pa_lock;
466 atomic_t pa_count;
467 unsigned pa_deleted;
468 ext4_fsblk_t pa_pstart; /* phys. block */
469 ext4_lblk_t pa_lstart; /* log. block */
470 unsigned short pa_len; /* len of preallocated chunk */
471 unsigned short pa_free; /* how many blocks are free */
472 unsigned short pa_linear; /* consumed in one direction
473 * strictly, for grp prealloc */
474 spinlock_t *pa_obj_lock;
475 struct inode *pa_inode; /* hack, for history only */
476};
477
478
479struct ext4_free_extent {
480 ext4_lblk_t fe_logical;
481 ext4_grpblk_t fe_start;
482 ext4_group_t fe_group;
483 int fe_len;
484};
485
486/*
487 * Locality group:
488 * we try to group all related changes together
489 * so that writeback can flush/allocate them together as well
490 */
491struct ext4_locality_group {
492 /* for allocator */
493 struct mutex lg_mutex; /* to serialize allocates */
494 struct list_head lg_prealloc_list;/* list of preallocations */
495 spinlock_t lg_prealloc_lock;
496};
497
498struct ext4_allocation_context {
499 struct inode *ac_inode;
500 struct super_block *ac_sb;
501
502 /* original request */
503 struct ext4_free_extent ac_o_ex;
504
505 /* goal request (after normalization) */
506 struct ext4_free_extent ac_g_ex;
507
508 /* the best found extent */
509 struct ext4_free_extent ac_b_ex;
510
511 /* copy of the bext found extent taken before preallocation efforts */
512 struct ext4_free_extent ac_f_ex;
513
514 /* number of iterations done. we have to track to limit searching */
515 unsigned long ac_ex_scanned;
516 __u16 ac_groups_scanned;
517 __u16 ac_found;
518 __u16 ac_tail;
519 __u16 ac_buddy;
520 __u16 ac_flags; /* allocation hints */
521 __u8 ac_status;
522 __u8 ac_criteria;
523 __u8 ac_repeats;
524 __u8 ac_2order; /* if request is to allocate 2^N blocks and
525 * N > 0, the field stores N, otherwise 0 */
526 __u8 ac_op; /* operation, for history only */
527 struct page *ac_bitmap_page;
528 struct page *ac_buddy_page;
529 struct ext4_prealloc_space *ac_pa;
530 struct ext4_locality_group *ac_lg;
531};
532
533#define AC_STATUS_CONTINUE 1
534#define AC_STATUS_FOUND 2
535#define AC_STATUS_BREAK 3
536
537struct ext4_mb_history {
538 struct ext4_free_extent orig; /* orig allocation */
539 struct ext4_free_extent goal; /* goal allocation */
540 struct ext4_free_extent result; /* result allocation */
541 unsigned pid;
542 unsigned ino;
543 __u16 found; /* how many extents have been found */
544 __u16 groups; /* how many groups have been scanned */
545 __u16 tail; /* what tail broke some buddy */
546 __u16 buddy; /* buddy the tail ^^^ broke */
547 __u16 flags;
548 __u8 cr:3; /* which phase the result extent was found at */
549 __u8 op:4;
550 __u8 merged:1;
551};
552
553struct ext4_buddy {
554 struct page *bd_buddy_page;
555 void *bd_buddy;
556 struct page *bd_bitmap_page;
557 void *bd_bitmap;
558 struct ext4_group_info *bd_info;
559 struct super_block *bd_sb;
560 __u16 bd_blkbits;
561 ext4_group_t bd_group;
562};
563#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap)
564#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy)
565
566#ifndef EXT4_MB_HISTORY
567static inline void ext4_mb_store_history(struct ext4_allocation_context *ac)
568{
569 return;
570}
571#else
572static void ext4_mb_store_history(struct ext4_allocation_context *ac);
573#endif
574
575#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
576
577static struct proc_dir_entry *proc_root_ext4;
578struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
579ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
580 ext4_fsblk_t goal, unsigned long *count, int *errp);
581
582static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
583 ext4_group_t group);
584static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
585static void ext4_mb_free_committed_blocks(struct super_block *);
586static void ext4_mb_return_to_preallocation(struct inode *inode,
587 struct ext4_buddy *e4b, sector_t block,
588 int count);
589static void ext4_mb_put_pa(struct ext4_allocation_context *,
590 struct super_block *, struct ext4_prealloc_space *pa);
591static int ext4_mb_init_per_dev_proc(struct super_block *sb);
592static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
593
594
595static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
596{
597 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
598
599 bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
600}
601
602static inline void ext4_unlock_group(struct super_block *sb,
603 ext4_group_t group)
604{
605 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
606
607 bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
608}
609
610static inline int ext4_is_group_locked(struct super_block *sb,
611 ext4_group_t group)
612{
613 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
614
615 return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
616 &(grinfo->bb_state));
617}
618
619static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
620 struct ext4_free_extent *fex)
621{
622 ext4_fsblk_t block;
623
624 block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb)
625 + fex->fe_start
626 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
627 return block;
628}
629
630static inline void *mb_correct_addr_and_bit(int *bit, void *addr) 334static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
631{ 335{
632#if BITS_PER_LONG == 64 336#if BITS_PER_LONG == 64
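The 288 lines removed here (tunables, mb_debug, the allocation-context and preallocation structures, and the small inline helpers) are dropped in favour of the new "mballoc.h" include at the top of the file, which presumably now carries these definitions. One of those helpers, ext4_grp_offs_to_block(), is the formula later hunks rely on to turn a (group, offset) pair into a physical block; a worked example with illustrative geometry:

    #include <stdio.h>

    int main(void)
    {
        /* illustrative layout: 32768 blocks per group, first data block 0 */
        unsigned long long blocks_per_group = 32768, first_data_block = 0;
        unsigned long long fe_group = 3, fe_start = 100;

        /* same formula as ext4_grp_offs_to_block() */
        unsigned long long block =
            fe_group * blocks_per_group + fe_start + first_data_block;

        printf("physical block = %llu\n", block);   /* 98404 */
        return 0;
    }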
@@ -736,7 +440,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
736 blocknr += 440 blocknr +=
737 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 441 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
738 442
739 ext4_error(sb, __FUNCTION__, "double-free of inode" 443 ext4_error(sb, __func__, "double-free of inode"
740 " %lu's block %llu(bit %u in group %lu)\n", 444 " %lu's block %llu(bit %u in group %lu)\n",
741 inode ? inode->i_ino : 0, blocknr, 445 inode ? inode->i_ino : 0, blocknr,
742 first + i, e4b->bd_group); 446 first + i, e4b->bd_group);
@@ -898,17 +602,17 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
898 list_for_each(cur, &grp->bb_prealloc_list) { 602 list_for_each(cur, &grp->bb_prealloc_list) {
899 ext4_group_t groupnr; 603 ext4_group_t groupnr;
900 struct ext4_prealloc_space *pa; 604 struct ext4_prealloc_space *pa;
901 pa = list_entry(cur, struct ext4_prealloc_space, group_list); 605 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
902 ext4_get_group_no_and_offset(sb, pa->pstart, &groupnr, &k); 606 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
903 MB_CHECK_ASSERT(groupnr == e4b->bd_group); 607 MB_CHECK_ASSERT(groupnr == e4b->bd_group);
904 for (i = 0; i < pa->len; i++) 608 for (i = 0; i < pa->pa_len; i++)
905 MB_CHECK_ASSERT(mb_test_bit(k + i, buddy)); 609 MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
906 } 610 }
907 return 0; 611 return 0;
908} 612}
909#undef MB_CHECK_ASSERT 613#undef MB_CHECK_ASSERT
910#define mb_check_buddy(e4b) __mb_check_buddy(e4b, \ 614#define mb_check_buddy(e4b) __mb_check_buddy(e4b, \
911 __FILE__, __FUNCTION__, __LINE__) 615 __FILE__, __func__, __LINE__)
912#else 616#else
913#define mb_check_buddy(e4b) 617#define mb_check_buddy(e4b)
914#endif 618#endif
@@ -982,7 +686,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
982 grp->bb_fragments = fragments; 686 grp->bb_fragments = fragments;
983 687
984 if (free != grp->bb_free) { 688 if (free != grp->bb_free) {
985 ext4_error(sb, __FUNCTION__, 689 ext4_error(sb, __func__,
986 "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n", 690 "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n",
987 group, free, grp->bb_free); 691 group, free, grp->bb_free);
988 /* 692 /*
@@ -1168,8 +872,9 @@ out:
1168 return err; 872 return err;
1169} 873}
1170 874
1171static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, 875static noinline_for_stack int
1172 struct ext4_buddy *e4b) 876ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
877 struct ext4_buddy *e4b)
1173{ 878{
1174 struct ext4_sb_info *sbi = EXT4_SB(sb); 879 struct ext4_sb_info *sbi = EXT4_SB(sb);
1175 struct inode *inode = sbi->s_buddy_cache; 880 struct inode *inode = sbi->s_buddy_cache;
@@ -1367,7 +1072,7 @@ static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1367 blocknr += 1072 blocknr +=
1368 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 1073 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
1369 1074
1370 ext4_error(sb, __FUNCTION__, "double-free of inode" 1075 ext4_error(sb, __func__, "double-free of inode"
1371 " %lu's block %llu(bit %u in group %lu)\n", 1076 " %lu's block %llu(bit %u in group %lu)\n",
1372 inode ? inode->i_ino : 0, blocknr, block, 1077 inode ? inode->i_ino : 0, blocknr, block,
1373 e4b->bd_group); 1078 e4b->bd_group);
@@ -1848,7 +1553,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1848 * free blocks even though group info says we 1553 * free blocks even though group info says we
1849 * we have free blocks 1554 * we have free blocks
1850 */ 1555 */
1851 ext4_error(sb, __FUNCTION__, "%d free blocks as per " 1556 ext4_error(sb, __func__, "%d free blocks as per "
1852 "group info. But bitmap says 0\n", 1557 "group info. But bitmap says 0\n",
1853 free); 1558 free);
1854 break; 1559 break;
@@ -1857,7 +1562,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1857 mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); 1562 mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
1858 BUG_ON(ex.fe_len <= 0); 1563 BUG_ON(ex.fe_len <= 0);
1859 if (free < ex.fe_len) { 1564 if (free < ex.fe_len) {
1860 ext4_error(sb, __FUNCTION__, "%d free blocks as per " 1565 ext4_error(sb, __func__, "%d free blocks as per "
1861 "group info. But got %d blocks\n", 1566 "group info. But got %d blocks\n",
1862 free, ex.fe_len); 1567 free, ex.fe_len);
1863 /* 1568 /*
@@ -1965,7 +1670,8 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1965 return 0; 1670 return 0;
1966} 1671}
1967 1672
1968static int ext4_mb_regular_allocator(struct ext4_allocation_context *ac) 1673static noinline_for_stack int
1674ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1969{ 1675{
1970 ext4_group_t group; 1676 ext4_group_t group;
1971 ext4_group_t i; 1677 ext4_group_t i;
@@ -2449,17 +2155,10 @@ static void ext4_mb_history_init(struct super_block *sb)
2449 int i; 2155 int i;
2450 2156
2451 if (sbi->s_mb_proc != NULL) { 2157 if (sbi->s_mb_proc != NULL) {
2452 struct proc_dir_entry *p; 2158 proc_create_data("mb_history", S_IRUGO, sbi->s_mb_proc,
2453 p = create_proc_entry("mb_history", S_IRUGO, sbi->s_mb_proc); 2159 &ext4_mb_seq_history_fops, sb);
2454 if (p) { 2160 proc_create_data("mb_groups", S_IRUGO, sbi->s_mb_proc,
2455 p->proc_fops = &ext4_mb_seq_history_fops; 2161 &ext4_mb_seq_groups_fops, sb);
2456 p->data = sb;
2457 }
2458 p = create_proc_entry("mb_groups", S_IRUGO, sbi->s_mb_proc);
2459 if (p) {
2460 p->proc_fops = &ext4_mb_seq_groups_fops;
2461 p->data = sb;
2462 }
2463 } 2162 }
2464 2163
2465 sbi->s_mb_history_max = 1000; 2164 sbi->s_mb_history_max = 1000;
@@ -2472,7 +2171,8 @@ static void ext4_mb_history_init(struct super_block *sb)
2472 /* if we can't allocate history, then we simple won't use it */ 2171 /* if we can't allocate history, then we simple won't use it */
2473} 2172}
2474 2173
2475static void ext4_mb_store_history(struct ext4_allocation_context *ac) 2174static noinline_for_stack void
2175ext4_mb_store_history(struct ext4_allocation_context *ac)
2476{ 2176{
2477 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); 2177 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
2478 struct ext4_mb_history h; 2178 struct ext4_mb_history h;
@@ -2572,13 +2272,13 @@ static int ext4_mb_init_backend(struct super_block *sb)
2572 meta_group_info[j] = kzalloc(len, GFP_KERNEL); 2272 meta_group_info[j] = kzalloc(len, GFP_KERNEL);
2573 if (meta_group_info[j] == NULL) { 2273 if (meta_group_info[j] == NULL) {
2574 printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); 2274 printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
2575 i--;
2576 goto err_freebuddy; 2275 goto err_freebuddy;
2577 } 2276 }
2578 desc = ext4_get_group_desc(sb, i, NULL); 2277 desc = ext4_get_group_desc(sb, i, NULL);
2579 if (desc == NULL) { 2278 if (desc == NULL) {
2580 printk(KERN_ERR 2279 printk(KERN_ERR
2581 "EXT4-fs: can't read descriptor %lu\n", i); 2280 "EXT4-fs: can't read descriptor %lu\n", i);
2281 i++;
2582 goto err_freebuddy; 2282 goto err_freebuddy;
2583 } 2283 }
2584 memset(meta_group_info[j], 0, len); 2284 memset(meta_group_info[j], 0, len);
@@ -2618,13 +2318,11 @@ static int ext4_mb_init_backend(struct super_block *sb)
2618 return 0; 2318 return 0;
2619 2319
2620err_freebuddy: 2320err_freebuddy:
2621 while (i >= 0) { 2321 while (i-- > 0)
2622 kfree(ext4_get_group_info(sb, i)); 2322 kfree(ext4_get_group_info(sb, i));
2623 i--;
2624 }
2625 i = num_meta_group_infos; 2323 i = num_meta_group_infos;
2626err_freemeta: 2324err_freemeta:
2627 while (--i >= 0) 2325 while (i-- > 0)
2628 kfree(sbi->s_group_info[i]); 2326 kfree(sbi->s_group_info[i]);
2629 iput(sbi->s_buddy_cache); 2327 iput(sbi->s_buddy_cache);
2630err_freesgi: 2328err_freesgi:
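Both error-path loops now use the `while (i-- > 0)` idiom, which releases exactly the i entries that were set up before the failure, and the added `i++` before the descriptor-read goto accounts for the group-info slot that had already been allocated in that iteration. A userspace illustration of the idiom:

    #include <stdio.h>
    #include <stdlib.h>

    /* on entry to cleanup, i is the number of slots successfully set up,
     * so the loop frees item[0] .. item[i-1] and nothing else */
    int main(void)
    {
        enum { N = 5 };
        void *item[N];
        int i;

        for (i = 0; i < N; i++) {
            item[i] = malloc(16);
            if (!item[i])
                goto cleanup;              /* i slots were allocated before this one */
        }
        printf("all %d allocated\n", N);   /* here i == N, so cleanup frees them all */
    cleanup:
        while (i-- > 0)
            free(item[i]);
        return 0;
    }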
@@ -2808,7 +2506,8 @@ int ext4_mb_release(struct super_block *sb)
2808 return 0; 2506 return 0;
2809} 2507}
2810 2508
2811static void ext4_mb_free_committed_blocks(struct super_block *sb) 2509static noinline_for_stack void
2510ext4_mb_free_committed_blocks(struct super_block *sb)
2812{ 2511{
2813 struct ext4_sb_info *sbi = EXT4_SB(sb); 2512 struct ext4_sb_info *sbi = EXT4_SB(sb);
2814 int err; 2513 int err;
@@ -2867,7 +2566,6 @@ static void ext4_mb_free_committed_blocks(struct super_block *sb)
2867 mb_debug("freed %u blocks in %u structures\n", count, count2); 2566 mb_debug("freed %u blocks in %u structures\n", count, count2);
2868} 2567}
2869 2568
2870#define EXT4_ROOT "ext4"
2871#define EXT4_MB_STATS_NAME "stats" 2569#define EXT4_MB_STATS_NAME "stats"
2872#define EXT4_MB_MAX_TO_SCAN_NAME "max_to_scan" 2570#define EXT4_MB_MAX_TO_SCAN_NAME "max_to_scan"
2873#define EXT4_MB_MIN_TO_SCAN_NAME "min_to_scan" 2571#define EXT4_MB_MIN_TO_SCAN_NAME "min_to_scan"
@@ -2941,8 +2639,7 @@ static int ext4_mb_init_per_dev_proc(struct super_block *sb)
2941 struct proc_dir_entry *proc; 2639 struct proc_dir_entry *proc;
2942 char devname[64]; 2640 char devname[64];
2943 2641
2944 snprintf(devname, sizeof(devname) - 1, "%s", 2642 bdevname(sb->s_bdev, devname);
2945 bdevname(sb->s_bdev, devname));
2946 sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4); 2643 sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4);
2947 2644
2948 MB_PROC_HANDLER(EXT4_MB_STATS_NAME, stats); 2645 MB_PROC_HANDLER(EXT4_MB_STATS_NAME, stats);
@@ -2976,8 +2673,7 @@ static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
2976 if (sbi->s_mb_proc == NULL) 2673 if (sbi->s_mb_proc == NULL)
2977 return -EINVAL; 2674 return -EINVAL;
2978 2675
2979 snprintf(devname, sizeof(devname) - 1, "%s", 2676 bdevname(sb->s_bdev, devname);
2980 bdevname(sb->s_bdev, devname));
2981 remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); 2677 remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc);
2982 remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); 2678 remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc);
2983 remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); 2679 remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc);
@@ -3007,9 +2703,9 @@ int __init init_ext4_mballoc(void)
3007 return -ENOMEM; 2703 return -ENOMEM;
3008 } 2704 }
3009#ifdef CONFIG_PROC_FS 2705#ifdef CONFIG_PROC_FS
3010 proc_root_ext4 = proc_mkdir(EXT4_ROOT, proc_root_fs); 2706 proc_root_ext4 = proc_mkdir("fs/ext4", NULL);
3011 if (proc_root_ext4 == NULL) 2707 if (proc_root_ext4 == NULL)
3012 printk(KERN_ERR "EXT4-fs: Unable to create %s\n", EXT4_ROOT); 2708 printk(KERN_ERR "EXT4-fs: Unable to create fs/ext4\n");
3013#endif 2709#endif
3014 return 0; 2710 return 0;
3015} 2711}
@@ -3020,7 +2716,7 @@ void exit_ext4_mballoc(void)
3020 kmem_cache_destroy(ext4_pspace_cachep); 2716 kmem_cache_destroy(ext4_pspace_cachep);
3021 kmem_cache_destroy(ext4_ac_cachep); 2717 kmem_cache_destroy(ext4_ac_cachep);
3022#ifdef CONFIG_PROC_FS 2718#ifdef CONFIG_PROC_FS
3023 remove_proc_entry(EXT4_ROOT, proc_root_fs); 2719 remove_proc_entry("fs/ext4", NULL);
3024#endif 2720#endif
3025} 2721}
3026 2722
@@ -3029,7 +2725,8 @@ void exit_ext4_mballoc(void)
3029 * Check quota and mark choosed space (ac->ac_b_ex) non-free in bitmaps 2725 * Check quota and mark choosed space (ac->ac_b_ex) non-free in bitmaps
3030 * Returns 0 if success or error code 2726 * Returns 0 if success or error code
3031 */ 2727 */
3032static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, 2728static noinline_for_stack int
2729ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3033 handle_t *handle) 2730 handle_t *handle)
3034{ 2731{
3035 struct buffer_head *bitmap_bh = NULL; 2732 struct buffer_head *bitmap_bh = NULL;
@@ -3039,7 +2736,7 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3039 struct ext4_sb_info *sbi; 2736 struct ext4_sb_info *sbi;
3040 struct super_block *sb; 2737 struct super_block *sb;
3041 ext4_fsblk_t block; 2738 ext4_fsblk_t block;
3042 int err; 2739 int err, len;
3043 2740
3044 BUG_ON(ac->ac_status != AC_STATUS_FOUND); 2741 BUG_ON(ac->ac_status != AC_STATUS_FOUND);
3045 BUG_ON(ac->ac_b_ex.fe_len <= 0); 2742 BUG_ON(ac->ac_b_ex.fe_len <= 0);
@@ -3073,14 +2770,27 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3073 + ac->ac_b_ex.fe_start 2770 + ac->ac_b_ex.fe_start
3074 + le32_to_cpu(es->s_first_data_block); 2771 + le32_to_cpu(es->s_first_data_block);
3075 2772
3076 if (block == ext4_block_bitmap(sb, gdp) || 2773 len = ac->ac_b_ex.fe_len;
3077 block == ext4_inode_bitmap(sb, gdp) || 2774 if (in_range(ext4_block_bitmap(sb, gdp), block, len) ||
3078 in_range(block, ext4_inode_table(sb, gdp), 2775 in_range(ext4_inode_bitmap(sb, gdp), block, len) ||
3079 EXT4_SB(sb)->s_itb_per_group)) { 2776 in_range(block, ext4_inode_table(sb, gdp),
3080 2777 EXT4_SB(sb)->s_itb_per_group) ||
3081 ext4_error(sb, __FUNCTION__, 2778 in_range(block + len - 1, ext4_inode_table(sb, gdp),
2779 EXT4_SB(sb)->s_itb_per_group)) {
2780 ext4_error(sb, __func__,
3082 "Allocating block in system zone - block = %llu", 2781 "Allocating block in system zone - block = %llu",
3083 block); 2782 block);
2783 /* File system mounted not to panic on error
2784 * Fix the bitmap and repeat the block allocation
2785 * We leak some of the blocks here.
2786 */
2787 mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group),
2788 bitmap_bh->b_data, ac->ac_b_ex.fe_start,
2789 ac->ac_b_ex.fe_len);
2790 err = ext4_journal_dirty_metadata(handle, bitmap_bh);
2791 if (!err)
2792 err = -EAGAIN;
2793 goto out_err;
3084 } 2794 }
3085#ifdef AGGRESSIVE_CHECK 2795#ifdef AGGRESSIVE_CHECK
3086 { 2796 {
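The single-block comparisons against the block bitmap, inode bitmap and inode table become range checks, because an allocation of fe_len blocks can overlap a system zone without starting exactly on one of those blocks; on overlap the bits are re-marked and the allocation retried via -EAGAIN instead of silently handing out metadata blocks. A userspace example using the same in_range() helper; the block numbers are made up:

    #include <stdio.h>

    /* the helper mballoc uses */
    #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)

    int main(void)
    {
        /* illustrative group layout */
        unsigned long long block_bitmap = 1000;
        unsigned long long inode_table  = 1002, itb_per_group = 512;

        /* candidate extent: 8 blocks starting just below the metadata, i.e. 998..1005 */
        unsigned long long block = 998, len = 8;

        int bad = in_range(block_bitmap, block, len)                     /* bitmap inside extent  */
               || in_range(block, inode_table, itb_per_group)            /* extent start in table */
               || in_range(block + len - 1, inode_table, itb_per_group); /* extent end in table   */

        /* the old check only looked at `block` itself (998) and would have missed this */
        printf("overlaps system zone: %s\n", bad ? "yes" : "no");
        return 0;
    }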
@@ -3102,9 +2812,7 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3102 ac->ac_b_ex.fe_group, 2812 ac->ac_b_ex.fe_group,
3103 gdp)); 2813 gdp));
3104 } 2814 }
3105 gdp->bg_free_blocks_count = 2815 le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
3106 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)
3107 - ac->ac_b_ex.fe_len);
3108 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); 2816 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
3109 spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); 2817 spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
3110 percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); 2818 percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
@@ -3138,7 +2846,7 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
3138 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe; 2846 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe;
3139 else 2847 else
3140 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc; 2848 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
3141 mb_debug("#%u: goal %lu blocks for locality group\n", 2849 mb_debug("#%u: goal %u blocks for locality group\n",
3142 current->pid, ac->ac_g_ex.fe_len); 2850 current->pid, ac->ac_g_ex.fe_len);
3143} 2851}
3144 2852
@@ -3146,15 +2854,16 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
3146 * Normalization means making request better in terms of 2854 * Normalization means making request better in terms of
3147 * size and alignment 2855 * size and alignment
3148 */ 2856 */
3149static void ext4_mb_normalize_request(struct ext4_allocation_context *ac, 2857static noinline_for_stack void
2858ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3150 struct ext4_allocation_request *ar) 2859 struct ext4_allocation_request *ar)
3151{ 2860{
3152 int bsbits, max; 2861 int bsbits, max;
3153 ext4_lblk_t end; 2862 ext4_lblk_t end;
3154 struct list_head *cur;
3155 loff_t size, orig_size, start_off; 2863 loff_t size, orig_size, start_off;
3156 ext4_lblk_t start, orig_start; 2864 ext4_lblk_t start, orig_start;
3157 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); 2865 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
2866 struct ext4_prealloc_space *pa;
3158 2867
3159 /* do normalize only data requests, metadata requests 2868 /* do normalize only data requests, metadata requests
3160 do not need preallocation */ 2869 do not need preallocation */
@@ -3184,12 +2893,11 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3184 if (size < i_size_read(ac->ac_inode)) 2893 if (size < i_size_read(ac->ac_inode))
3185 size = i_size_read(ac->ac_inode); 2894 size = i_size_read(ac->ac_inode);
3186 2895
3187 /* max available blocks in a free group */ 2896 /* max size of free chunks */
3188 max = EXT4_BLOCKS_PER_GROUP(ac->ac_sb) - 1 - 1 - 2897 max = 2 << bsbits;
3189 EXT4_SB(ac->ac_sb)->s_itb_per_group;
3190 2898
3191#define NRL_CHECK_SIZE(req, size, max,bits) \ 2899#define NRL_CHECK_SIZE(req, size, max, chunk_size) \
3192 (req <= (size) || max <= ((size) >> bits)) 2900 (req <= (size) || max <= (chunk_size))
3193 2901
3194 /* first, try to predict filesize */ 2902 /* first, try to predict filesize */
3195 /* XXX: should this table be tunable? */ 2903 /* XXX: should this table be tunable? */
@@ -3208,16 +2916,16 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3208 size = 512 * 1024; 2916 size = 512 * 1024;
3209 } else if (size <= 1024 * 1024) { 2917 } else if (size <= 1024 * 1024) {
3210 size = 1024 * 1024; 2918 size = 1024 * 1024;
3211 } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, bsbits)) { 2919 } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
3212 start_off = ((loff_t)ac->ac_o_ex.fe_logical >> 2920 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3213 (20 - bsbits)) << 20; 2921 (21 - bsbits)) << 21;
3214 size = 1024 * 1024; 2922 size = 2 * 1024 * 1024;
3215 } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, bsbits)) { 2923 } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
3216 start_off = ((loff_t)ac->ac_o_ex.fe_logical >> 2924 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3217 (22 - bsbits)) << 22; 2925 (22 - bsbits)) << 22;
3218 size = 4 * 1024 * 1024; 2926 size = 4 * 1024 * 1024;
3219 } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len, 2927 } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
3220 (8<<20)>>bsbits, max, bsbits)) { 2928 (8<<20)>>bsbits, max, 8 * 1024)) {
3221 start_off = ((loff_t)ac->ac_o_ex.fe_logical >> 2929 start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
3222 (23 - bsbits)) << 23; 2930 (23 - bsbits)) << 23;
3223 size = 8 * 1024 * 1024; 2931 size = 8 * 1024 * 1024;
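Each rung of the rewritten ladder rounds the request's logical start down to its chunk boundary in bytes, and NRL_CHECK_SIZE() now compares against an explicit chunk size rather than a group-derived maximum shifted by bsbits. A worked example of the new 2 MiB rung with illustrative numbers:

    #include <stdio.h>

    int main(void)
    {
        unsigned bsbits = 12;                      /* 4 KiB blocks */
        unsigned long long fe_logical = 1000;      /* logical block of the request */

        /* the 2 MiB rung: start_off = (fe_logical >> (21 - bsbits)) << 21 */
        unsigned long long byte_off  = fe_logical << bsbits;                /* 4096000 */
        unsigned long long start_off = (fe_logical >> (21 - bsbits)) << 21; /* 2097152 */
        unsigned long long size      = 2 * 1024 * 1024;

        /* start_off is simply byte_off rounded down to a 2 MiB boundary */
        printf("byte_off=%llu start_off=%llu size=%llu\n", byte_off, start_off, size);
        return 0;
    }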
@@ -3240,12 +2948,9 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3240 2948
3241 /* check we don't cross already preallocated blocks */ 2949 /* check we don't cross already preallocated blocks */
3242 rcu_read_lock(); 2950 rcu_read_lock();
3243 list_for_each_rcu(cur, &ei->i_prealloc_list) { 2951 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3244 struct ext4_prealloc_space *pa;
3245 unsigned long pa_end; 2952 unsigned long pa_end;
3246 2953
3247 pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
3248
3249 if (pa->pa_deleted) 2954 if (pa->pa_deleted)
3250 continue; 2955 continue;
3251 spin_lock(&pa->pa_lock); 2956 spin_lock(&pa->pa_lock);
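This hunk and the later ones in ext4_mb_use_preallocated() swap list_for_each_rcu() plus a manual list_entry() for list_for_each_entry_rcu(), which folds the container lookup into the iterator and lets the struct list_head *cur temporary go away. The shape of the change, shown as a kernel-style fragment rather than runnable code (ei and the pa_inode_list member are as in the surrounding diff, and both loops sit inside rcu_read_lock()/rcu_read_unlock()):

    /* before: walk raw list_head nodes, recover the container by hand */
    struct list_head *cur;
    list_for_each_rcu(cur, &ei->i_prealloc_list) {
        struct ext4_prealloc_space *pa =
            list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
        /* ... use pa ... */
    }

    /* after: the iterator hands back the container directly */
    struct ext4_prealloc_space *pa;
    list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
        /* ... use pa ... */
    }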
@@ -3287,10 +2992,8 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3287 2992
3288 /* XXX: extra loop to check we really don't overlap preallocations */ 2993 /* XXX: extra loop to check we really don't overlap preallocations */
3289 rcu_read_lock(); 2994 rcu_read_lock();
3290 list_for_each_rcu(cur, &ei->i_prealloc_list) { 2995 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3291 struct ext4_prealloc_space *pa;
3292 unsigned long pa_end; 2996 unsigned long pa_end;
3293 pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
3294 spin_lock(&pa->pa_lock); 2997 spin_lock(&pa->pa_lock);
3295 if (pa->pa_deleted == 0) { 2998 if (pa->pa_deleted == 0) {
3296 pa_end = pa->pa_lstart + pa->pa_len; 2999 pa_end = pa->pa_lstart + pa->pa_len;
@@ -3382,7 +3085,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
3382 BUG_ON(pa->pa_free < len); 3085 BUG_ON(pa->pa_free < len);
3383 pa->pa_free -= len; 3086 pa->pa_free -= len;
3384 3087
3385 mb_debug("use %llu/%lu from inode pa %p\n", start, len, pa); 3088 mb_debug("use %llu/%u from inode pa %p\n", start, len, pa);
3386} 3089}
3387 3090
3388/* 3091/*
@@ -3412,12 +3115,12 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3412/* 3115/*
3413 * search goal blocks in preallocated space 3116 * search goal blocks in preallocated space
3414 */ 3117 */
3415static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac) 3118static noinline_for_stack int
3119ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3416{ 3120{
3417 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); 3121 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3418 struct ext4_locality_group *lg; 3122 struct ext4_locality_group *lg;
3419 struct ext4_prealloc_space *pa; 3123 struct ext4_prealloc_space *pa;
3420 struct list_head *cur;
3421 3124
3422 /* only data can be preallocated */ 3125 /* only data can be preallocated */
3423 if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) 3126 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
@@ -3425,8 +3128,7 @@ static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3425 3128
3426 /* first, try per-file preallocation */ 3129 /* first, try per-file preallocation */
3427 rcu_read_lock(); 3130 rcu_read_lock();
3428 list_for_each_rcu(cur, &ei->i_prealloc_list) { 3131 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3429 pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
3430 3132
3431 /* all fields in this condition don't change, 3133 /* all fields in this condition don't change,
3432 * so we can skip locking for them */ 3134 * so we can skip locking for them */
@@ -3458,8 +3160,7 @@ static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3458 return 0; 3160 return 0;
3459 3161
3460 rcu_read_lock(); 3162 rcu_read_lock();
3461 list_for_each_rcu(cur, &lg->lg_prealloc_list) { 3163 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list, pa_inode_list) {
3462 pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
3463 spin_lock(&pa->pa_lock); 3164 spin_lock(&pa->pa_lock);
3464 if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) { 3165 if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) {
3465 atomic_inc(&pa->pa_count); 3166 atomic_inc(&pa->pa_count);
@@ -3579,7 +3280,8 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
3579/* 3280/*
3580 * creates new preallocated space for given inode 3281 * creates new preallocated space for given inode
3581 */ 3282 */
3582static int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) 3283static noinline_for_stack int
3284ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3583{ 3285{
3584 struct super_block *sb = ac->ac_sb; 3286 struct super_block *sb = ac->ac_sb;
3585 struct ext4_prealloc_space *pa; 3287 struct ext4_prealloc_space *pa;
@@ -3666,7 +3368,8 @@ static int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3666/* 3368/*
3667 * creates new preallocated space for locality group inodes belongs to 3369 * creates new preallocated space for locality group inodes belongs to
3668 */ 3370 */
3669static int ext4_mb_new_group_pa(struct ext4_allocation_context *ac) 3371static noinline_for_stack int
3372ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3670{ 3373{
3671 struct super_block *sb = ac->ac_sb; 3374 struct super_block *sb = ac->ac_sb;
3672 struct ext4_locality_group *lg; 3375 struct ext4_locality_group *lg;
@@ -3739,11 +3442,11 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
3739 * the caller MUST hold group/inode locks. 3442 * the caller MUST hold group/inode locks.
3740 * TODO: optimize the case when there are no in-core structures yet 3443 * TODO: optimize the case when there are no in-core structures yet
3741 */ 3444 */
3742static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b, 3445static noinline_for_stack int
3743 struct buffer_head *bitmap_bh, 3446ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3744 struct ext4_prealloc_space *pa) 3447 struct ext4_prealloc_space *pa,
3448 struct ext4_allocation_context *ac)
3745{ 3449{
3746 struct ext4_allocation_context *ac;
3747 struct super_block *sb = e4b->bd_sb; 3450 struct super_block *sb = e4b->bd_sb;
3748 struct ext4_sb_info *sbi = EXT4_SB(sb); 3451 struct ext4_sb_info *sbi = EXT4_SB(sb);
3749 unsigned long end; 3452 unsigned long end;
@@ -3759,8 +3462,6 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
3759 BUG_ON(group != e4b->bd_group && pa->pa_len != 0); 3462 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3760 end = bit + pa->pa_len; 3463 end = bit + pa->pa_len;
3761 3464
3762 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3763
3764 if (ac) { 3465 if (ac) {
3765 ac->ac_sb = sb; 3466 ac->ac_sb = sb;
3766 ac->ac_inode = pa->pa_inode; 3467 ac->ac_inode = pa->pa_inode;
@@ -3797,7 +3498,7 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
3797 pa, (unsigned long) pa->pa_lstart, 3498 pa, (unsigned long) pa->pa_lstart,
3798 (unsigned long) pa->pa_pstart, 3499 (unsigned long) pa->pa_pstart,
3799 (unsigned long) pa->pa_len); 3500 (unsigned long) pa->pa_len);
3800 ext4_error(sb, __FUNCTION__, "free %u, pa_free %u\n", 3501 ext4_error(sb, __func__, "free %u, pa_free %u\n",
3801 free, pa->pa_free); 3502 free, pa->pa_free);
3802 /* 3503 /*
3803 * pa is already deleted so we use the value obtained 3504 * pa is already deleted so we use the value obtained
@@ -3805,22 +3506,19 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
3805 */ 3506 */
3806 } 3507 }
3807 atomic_add(free, &sbi->s_mb_discarded); 3508 atomic_add(free, &sbi->s_mb_discarded);
3808 if (ac)
3809 kmem_cache_free(ext4_ac_cachep, ac);
3810 3509
3811 return err; 3510 return err;
3812} 3511}
3813 3512
3814static int ext4_mb_release_group_pa(struct ext4_buddy *e4b, 3513static noinline_for_stack int
3815 struct ext4_prealloc_space *pa) 3514ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3515 struct ext4_prealloc_space *pa,
3516 struct ext4_allocation_context *ac)
3816{ 3517{
3817 struct ext4_allocation_context *ac;
3818 struct super_block *sb = e4b->bd_sb; 3518 struct super_block *sb = e4b->bd_sb;
3819 ext4_group_t group; 3519 ext4_group_t group;
3820 ext4_grpblk_t bit; 3520 ext4_grpblk_t bit;
3821 3521
3822 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3823
3824 if (ac) 3522 if (ac)
3825 ac->ac_op = EXT4_MB_HISTORY_DISCARD; 3523 ac->ac_op = EXT4_MB_HISTORY_DISCARD;
3826 3524
@@ -3838,7 +3536,6 @@ static int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3838 ac->ac_b_ex.fe_len = pa->pa_len; 3536 ac->ac_b_ex.fe_len = pa->pa_len;
3839 ac->ac_b_ex.fe_logical = 0; 3537 ac->ac_b_ex.fe_logical = 0;
3840 ext4_mb_store_history(ac); 3538 ext4_mb_store_history(ac);
3841 kmem_cache_free(ext4_ac_cachep, ac);
3842 } 3539 }
3843 3540
3844 return 0; 3541 return 0;
@@ -3853,12 +3550,14 @@ static int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3853 * - how many do we discard 3550 * - how many do we discard
3854 * 1) how many requested 3551 * 1) how many requested
3855 */ 3552 */
3856static int ext4_mb_discard_group_preallocations(struct super_block *sb, 3553static noinline_for_stack int
3554ext4_mb_discard_group_preallocations(struct super_block *sb,
3857 ext4_group_t group, int needed) 3555 ext4_group_t group, int needed)
3858{ 3556{
3859 struct ext4_group_info *grp = ext4_get_group_info(sb, group); 3557 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3860 struct buffer_head *bitmap_bh = NULL; 3558 struct buffer_head *bitmap_bh = NULL;
3861 struct ext4_prealloc_space *pa, *tmp; 3559 struct ext4_prealloc_space *pa, *tmp;
3560 struct ext4_allocation_context *ac;
3862 struct list_head list; 3561 struct list_head list;
3863 struct ext4_buddy e4b; 3562 struct ext4_buddy e4b;
3864 int err; 3563 int err;
@@ -3886,6 +3585,7 @@ static int ext4_mb_discard_group_preallocations(struct super_block *sb,
3886 grp = ext4_get_group_info(sb, group); 3585 grp = ext4_get_group_info(sb, group);
3887 INIT_LIST_HEAD(&list); 3586 INIT_LIST_HEAD(&list);
3888 3587
3588 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3889repeat: 3589repeat:
3890 ext4_lock_group(sb, group); 3590 ext4_lock_group(sb, group);
3891 list_for_each_entry_safe(pa, tmp, 3591 list_for_each_entry_safe(pa, tmp,
@@ -3940,9 +3640,9 @@ repeat:
3940 spin_unlock(pa->pa_obj_lock); 3640 spin_unlock(pa->pa_obj_lock);
3941 3641
3942 if (pa->pa_linear) 3642 if (pa->pa_linear)
3943 ext4_mb_release_group_pa(&e4b, pa); 3643 ext4_mb_release_group_pa(&e4b, pa, ac);
3944 else 3644 else
3945 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); 3645 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
3946 3646
3947 list_del(&pa->u.pa_tmp_list); 3647 list_del(&pa->u.pa_tmp_list);
3948 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); 3648 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
@@ -3950,6 +3650,8 @@ repeat:
3950 3650
3951out: 3651out:
3952 ext4_unlock_group(sb, group); 3652 ext4_unlock_group(sb, group);
3653 if (ac)
3654 kmem_cache_free(ext4_ac_cachep, ac);
3953 ext4_mb_release_desc(&e4b); 3655 ext4_mb_release_desc(&e4b);
3954 put_bh(bitmap_bh); 3656 put_bh(bitmap_bh);
3955 return free; 3657 return free;
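
The hunks above move the kmem_cache_alloc() of the scratch ext4_allocation_context out of the two release helpers and into their caller, which allocates it once, hands it to every release call, and frees it after the loop. A minimal standalone sketch of that shape (all toy_* names invented here, not the ext4 API) might look like this:

/*
 * Toy illustration of the restructuring above: instead of each release
 * helper allocating its own scratch context, the caller allocates one
 * context, passes it to every release call, and frees it once at the end.
 * A NULL context is tolerated, just as the patched helpers tolerate a
 * failed allocation.
 */
#include <stdio.h>
#include <stdlib.h>

struct toy_context {
	int op;				/* stands in for ac->ac_op etc. */
};

static void release_one(int pa_index, struct toy_context *ctx)
{
	if (ctx)
		ctx->op = pa_index;	/* record history only when we have a ctx */
	printf("released prealloc space %d\n", pa_index);
}

static void discard_all(int count)
{
	struct toy_context *ctx = malloc(sizeof(*ctx));	/* one allocation */
	int i;

	for (i = 0; i < count; i++)
		release_one(i, ctx);	/* reuse the same scratch context */

	free(ctx);			/* one free; free(NULL) is a no-op */
}

int main(void)
{
	discard_all(3);
	return 0;
}
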
@@ -3970,6 +3672,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode)
3970 struct super_block *sb = inode->i_sb; 3672 struct super_block *sb = inode->i_sb;
3971 struct buffer_head *bitmap_bh = NULL; 3673 struct buffer_head *bitmap_bh = NULL;
3972 struct ext4_prealloc_space *pa, *tmp; 3674 struct ext4_prealloc_space *pa, *tmp;
3675 struct ext4_allocation_context *ac;
3973 ext4_group_t group = 0; 3676 ext4_group_t group = 0;
3974 struct list_head list; 3677 struct list_head list;
3975 struct ext4_buddy e4b; 3678 struct ext4_buddy e4b;
@@ -3984,6 +3687,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode)
3984 3687
3985 INIT_LIST_HEAD(&list); 3688 INIT_LIST_HEAD(&list);
3986 3689
3690 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3987repeat: 3691repeat:
3988 /* first, collect all pa's in the inode */ 3692 /* first, collect all pa's in the inode */
3989 spin_lock(&ei->i_prealloc_lock); 3693 spin_lock(&ei->i_prealloc_lock);
@@ -4048,7 +3752,7 @@ repeat:
4048 3752
4049 ext4_lock_group(sb, group); 3753 ext4_lock_group(sb, group);
4050 list_del(&pa->pa_group_list); 3754 list_del(&pa->pa_group_list);
4051 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); 3755 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
4052 ext4_unlock_group(sb, group); 3756 ext4_unlock_group(sb, group);
4053 3757
4054 ext4_mb_release_desc(&e4b); 3758 ext4_mb_release_desc(&e4b);
@@ -4057,6 +3761,8 @@ repeat:
4057 list_del(&pa->u.pa_tmp_list); 3761 list_del(&pa->u.pa_tmp_list);
4058 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); 3762 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4059 } 3763 }
3764 if (ac)
3765 kmem_cache_free(ext4_ac_cachep, ac);
4060} 3766}
4061 3767
4062/* 3768/*
@@ -4116,7 +3822,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4116 printk(KERN_ERR "PA:%lu:%d:%u \n", i, 3822 printk(KERN_ERR "PA:%lu:%d:%u \n", i,
4117 start, pa->pa_len); 3823 start, pa->pa_len);
4118 } 3824 }
4119 ext4_lock_group(sb, i); 3825 ext4_unlock_group(sb, i);
4120 3826
4121 if (grp->bb_free == 0) 3827 if (grp->bb_free == 0)
4122 continue; 3828 continue;
@@ -4175,7 +3881,8 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
4175 mutex_lock(&ac->ac_lg->lg_mutex); 3881 mutex_lock(&ac->ac_lg->lg_mutex);
4176} 3882}
4177 3883
4178static int ext4_mb_initialize_context(struct ext4_allocation_context *ac, 3884static noinline_for_stack int
3885ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4179 struct ext4_allocation_request *ar) 3886 struct ext4_allocation_request *ar)
4180{ 3887{
4181 struct super_block *sb = ar->inode->i_sb; 3888 struct super_block *sb = ar->inode->i_sb;
@@ -4338,7 +4045,6 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4338 4045
4339 ac->ac_op = EXT4_MB_HISTORY_ALLOC; 4046 ac->ac_op = EXT4_MB_HISTORY_ALLOC;
4340 ext4_mb_normalize_request(ac, ar); 4047 ext4_mb_normalize_request(ac, ar);
4341
4342repeat: 4048repeat:
4343 /* allocate space in core */ 4049 /* allocate space in core */
4344 ext4_mb_regular_allocator(ac); 4050 ext4_mb_regular_allocator(ac);
@@ -4352,10 +4058,21 @@ repeat:
4352 } 4058 }
4353 4059
4354 if (likely(ac->ac_status == AC_STATUS_FOUND)) { 4060 if (likely(ac->ac_status == AC_STATUS_FOUND)) {
4355 ext4_mb_mark_diskspace_used(ac, handle); 4061 *errp = ext4_mb_mark_diskspace_used(ac, handle);
4356 *errp = 0; 4062 if (*errp == -EAGAIN) {
4357 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); 4063 ac->ac_b_ex.fe_group = 0;
4358 ar->len = ac->ac_b_ex.fe_len; 4064 ac->ac_b_ex.fe_start = 0;
4065 ac->ac_b_ex.fe_len = 0;
4066 ac->ac_status = AC_STATUS_CONTINUE;
4067 goto repeat;
4068 } else if (*errp) {
4069 ac->ac_b_ex.fe_len = 0;
4070 ar->len = 0;
4071 ext4_mb_show_ac(ac);
4072 } else {
4073 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
4074 ar->len = ac->ac_b_ex.fe_len;
4075 }
4359 } else { 4076 } else {
4360 freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len); 4077 freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
4361 if (freed) 4078 if (freed)
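
The reworked success path above retries the whole search when ext4_mb_mark_diskspace_used() returns -EAGAIN: the best-found extent is cleared and control jumps back to the repeat label, while any other error propagates to the caller. A condensed, hypothetical model of that control flow (toy names, not the real allocator) is:

#include <errno.h>
#include <stdio.h>

static int attempts;

/* Pretend the on-disk bitmap disagreed once, then accepted the request. */
static int mark_used(int candidate)
{
	(void)candidate;
	return attempts++ == 0 ? -EAGAIN : 0;
}

static int allocate(void)
{
	int candidate, err;

repeat:
	candidate = 42;			/* the "regular allocator" finds a candidate */
	err = mark_used(candidate);
	if (err == -EAGAIN) {
		candidate = 0;		/* throw the stale candidate away */
		goto repeat;		/* and search again */
	}
	if (err)
		return err;		/* any other error propagates to the caller */
	printf("allocated block %d after %d attempt(s)\n", candidate, attempts);
	return 0;
}

int main(void)
{
	return allocate();
}
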
@@ -4406,7 +4123,8 @@ static void ext4_mb_poll_new_transaction(struct super_block *sb,
4406 ext4_mb_free_committed_blocks(sb); 4123 ext4_mb_free_committed_blocks(sb);
4407} 4124}
4408 4125
4409static int ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, 4126static noinline_for_stack int
4127ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4410 ext4_group_t group, ext4_grpblk_t block, int count) 4128 ext4_group_t group, ext4_grpblk_t block, int count)
4411{ 4129{
4412 struct ext4_group_info *db = e4b->bd_info; 4130 struct ext4_group_info *db = e4b->bd_info;
@@ -4497,7 +4215,7 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4497 if (block < le32_to_cpu(es->s_first_data_block) || 4215 if (block < le32_to_cpu(es->s_first_data_block) ||
4498 block + count < block || 4216 block + count < block ||
4499 block + count > ext4_blocks_count(es)) { 4217 block + count > ext4_blocks_count(es)) {
4500 ext4_error(sb, __FUNCTION__, 4218 ext4_error(sb, __func__,
4501 "Freeing blocks not in datazone - " 4219 "Freeing blocks not in datazone - "
4502 "block = %lu, count = %lu", block, count); 4220 "block = %lu, count = %lu", block, count);
4503 goto error_return; 4221 goto error_return;
@@ -4538,9 +4256,11 @@ do_more:
4538 in_range(block + count - 1, ext4_inode_table(sb, gdp), 4256 in_range(block + count - 1, ext4_inode_table(sb, gdp),
4539 EXT4_SB(sb)->s_itb_per_group)) { 4257 EXT4_SB(sb)->s_itb_per_group)) {
4540 4258
4541 ext4_error(sb, __FUNCTION__, 4259 ext4_error(sb, __func__,
4542 "Freeing blocks in system zone - " 4260 "Freeing blocks in system zone - "
4543 "Block = %lu, count = %lu", block, count); 4261 "Block = %lu, count = %lu", block, count);
4262 /* err = 0. ext4_std_error should be a no op */
4263 goto error_return;
4544 } 4264 }
4545 4265
4546 BUFFER_TRACE(bitmap_bh, "getting write access"); 4266 BUFFER_TRACE(bitmap_bh, "getting write access");
@@ -4596,8 +4316,7 @@ do_more:
4596 } 4316 }
4597 4317
4598 spin_lock(sb_bgl_lock(sbi, block_group)); 4318 spin_lock(sb_bgl_lock(sbi, block_group));
4599 gdp->bg_free_blocks_count = 4319 le16_add_cpu(&gdp->bg_free_blocks_count, count);
4600 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count);
4601 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); 4320 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
4602 spin_unlock(sb_bgl_lock(sbi, block_group)); 4321 spin_unlock(sb_bgl_lock(sbi, block_group));
4603 percpu_counter_add(&sbi->s_freeblocks_counter, count); 4322 percpu_counter_add(&sbi->s_freeblocks_counter, count);
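
The last hunk replaces the open-coded cpu_to_le16(le16_to_cpu(x) + count) update with le16_add_cpu(). As a rough standalone sketch, the two forms are equivalent; the helpers are reimplemented here so the example compiles on its own and are not the kernel versions:

#include <stdint.h>
#include <stdio.h>

typedef uint16_t __le16;		/* an on-disk little-endian quantity */

static __le16 cpu_to_le16(uint16_t v)
{
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	return (__le16)((uint16_t)(v >> 8) | (uint16_t)(v << 8));
#else
	return (__le16)v;		/* little-endian host: identity */
#endif
}

static uint16_t le16_to_cpu(__le16 v)
{
	return cpu_to_le16(v);		/* the byte swap is its own inverse */
}

/* The helper: read in CPU order, add, write back in little-endian order. */
static void le16_add_cpu(__le16 *var, int16_t val)
{
	*var = cpu_to_le16((uint16_t)(le16_to_cpu(*var) + val));
}

int main(void)
{
	__le16 free_blocks = cpu_to_le16(100);

	/* The old open-coded form and the new helper do the same update. */
	free_blocks = cpu_to_le16((uint16_t)(le16_to_cpu(free_blocks) + 8));
	le16_add_cpu(&free_blocks, 8);

	printf("free blocks now %u\n", (unsigned)le16_to_cpu(free_blocks));
	return 0;
}
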
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
new file mode 100644
index 000000000000..bfe6add46bcf
--- /dev/null
+++ b/fs/ext4/mballoc.h
@@ -0,0 +1,304 @@
1/*
2 * fs/ext4/mballoc.h
3 *
4 * Written by: Alex Tomas <alex@clusterfs.com>
5 *
6 */
7#ifndef _EXT4_MBALLOC_H
8#define _EXT4_MBALLOC_H
9
10#include <linux/time.h>
11#include <linux/fs.h>
12#include <linux/namei.h>
13#include <linux/quotaops.h>
14#include <linux/buffer_head.h>
15#include <linux/module.h>
16#include <linux/swap.h>
17#include <linux/proc_fs.h>
18#include <linux/pagemap.h>
19#include <linux/seq_file.h>
20#include <linux/version.h>
21#include "ext4_jbd2.h"
22#include "ext4.h"
23#include "group.h"
24
25/*
26 * with AGGRESSIVE_CHECK the allocator runs consistency checks over
27 * structures. These checks slow things down a lot.
28 */
29#define AGGRESSIVE_CHECK__
30
31/*
32 * with DOUBLE_CHECK defined mballoc creates persistent in-core
33 * bitmaps, maintains and uses them to check for double allocations
34 */
35#define DOUBLE_CHECK__
36
37/*
38 */
39#define MB_DEBUG__
40#ifdef MB_DEBUG
41#define mb_debug(fmt, a...) printk(fmt, ##a)
42#else
43#define mb_debug(fmt, a...)
44#endif
45
46/*
47 * with EXT4_MB_HISTORY mballoc stores last N allocations in memory
48 * and you can monitor it in /proc/fs/ext4/<dev>/mb_history
49 */
50#define EXT4_MB_HISTORY
51#define EXT4_MB_HISTORY_ALLOC 1 /* allocation */
52#define EXT4_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */
53#define EXT4_MB_HISTORY_DISCARD 4 /* preallocation discarded */
54#define EXT4_MB_HISTORY_FREE 8 /* free */
55
56#define EXT4_MB_HISTORY_DEFAULT (EXT4_MB_HISTORY_ALLOC | \
57 EXT4_MB_HISTORY_PREALLOC)
58
59/*
60 * How long mballoc can look for a best extent (in found extents)
61 */
62#define MB_DEFAULT_MAX_TO_SCAN 200
63
64/*
65 * How long mballoc must look for a best extent
66 */
67#define MB_DEFAULT_MIN_TO_SCAN 10
68
69/*
70 * How many groups mballoc will scan looking for the best chunk
71 */
72#define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5
73
74/*
75 * with 'ext4_mb_stats' allocator will collect stats that will be
76 * shown at umount. The collecting costs though!
77 */
78#define MB_DEFAULT_STATS 1
79
80/*
81 * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served
82 * by the stream allocator, whose purpose is to pack requests
83 * as close to each other as possible to produce smooth I/O traffic.
84 * We use locality group prealloc space for stream requests.
85 * We can tune the same via /proc/fs/ext4/<partition>/stream_req
86 */
87#define MB_DEFAULT_STREAM_THRESHOLD 16 /* 64K */
88
89/*
90 * for which requests use 2^N search using buddies
91 */
92#define MB_DEFAULT_ORDER2_REQS 2
93
94/*
95 * default group prealloc size 512 blocks
96 */
97#define MB_DEFAULT_GROUP_PREALLOC 512
98
99static struct kmem_cache *ext4_pspace_cachep;
100static struct kmem_cache *ext4_ac_cachep;
101
102#ifdef EXT4_BB_MAX_BLOCKS
103#undef EXT4_BB_MAX_BLOCKS
104#endif
105#define EXT4_BB_MAX_BLOCKS 30
106
107struct ext4_free_metadata {
108 ext4_group_t group;
109 unsigned short num;
110 ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS];
111 struct list_head list;
112};
113
114struct ext4_group_info {
115 unsigned long bb_state;
116 unsigned long bb_tid;
117 struct ext4_free_metadata *bb_md_cur;
118 unsigned short bb_first_free;
119 unsigned short bb_free;
120 unsigned short bb_fragments;
121 struct list_head bb_prealloc_list;
122#ifdef DOUBLE_CHECK
123 void *bb_bitmap;
124#endif
125 unsigned short bb_counters[];
126};
127
128#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
129#define EXT4_GROUP_INFO_LOCKED_BIT 1
130
131#define EXT4_MB_GRP_NEED_INIT(grp) \
132 (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
133
134
135struct ext4_prealloc_space {
136 struct list_head pa_inode_list;
137 struct list_head pa_group_list;
138 union {
139 struct list_head pa_tmp_list;
140 struct rcu_head pa_rcu;
141 } u;
142 spinlock_t pa_lock;
143 atomic_t pa_count;
144 unsigned pa_deleted;
145 ext4_fsblk_t pa_pstart; /* phys. block */
146 ext4_lblk_t pa_lstart; /* log. block */
147 unsigned short pa_len; /* len of preallocated chunk */
148 unsigned short pa_free; /* how many blocks are free */
149 unsigned short pa_linear; /* consumed in one direction
150 * strictly, for grp prealloc */
151 spinlock_t *pa_obj_lock;
152 struct inode *pa_inode; /* hack, for history only */
153};
154
155
156struct ext4_free_extent {
157 ext4_lblk_t fe_logical;
158 ext4_grpblk_t fe_start;
159 ext4_group_t fe_group;
160 int fe_len;
161};
162
163/*
164 * Locality group:
165 * we try to group all related changes together
166 * so that writeback can flush/allocate them together as well
167 */
168struct ext4_locality_group {
169 /* for allocator */
170 struct mutex lg_mutex; /* to serialize allocates */
171 struct list_head lg_prealloc_list;/* list of preallocations */
172 spinlock_t lg_prealloc_lock;
173};
174
175struct ext4_allocation_context {
176 struct inode *ac_inode;
177 struct super_block *ac_sb;
178
179 /* original request */
180 struct ext4_free_extent ac_o_ex;
181
182 /* goal request (after normalization) */
183 struct ext4_free_extent ac_g_ex;
184
185 /* the best found extent */
186 struct ext4_free_extent ac_b_ex;
187
188 /* copy of the best found extent taken before preallocation efforts */
189 struct ext4_free_extent ac_f_ex;
190
191 /* number of iterations done. we have to track to limit searching */
192 unsigned long ac_ex_scanned;
193 __u16 ac_groups_scanned;
194 __u16 ac_found;
195 __u16 ac_tail;
196 __u16 ac_buddy;
197 __u16 ac_flags; /* allocation hints */
198 __u8 ac_status;
199 __u8 ac_criteria;
200 __u8 ac_repeats;
201 __u8 ac_2order; /* if request is to allocate 2^N blocks and
202 * N > 0, the field stores N, otherwise 0 */
203 __u8 ac_op; /* operation, for history only */
204 struct page *ac_bitmap_page;
205 struct page *ac_buddy_page;
206 struct ext4_prealloc_space *ac_pa;
207 struct ext4_locality_group *ac_lg;
208};
209
210#define AC_STATUS_CONTINUE 1
211#define AC_STATUS_FOUND 2
212#define AC_STATUS_BREAK 3
213
214struct ext4_mb_history {
215 struct ext4_free_extent orig; /* orig allocation */
216 struct ext4_free_extent goal; /* goal allocation */
217 struct ext4_free_extent result; /* result allocation */
218 unsigned pid;
219 unsigned ino;
220 __u16 found; /* how many extents have been found */
221 __u16 groups; /* how many groups have been scanned */
222 __u16 tail; /* what tail broke some buddy */
223 __u16 buddy; /* buddy the tail ^^^ broke */
224 __u16 flags;
225 __u8 cr:3; /* which phase the result extent was found at */
226 __u8 op:4;
227 __u8 merged:1;
228};
229
230struct ext4_buddy {
231 struct page *bd_buddy_page;
232 void *bd_buddy;
233 struct page *bd_bitmap_page;
234 void *bd_bitmap;
235 struct ext4_group_info *bd_info;
236 struct super_block *bd_sb;
237 __u16 bd_blkbits;
238 ext4_group_t bd_group;
239};
240#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap)
241#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy)
242
243#ifndef EXT4_MB_HISTORY
244static inline void ext4_mb_store_history(struct ext4_allocation_context *ac)
245{
246 return;
247}
248#else
249static void ext4_mb_store_history(struct ext4_allocation_context *ac);
250#endif
251
252#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
253
254static struct proc_dir_entry *proc_root_ext4;
255struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
256
257static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
258 ext4_group_t group);
259static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
260static void ext4_mb_free_committed_blocks(struct super_block *);
261static void ext4_mb_return_to_preallocation(struct inode *inode,
262 struct ext4_buddy *e4b, sector_t block,
263 int count);
264static void ext4_mb_put_pa(struct ext4_allocation_context *,
265 struct super_block *, struct ext4_prealloc_space *pa);
266static int ext4_mb_init_per_dev_proc(struct super_block *sb);
267static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
268
269
270static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
271{
272 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
273
274 bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
275}
276
277static inline void ext4_unlock_group(struct super_block *sb,
278 ext4_group_t group)
279{
280 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
281
282 bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
283}
284
285static inline int ext4_is_group_locked(struct super_block *sb,
286 ext4_group_t group)
287{
288 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
289
290 return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
291 &(grinfo->bb_state));
292}
293
294static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
295 struct ext4_free_extent *fex)
296{
297 ext4_fsblk_t block;
298
299 block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb)
300 + fex->fe_start
301 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
302 return block;
303}
304#endif
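
ext4_grp_offs_to_block() at the end of the new header is plain arithmetic: the physical block is the group number times blocks-per-group, plus the offset within the group, plus the filesystem's first data block. A tiny worked example with made-up superblock values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t blocks_per_group = 32768;	/* EXT4_BLOCKS_PER_GROUP(sb) */
	uint64_t first_data_block = 1;		/* s_first_data_block (1 KiB blocks) */
	uint64_t group = 5;			/* fe_group */
	uint64_t start = 1200;			/* fe_start, offset inside the group */

	uint64_t block = group * blocks_per_group + start + first_data_block;

	printf("group %llu, offset %llu -> physical block %llu\n",
	       (unsigned long long)group,
	       (unsigned long long)start,
	       (unsigned long long)block);
	return 0;
}
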
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 5c1e27de7755..b9e077ba07e9 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -13,8 +13,8 @@
13 */ 13 */
14 14
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/ext4_jbd2.h> 16#include "ext4_jbd2.h"
17#include <linux/ext4_fs_extents.h> 17#include "ext4_extents.h"
18 18
19/* 19/*
20 * The contiguous blocks details which can be 20 * The contiguous blocks details which can be
@@ -327,7 +327,7 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
327} 327}
328 328
329static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, 329static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
330 struct inode *tmp_inode) 330 struct inode *tmp_inode)
331{ 331{
332 int retval; 332 int retval;
333 __le32 i_data[3]; 333 __le32 i_data[3];
@@ -339,7 +339,7 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
339 * i_data field of the original inode 339 * i_data field of the original inode
340 */ 340 */
341 retval = ext4_journal_extend(handle, 1); 341 retval = ext4_journal_extend(handle, 1);
342 if (retval != 0) { 342 if (retval) {
343 retval = ext4_journal_restart(handle, 1); 343 retval = ext4_journal_restart(handle, 1);
344 if (retval) 344 if (retval)
345 goto err_out; 345 goto err_out;
@@ -351,6 +351,18 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
351 351
352 down_write(&EXT4_I(inode)->i_data_sem); 352 down_write(&EXT4_I(inode)->i_data_sem);
353 /* 353 /*
354 * if EXT4_EXT_MIGRATE is cleared, a block allocation
355 * happened after we started the migrate. We need to
356 * fail the migrate.
357 */
358 if (!(EXT4_I(inode)->i_flags & EXT4_EXT_MIGRATE)) {
359 retval = -EAGAIN;
360 up_write(&EXT4_I(inode)->i_data_sem);
361 goto err_out;
362 } else
363 EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags &
364 ~EXT4_EXT_MIGRATE;
365 /*
354 * We have the extent map built with the tmp inode. 366 * We have the extent map built with the tmp inode.
355 * Now copy the i_data across 367 * Now copy the i_data across
356 */ 368 */
@@ -508,6 +520,17 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
508 * switch the inode format to prevent read. 520 * switch the inode format to prevent read.
509 */ 521 */
510 mutex_lock(&(inode->i_mutex)); 522 mutex_lock(&(inode->i_mutex));
523 /*
524 * Even though we take i_mutex we can still cause block allocation
525 * via mmap writes to holes. If we have allocated new blocks we fail
526 * the migrate. New block allocation will clear the EXT4_EXT_MIGRATE flag.
527 * The flag is updated with i_data_sem held to prevent racing with
528 * block allocation.
529 */
530 down_read((&EXT4_I(inode)->i_data_sem));
531 EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags | EXT4_EXT_MIGRATE;
532 up_read((&EXT4_I(inode)->i_data_sem));
533
511 handle = ext4_journal_start(inode, 1); 534 handle = ext4_journal_start(inode, 1);
512 535
513 ei = EXT4_I(inode); 536 ei = EXT4_I(inode);
@@ -559,9 +582,15 @@ err_out:
559 * tmp_inode 582 * tmp_inode
560 */ 583 */
561 free_ext_block(handle, tmp_inode); 584 free_ext_block(handle, tmp_inode);
562 else 585 else {
563 retval = ext4_ext_swap_inode_data(handle, inode, 586 retval = ext4_ext_swap_inode_data(handle, inode, tmp_inode);
564 tmp_inode); 587 if (retval)
588 /*
589 * if we fail to swap inode data, free the extent
590 * details of the tmp inode
591 */
592 free_ext_block(handle, tmp_inode);
593 }
565 594
566 /* We mark the tmp_inode dirty via ext4_ext_tree_init. */ 595 /* We mark the tmp_inode dirty via ext4_ext_tree_init. */
567 if (ext4_journal_extend(handle, 1) != 0) 596 if (ext4_journal_extend(handle, 1) != 0)
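
The migrate.c changes above hinge on a simple race check: the migrate path sets EXT4_EXT_MIGRATE before its long scan, any new block allocation clears it, and the final swap re-checks the flag under i_data_sem, failing with -EAGAIN if it is gone. The following toy model (pthreads standing in for i_data_sem, all names invented) shows the same pattern outside the kernel:

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

#define TOY_EXT_MIGRATE	0x1

static pthread_rwlock_t data_sem = PTHREAD_RWLOCK_INITIALIZER;
static unsigned int flags;		/* models EXT4_I(inode)->i_flags */

/* A concurrent "block allocation" clears the flag under the lock. */
static void *writer(void *arg)
{
	(void)arg;
	pthread_rwlock_wrlock(&data_sem);
	flags &= ~TOY_EXT_MIGRATE;
	pthread_rwlock_unlock(&data_sem);
	return NULL;
}

/* The final swap re-checks the flag under the write lock. */
static int migrate_commit(void)
{
	int ret = 0;

	pthread_rwlock_wrlock(&data_sem);
	if (!(flags & TOY_EXT_MIGRATE))
		ret = -EAGAIN;		/* someone allocated: fail the migrate */
	else
		flags &= ~TOY_EXT_MIGRATE;
	pthread_rwlock_unlock(&data_sem);
	return ret;
}

int main(void)
{
	pthread_t t;

	pthread_rwlock_rdlock(&data_sem);
	flags |= TOY_EXT_MIGRATE;	/* mark the migrate as in progress */
	pthread_rwlock_unlock(&data_sem);

	pthread_create(&t, NULL, writer, NULL);
	pthread_join(&t, NULL);

	printf("commit -> %d (negative means the migrate must be failed)\n",
	       migrate_commit());
	return 0;
}
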
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 28aa2ed4297e..ab16beaa830d 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -28,14 +28,14 @@
28#include <linux/pagemap.h> 28#include <linux/pagemap.h>
29#include <linux/jbd2.h> 29#include <linux/jbd2.h>
30#include <linux/time.h> 30#include <linux/time.h>
31#include <linux/ext4_fs.h>
32#include <linux/ext4_jbd2.h>
33#include <linux/fcntl.h> 31#include <linux/fcntl.h>
34#include <linux/stat.h> 32#include <linux/stat.h>
35#include <linux/string.h> 33#include <linux/string.h>
36#include <linux/quotaops.h> 34#include <linux/quotaops.h>
37#include <linux/buffer_head.h> 35#include <linux/buffer_head.h>
38#include <linux/bio.h> 36#include <linux/bio.h>
37#include "ext4.h"
38#include "ext4_jbd2.h"
39 39
40#include "namei.h" 40#include "namei.h"
41#include "xattr.h" 41#include "xattr.h"
@@ -57,10 +57,15 @@ static struct buffer_head *ext4_append(handle_t *handle,
57 57
58 *block = inode->i_size >> inode->i_sb->s_blocksize_bits; 58 *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
59 59
60 if ((bh = ext4_bread(handle, inode, *block, 1, err))) { 60 bh = ext4_bread(handle, inode, *block, 1, err);
61 if (bh) {
61 inode->i_size += inode->i_sb->s_blocksize; 62 inode->i_size += inode->i_sb->s_blocksize;
62 EXT4_I(inode)->i_disksize = inode->i_size; 63 EXT4_I(inode)->i_disksize = inode->i_size;
63 ext4_journal_get_write_access(handle,bh); 64 *err = ext4_journal_get_write_access(handle, bh);
65 if (*err) {
66 brelse(bh);
67 bh = NULL;
68 }
64 } 69 }
65 return bh; 70 return bh;
66} 71}
@@ -348,7 +353,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
348 if (root->info.hash_version != DX_HASH_TEA && 353 if (root->info.hash_version != DX_HASH_TEA &&
349 root->info.hash_version != DX_HASH_HALF_MD4 && 354 root->info.hash_version != DX_HASH_HALF_MD4 &&
350 root->info.hash_version != DX_HASH_LEGACY) { 355 root->info.hash_version != DX_HASH_LEGACY) {
351 ext4_warning(dir->i_sb, __FUNCTION__, 356 ext4_warning(dir->i_sb, __func__,
352 "Unrecognised inode hash code %d", 357 "Unrecognised inode hash code %d",
353 root->info.hash_version); 358 root->info.hash_version);
354 brelse(bh); 359 brelse(bh);
@@ -362,7 +367,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
362 hash = hinfo->hash; 367 hash = hinfo->hash;
363 368
364 if (root->info.unused_flags & 1) { 369 if (root->info.unused_flags & 1) {
365 ext4_warning(dir->i_sb, __FUNCTION__, 370 ext4_warning(dir->i_sb, __func__,
366 "Unimplemented inode hash flags: %#06x", 371 "Unimplemented inode hash flags: %#06x",
367 root->info.unused_flags); 372 root->info.unused_flags);
368 brelse(bh); 373 brelse(bh);
@@ -371,7 +376,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
371 } 376 }
372 377
373 if ((indirect = root->info.indirect_levels) > 1) { 378 if ((indirect = root->info.indirect_levels) > 1) {
374 ext4_warning(dir->i_sb, __FUNCTION__, 379 ext4_warning(dir->i_sb, __func__,
375 "Unimplemented inode hash depth: %#06x", 380 "Unimplemented inode hash depth: %#06x",
376 root->info.indirect_levels); 381 root->info.indirect_levels);
377 brelse(bh); 382 brelse(bh);
@@ -384,7 +389,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
384 389
385 if (dx_get_limit(entries) != dx_root_limit(dir, 390 if (dx_get_limit(entries) != dx_root_limit(dir,
386 root->info.info_length)) { 391 root->info.info_length)) {
387 ext4_warning(dir->i_sb, __FUNCTION__, 392 ext4_warning(dir->i_sb, __func__,
388 "dx entry: limit != root limit"); 393 "dx entry: limit != root limit");
389 brelse(bh); 394 brelse(bh);
390 *err = ERR_BAD_DX_DIR; 395 *err = ERR_BAD_DX_DIR;
@@ -396,7 +401,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
396 { 401 {
397 count = dx_get_count(entries); 402 count = dx_get_count(entries);
398 if (!count || count > dx_get_limit(entries)) { 403 if (!count || count > dx_get_limit(entries)) {
399 ext4_warning(dir->i_sb, __FUNCTION__, 404 ext4_warning(dir->i_sb, __func__,
400 "dx entry: no count or count > limit"); 405 "dx entry: no count or count > limit");
401 brelse(bh); 406 brelse(bh);
402 *err = ERR_BAD_DX_DIR; 407 *err = ERR_BAD_DX_DIR;
@@ -441,7 +446,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
441 goto fail2; 446 goto fail2;
442 at = entries = ((struct dx_node *) bh->b_data)->entries; 447 at = entries = ((struct dx_node *) bh->b_data)->entries;
443 if (dx_get_limit(entries) != dx_node_limit (dir)) { 448 if (dx_get_limit(entries) != dx_node_limit (dir)) {
444 ext4_warning(dir->i_sb, __FUNCTION__, 449 ext4_warning(dir->i_sb, __func__,
445 "dx entry: limit != node limit"); 450 "dx entry: limit != node limit");
446 brelse(bh); 451 brelse(bh);
447 *err = ERR_BAD_DX_DIR; 452 *err = ERR_BAD_DX_DIR;
@@ -457,7 +462,7 @@ fail2:
457 } 462 }
458fail: 463fail:
459 if (*err == ERR_BAD_DX_DIR) 464 if (*err == ERR_BAD_DX_DIR)
460 ext4_warning(dir->i_sb, __FUNCTION__, 465 ext4_warning(dir->i_sb, __func__,
461 "Corrupt dir inode %ld, running e2fsck is " 466 "Corrupt dir inode %ld, running e2fsck is "
462 "recommended.", dir->i_ino); 467 "recommended.", dir->i_ino);
463 return NULL; 468 return NULL;
@@ -914,7 +919,7 @@ restart:
914 wait_on_buffer(bh); 919 wait_on_buffer(bh);
915 if (!buffer_uptodate(bh)) { 920 if (!buffer_uptodate(bh)) {
916 /* read error, skip block & hope for the best */ 921 /* read error, skip block & hope for the best */
917 ext4_error(sb, __FUNCTION__, "reading directory #%lu " 922 ext4_error(sb, __func__, "reading directory #%lu "
918 "offset %lu", dir->i_ino, 923 "offset %lu", dir->i_ino,
919 (unsigned long)block); 924 (unsigned long)block);
920 brelse(bh); 925 brelse(bh);
@@ -1007,7 +1012,7 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
1007 retval = ext4_htree_next_block(dir, hash, frame, 1012 retval = ext4_htree_next_block(dir, hash, frame,
1008 frames, NULL); 1013 frames, NULL);
1009 if (retval < 0) { 1014 if (retval < 0) {
1010 ext4_warning(sb, __FUNCTION__, 1015 ext4_warning(sb, __func__,
1011 "error reading index page in directory #%lu", 1016 "error reading index page in directory #%lu",
1012 dir->i_ino); 1017 dir->i_ino);
1013 *err = retval; 1018 *err = retval;
@@ -1532,7 +1537,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1532 1537
1533 if (levels && (dx_get_count(frames->entries) == 1538 if (levels && (dx_get_count(frames->entries) ==
1534 dx_get_limit(frames->entries))) { 1539 dx_get_limit(frames->entries))) {
1535 ext4_warning(sb, __FUNCTION__, 1540 ext4_warning(sb, __func__,
1536 "Directory index full!"); 1541 "Directory index full!");
1537 err = -ENOSPC; 1542 err = -ENOSPC;
1538 goto cleanup; 1543 goto cleanup;
@@ -1860,11 +1865,11 @@ static int empty_dir (struct inode * inode)
1860 if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || 1865 if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) ||
1861 !(bh = ext4_bread (NULL, inode, 0, 0, &err))) { 1866 !(bh = ext4_bread (NULL, inode, 0, 0, &err))) {
1862 if (err) 1867 if (err)
1863 ext4_error(inode->i_sb, __FUNCTION__, 1868 ext4_error(inode->i_sb, __func__,
1864 "error %d reading directory #%lu offset 0", 1869 "error %d reading directory #%lu offset 0",
1865 err, inode->i_ino); 1870 err, inode->i_ino);
1866 else 1871 else
1867 ext4_warning(inode->i_sb, __FUNCTION__, 1872 ext4_warning(inode->i_sb, __func__,
1868 "bad directory (dir #%lu) - no data block", 1873 "bad directory (dir #%lu) - no data block",
1869 inode->i_ino); 1874 inode->i_ino);
1870 return 1; 1875 return 1;
@@ -1893,7 +1898,7 @@ static int empty_dir (struct inode * inode)
1893 offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); 1898 offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err);
1894 if (!bh) { 1899 if (!bh) {
1895 if (err) 1900 if (err)
1896 ext4_error(sb, __FUNCTION__, 1901 ext4_error(sb, __func__,
1897 "error %d reading directory" 1902 "error %d reading directory"
1898 " #%lu offset %lu", 1903 " #%lu offset %lu",
1899 err, inode->i_ino, offset); 1904 err, inode->i_ino, offset);
@@ -2217,6 +2222,8 @@ retry:
2217 goto out_stop; 2222 goto out_stop;
2218 } 2223 }
2219 } else { 2224 } else {
2225 /* clear the extent format for fast symlink */
2226 EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL;
2220 inode->i_op = &ext4_fast_symlink_inode_operations; 2227 inode->i_op = &ext4_fast_symlink_inode_operations;
2221 memcpy((char*)&EXT4_I(inode)->i_data,symname,l); 2228 memcpy((char*)&EXT4_I(inode)->i_data,symname,l);
2222 inode->i_size = l-1; 2229 inode->i_size = l-1;
@@ -2347,6 +2354,9 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
2347 EXT4_FEATURE_INCOMPAT_FILETYPE)) 2354 EXT4_FEATURE_INCOMPAT_FILETYPE))
2348 new_de->file_type = old_de->file_type; 2355 new_de->file_type = old_de->file_type;
2349 new_dir->i_version++; 2356 new_dir->i_version++;
2357 new_dir->i_ctime = new_dir->i_mtime =
2358 ext4_current_time(new_dir);
2359 ext4_mark_inode_dirty(handle, new_dir);
2350 BUFFER_TRACE(new_bh, "call ext4_journal_dirty_metadata"); 2360 BUFFER_TRACE(new_bh, "call ext4_journal_dirty_metadata");
2351 ext4_journal_dirty_metadata(handle, new_bh); 2361 ext4_journal_dirty_metadata(handle, new_bh);
2352 brelse(new_bh); 2362 brelse(new_bh);
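
Earlier in this namei.c diff, ext4_append() now checks the return value of ext4_journal_get_write_access() and releases the buffer on failure, so callers only ever see a usable buffer or NULL plus an error code. A standalone sketch of that acquire/verify/release-on-error shape, with invented toy_* stand-ins rather than the real buffer-head API:

#include <stdio.h>
#include <stdlib.h>

struct toy_buffer {
	int data;
};

static struct toy_buffer *get_buffer(int *err)
{
	struct toy_buffer *bh = calloc(1, sizeof(*bh));

	if (!bh)
		*err = -12;		/* -ENOMEM */
	return bh;
}

static int get_write_access(struct toy_buffer *bh)
{
	(void)bh;
	return 0;			/* pretend the journal said yes */
}

static struct toy_buffer *append_block(int *err)
{
	struct toy_buffer *bh = get_buffer(err);

	if (bh) {
		*err = get_write_access(bh);
		if (*err) {
			free(bh);	/* brelse() in the real code */
			bh = NULL;	/* caller sees NULL + *err, never a stale bh */
		}
	}
	return bh;
}

int main(void)
{
	int err = 0;
	struct toy_buffer *bh = append_block(&err);

	printf("bh=%p err=%d\n", (void *)bh, err);
	free(bh);
	return 0;
}
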
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index e29efa0f9d62..9f086a6a472b 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -11,11 +11,10 @@
11 11
12#define EXT4FS_DEBUG 12#define EXT4FS_DEBUG
13 13
14#include <linux/ext4_jbd2.h>
15
16#include <linux/errno.h> 14#include <linux/errno.h>
17#include <linux/slab.h> 15#include <linux/slab.h>
18 16
17#include "ext4_jbd2.h"
19#include "group.h" 18#include "group.h"
20 19
21#define outside(b, first, last) ((b) < (first) || (b) >= (last)) 20#define outside(b, first, last) ((b) < (first) || (b) >= (last))
@@ -50,63 +49,63 @@ static int verify_group_input(struct super_block *sb,
50 49
51 ext4_get_group_no_and_offset(sb, start, NULL, &offset); 50 ext4_get_group_no_and_offset(sb, start, NULL, &offset);
52 if (group != sbi->s_groups_count) 51 if (group != sbi->s_groups_count)
53 ext4_warning(sb, __FUNCTION__, 52 ext4_warning(sb, __func__,
54 "Cannot add at group %u (only %lu groups)", 53 "Cannot add at group %u (only %lu groups)",
55 input->group, sbi->s_groups_count); 54 input->group, sbi->s_groups_count);
56 else if (offset != 0) 55 else if (offset != 0)
57 ext4_warning(sb, __FUNCTION__, "Last group not full"); 56 ext4_warning(sb, __func__, "Last group not full");
58 else if (input->reserved_blocks > input->blocks_count / 5) 57 else if (input->reserved_blocks > input->blocks_count / 5)
59 ext4_warning(sb, __FUNCTION__, "Reserved blocks too high (%u)", 58 ext4_warning(sb, __func__, "Reserved blocks too high (%u)",
60 input->reserved_blocks); 59 input->reserved_blocks);
61 else if (free_blocks_count < 0) 60 else if (free_blocks_count < 0)
62 ext4_warning(sb, __FUNCTION__, "Bad blocks count %u", 61 ext4_warning(sb, __func__, "Bad blocks count %u",
63 input->blocks_count); 62 input->blocks_count);
64 else if (!(bh = sb_bread(sb, end - 1))) 63 else if (!(bh = sb_bread(sb, end - 1)))
65 ext4_warning(sb, __FUNCTION__, 64 ext4_warning(sb, __func__,
66 "Cannot read last block (%llu)", 65 "Cannot read last block (%llu)",
67 end - 1); 66 end - 1);
68 else if (outside(input->block_bitmap, start, end)) 67 else if (outside(input->block_bitmap, start, end))
69 ext4_warning(sb, __FUNCTION__, 68 ext4_warning(sb, __func__,
70 "Block bitmap not in group (block %llu)", 69 "Block bitmap not in group (block %llu)",
71 (unsigned long long)input->block_bitmap); 70 (unsigned long long)input->block_bitmap);
72 else if (outside(input->inode_bitmap, start, end)) 71 else if (outside(input->inode_bitmap, start, end))
73 ext4_warning(sb, __FUNCTION__, 72 ext4_warning(sb, __func__,
74 "Inode bitmap not in group (block %llu)", 73 "Inode bitmap not in group (block %llu)",
75 (unsigned long long)input->inode_bitmap); 74 (unsigned long long)input->inode_bitmap);
76 else if (outside(input->inode_table, start, end) || 75 else if (outside(input->inode_table, start, end) ||
77 outside(itend - 1, start, end)) 76 outside(itend - 1, start, end))
78 ext4_warning(sb, __FUNCTION__, 77 ext4_warning(sb, __func__,
79 "Inode table not in group (blocks %llu-%llu)", 78 "Inode table not in group (blocks %llu-%llu)",
80 (unsigned long long)input->inode_table, itend - 1); 79 (unsigned long long)input->inode_table, itend - 1);
81 else if (input->inode_bitmap == input->block_bitmap) 80 else if (input->inode_bitmap == input->block_bitmap)
82 ext4_warning(sb, __FUNCTION__, 81 ext4_warning(sb, __func__,
83 "Block bitmap same as inode bitmap (%llu)", 82 "Block bitmap same as inode bitmap (%llu)",
84 (unsigned long long)input->block_bitmap); 83 (unsigned long long)input->block_bitmap);
85 else if (inside(input->block_bitmap, input->inode_table, itend)) 84 else if (inside(input->block_bitmap, input->inode_table, itend))
86 ext4_warning(sb, __FUNCTION__, 85 ext4_warning(sb, __func__,
87 "Block bitmap (%llu) in inode table (%llu-%llu)", 86 "Block bitmap (%llu) in inode table (%llu-%llu)",
88 (unsigned long long)input->block_bitmap, 87 (unsigned long long)input->block_bitmap,
89 (unsigned long long)input->inode_table, itend - 1); 88 (unsigned long long)input->inode_table, itend - 1);
90 else if (inside(input->inode_bitmap, input->inode_table, itend)) 89 else if (inside(input->inode_bitmap, input->inode_table, itend))
91 ext4_warning(sb, __FUNCTION__, 90 ext4_warning(sb, __func__,
92 "Inode bitmap (%llu) in inode table (%llu-%llu)", 91 "Inode bitmap (%llu) in inode table (%llu-%llu)",
93 (unsigned long long)input->inode_bitmap, 92 (unsigned long long)input->inode_bitmap,
94 (unsigned long long)input->inode_table, itend - 1); 93 (unsigned long long)input->inode_table, itend - 1);
95 else if (inside(input->block_bitmap, start, metaend)) 94 else if (inside(input->block_bitmap, start, metaend))
96 ext4_warning(sb, __FUNCTION__, 95 ext4_warning(sb, __func__,
97 "Block bitmap (%llu) in GDT table" 96 "Block bitmap (%llu) in GDT table"
98 " (%llu-%llu)", 97 " (%llu-%llu)",
99 (unsigned long long)input->block_bitmap, 98 (unsigned long long)input->block_bitmap,
100 start, metaend - 1); 99 start, metaend - 1);
101 else if (inside(input->inode_bitmap, start, metaend)) 100 else if (inside(input->inode_bitmap, start, metaend))
102 ext4_warning(sb, __FUNCTION__, 101 ext4_warning(sb, __func__,
103 "Inode bitmap (%llu) in GDT table" 102 "Inode bitmap (%llu) in GDT table"
104 " (%llu-%llu)", 103 " (%llu-%llu)",
105 (unsigned long long)input->inode_bitmap, 104 (unsigned long long)input->inode_bitmap,
106 start, metaend - 1); 105 start, metaend - 1);
107 else if (inside(input->inode_table, start, metaend) || 106 else if (inside(input->inode_table, start, metaend) ||
108 inside(itend - 1, start, metaend)) 107 inside(itend - 1, start, metaend))
109 ext4_warning(sb, __FUNCTION__, 108 ext4_warning(sb, __func__,
110 "Inode table (%llu-%llu) overlaps" 109 "Inode table (%llu-%llu) overlaps"
111 "GDT table (%llu-%llu)", 110 "GDT table (%llu-%llu)",
112 (unsigned long long)input->inode_table, 111 (unsigned long long)input->inode_table,
@@ -368,7 +367,7 @@ static int verify_reserved_gdb(struct super_block *sb,
368 while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) { 367 while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) {
369 if (le32_to_cpu(*p++) != 368 if (le32_to_cpu(*p++) !=
370 grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){ 369 grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){
371 ext4_warning(sb, __FUNCTION__, 370 ext4_warning(sb, __func__,
372 "reserved GDT %llu" 371 "reserved GDT %llu"
373 " missing grp %d (%llu)", 372 " missing grp %d (%llu)",
374 blk, grp, 373 blk, grp,
@@ -424,7 +423,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
424 */ 423 */
425 if (EXT4_SB(sb)->s_sbh->b_blocknr != 424 if (EXT4_SB(sb)->s_sbh->b_blocknr !=
426 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { 425 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
427 ext4_warning(sb, __FUNCTION__, 426 ext4_warning(sb, __func__,
428 "won't resize using backup superblock at %llu", 427 "won't resize using backup superblock at %llu",
429 (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr); 428 (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr);
430 return -EPERM; 429 return -EPERM;
@@ -448,7 +447,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
448 447
449 data = (__le32 *)dind->b_data; 448 data = (__le32 *)dind->b_data;
450 if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) { 449 if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) {
451 ext4_warning(sb, __FUNCTION__, 450 ext4_warning(sb, __func__,
452 "new group %u GDT block %llu not reserved", 451 "new group %u GDT block %llu not reserved",
453 input->group, gdblock); 452 input->group, gdblock);
454 err = -EINVAL; 453 err = -EINVAL;
@@ -469,10 +468,10 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
469 goto exit_dindj; 468 goto exit_dindj;
470 469
471 n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *), 470 n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *),
472 GFP_KERNEL); 471 GFP_NOFS);
473 if (!n_group_desc) { 472 if (!n_group_desc) {
474 err = -ENOMEM; 473 err = -ENOMEM;
475 ext4_warning (sb, __FUNCTION__, 474 ext4_warning(sb, __func__,
476 "not enough memory for %lu groups", gdb_num + 1); 475 "not enough memory for %lu groups", gdb_num + 1);
477 goto exit_inode; 476 goto exit_inode;
478 } 477 }
@@ -502,8 +501,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
502 EXT4_SB(sb)->s_gdb_count++; 501 EXT4_SB(sb)->s_gdb_count++;
503 kfree(o_group_desc); 502 kfree(o_group_desc);
504 503
505 es->s_reserved_gdt_blocks = 504 le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
506 cpu_to_le16(le16_to_cpu(es->s_reserved_gdt_blocks) - 1);
507 ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); 505 ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
508 506
509 return 0; 507 return 0;
@@ -553,7 +551,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
553 int res, i; 551 int res, i;
554 int err; 552 int err;
555 553
556 primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_KERNEL); 554 primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_NOFS);
557 if (!primary) 555 if (!primary)
558 return -ENOMEM; 556 return -ENOMEM;
559 557
@@ -571,7 +569,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
571 /* Get each reserved primary GDT block and verify it holds backups */ 569 /* Get each reserved primary GDT block and verify it holds backups */
572 for (res = 0; res < reserved_gdb; res++, blk++) { 570 for (res = 0; res < reserved_gdb; res++, blk++) {
573 if (le32_to_cpu(*data) != blk) { 571 if (le32_to_cpu(*data) != blk) {
574 ext4_warning(sb, __FUNCTION__, 572 ext4_warning(sb, __func__,
575 "reserved block %llu" 573 "reserved block %llu"
576 " not at offset %ld", 574 " not at offset %ld",
577 blk, 575 blk,
@@ -715,7 +713,7 @@ static void update_backups(struct super_block *sb,
715 */ 713 */
716exit_err: 714exit_err:
717 if (err) { 715 if (err) {
718 ext4_warning(sb, __FUNCTION__, 716 ext4_warning(sb, __func__,
719 "can't update backup for group %lu (err %d), " 717 "can't update backup for group %lu (err %d), "
720 "forcing fsck on next reboot", group, err); 718 "forcing fsck on next reboot", group, err);
721 sbi->s_mount_state &= ~EXT4_VALID_FS; 719 sbi->s_mount_state &= ~EXT4_VALID_FS;
@@ -755,33 +753,33 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
755 753
756 if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb, 754 if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb,
757 EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) { 755 EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
758 ext4_warning(sb, __FUNCTION__, 756 ext4_warning(sb, __func__,
759 "Can't resize non-sparse filesystem further"); 757 "Can't resize non-sparse filesystem further");
760 return -EPERM; 758 return -EPERM;
761 } 759 }
762 760
763 if (ext4_blocks_count(es) + input->blocks_count < 761 if (ext4_blocks_count(es) + input->blocks_count <
764 ext4_blocks_count(es)) { 762 ext4_blocks_count(es)) {
765 ext4_warning(sb, __FUNCTION__, "blocks_count overflow\n"); 763 ext4_warning(sb, __func__, "blocks_count overflow\n");
766 return -EINVAL; 764 return -EINVAL;
767 } 765 }
768 766
769 if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) < 767 if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) <
770 le32_to_cpu(es->s_inodes_count)) { 768 le32_to_cpu(es->s_inodes_count)) {
771 ext4_warning(sb, __FUNCTION__, "inodes_count overflow\n"); 769 ext4_warning(sb, __func__, "inodes_count overflow\n");
772 return -EINVAL; 770 return -EINVAL;
773 } 771 }
774 772
775 if (reserved_gdb || gdb_off == 0) { 773 if (reserved_gdb || gdb_off == 0) {
776 if (!EXT4_HAS_COMPAT_FEATURE(sb, 774 if (!EXT4_HAS_COMPAT_FEATURE(sb,
777 EXT4_FEATURE_COMPAT_RESIZE_INODE)){ 775 EXT4_FEATURE_COMPAT_RESIZE_INODE)){
778 ext4_warning(sb, __FUNCTION__, 776 ext4_warning(sb, __func__,
779 "No reserved GDT blocks, can't resize"); 777 "No reserved GDT blocks, can't resize");
780 return -EPERM; 778 return -EPERM;
781 } 779 }
782 inode = ext4_iget(sb, EXT4_RESIZE_INO); 780 inode = ext4_iget(sb, EXT4_RESIZE_INO);
783 if (IS_ERR(inode)) { 781 if (IS_ERR(inode)) {
784 ext4_warning(sb, __FUNCTION__, 782 ext4_warning(sb, __func__,
785 "Error opening resize inode"); 783 "Error opening resize inode");
786 return PTR_ERR(inode); 784 return PTR_ERR(inode);
787 } 785 }
@@ -810,7 +808,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
810 808
811 lock_super(sb); 809 lock_super(sb);
812 if (input->group != sbi->s_groups_count) { 810 if (input->group != sbi->s_groups_count) {
813 ext4_warning(sb, __FUNCTION__, 811 ext4_warning(sb, __func__,
814 "multiple resizers run on filesystem!"); 812 "multiple resizers run on filesystem!");
815 err = -EBUSY; 813 err = -EBUSY;
816 goto exit_journal; 814 goto exit_journal;
@@ -877,8 +875,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
877 */ 875 */
878 ext4_blocks_count_set(es, ext4_blocks_count(es) + 876 ext4_blocks_count_set(es, ext4_blocks_count(es) +
879 input->blocks_count); 877 input->blocks_count);
880 es->s_inodes_count = cpu_to_le32(le32_to_cpu(es->s_inodes_count) + 878 le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb));
881 EXT4_INODES_PER_GROUP(sb));
882 879
883 /* 880 /*
884 * We need to protect s_groups_count against other CPUs seeing 881 * We need to protect s_groups_count against other CPUs seeing
@@ -977,13 +974,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
977 " too large to resize to %llu blocks safely\n", 974 " too large to resize to %llu blocks safely\n",
978 sb->s_id, n_blocks_count); 975 sb->s_id, n_blocks_count);
979 if (sizeof(sector_t) < 8) 976 if (sizeof(sector_t) < 8)
980 ext4_warning(sb, __FUNCTION__, 977 ext4_warning(sb, __func__,
981 "CONFIG_LBD not enabled\n"); 978 "CONFIG_LBD not enabled\n");
982 return -EINVAL; 979 return -EINVAL;
983 } 980 }
984 981
985 if (n_blocks_count < o_blocks_count) { 982 if (n_blocks_count < o_blocks_count) {
986 ext4_warning(sb, __FUNCTION__, 983 ext4_warning(sb, __func__,
987 "can't shrink FS - resize aborted"); 984 "can't shrink FS - resize aborted");
988 return -EBUSY; 985 return -EBUSY;
989 } 986 }
@@ -992,7 +989,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
992 ext4_get_group_no_and_offset(sb, o_blocks_count, NULL, &last); 989 ext4_get_group_no_and_offset(sb, o_blocks_count, NULL, &last);
993 990
994 if (last == 0) { 991 if (last == 0) {
995 ext4_warning(sb, __FUNCTION__, 992 ext4_warning(sb, __func__,
996 "need to use ext2online to resize further"); 993 "need to use ext2online to resize further");
997 return -EPERM; 994 return -EPERM;
998 } 995 }
@@ -1000,7 +997,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1000 add = EXT4_BLOCKS_PER_GROUP(sb) - last; 997 add = EXT4_BLOCKS_PER_GROUP(sb) - last;
1001 998
1002 if (o_blocks_count + add < o_blocks_count) { 999 if (o_blocks_count + add < o_blocks_count) {
1003 ext4_warning(sb, __FUNCTION__, "blocks_count overflow"); 1000 ext4_warning(sb, __func__, "blocks_count overflow");
1004 return -EINVAL; 1001 return -EINVAL;
1005 } 1002 }
1006 1003
@@ -1008,7 +1005,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1008 add = n_blocks_count - o_blocks_count; 1005 add = n_blocks_count - o_blocks_count;
1009 1006
1010 if (o_blocks_count + add < n_blocks_count) 1007 if (o_blocks_count + add < n_blocks_count)
1011 ext4_warning(sb, __FUNCTION__, 1008 ext4_warning(sb, __func__,
1012 "will only finish group (%llu" 1009 "will only finish group (%llu"
1013 " blocks, %u new)", 1010 " blocks, %u new)",
1014 o_blocks_count + add, add); 1011 o_blocks_count + add, add);
@@ -1016,7 +1013,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1016 /* See if the device is actually as big as what was requested */ 1013 /* See if the device is actually as big as what was requested */
1017 bh = sb_bread(sb, o_blocks_count + add -1); 1014 bh = sb_bread(sb, o_blocks_count + add -1);
1018 if (!bh) { 1015 if (!bh) {
1019 ext4_warning(sb, __FUNCTION__, 1016 ext4_warning(sb, __func__,
1020 "can't read last block, resize aborted"); 1017 "can't read last block, resize aborted");
1021 return -ENOSPC; 1018 return -ENOSPC;
1022 } 1019 }
@@ -1028,13 +1025,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1028 handle = ext4_journal_start_sb(sb, 3); 1025 handle = ext4_journal_start_sb(sb, 3);
1029 if (IS_ERR(handle)) { 1026 if (IS_ERR(handle)) {
1030 err = PTR_ERR(handle); 1027 err = PTR_ERR(handle);
1031 ext4_warning(sb, __FUNCTION__, "error %d on journal start",err); 1028 ext4_warning(sb, __func__, "error %d on journal start", err);
1032 goto exit_put; 1029 goto exit_put;
1033 } 1030 }
1034 1031
1035 lock_super(sb); 1032 lock_super(sb);
1036 if (o_blocks_count != ext4_blocks_count(es)) { 1033 if (o_blocks_count != ext4_blocks_count(es)) {
1037 ext4_warning(sb, __FUNCTION__, 1034 ext4_warning(sb, __func__,
1038 "multiple resizers run on filesystem!"); 1035 "multiple resizers run on filesystem!");
1039 unlock_super(sb); 1036 unlock_super(sb);
1040 ext4_journal_stop(handle); 1037 ext4_journal_stop(handle);
@@ -1044,7 +1041,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1044 1041
1045 if ((err = ext4_journal_get_write_access(handle, 1042 if ((err = ext4_journal_get_write_access(handle,
1046 EXT4_SB(sb)->s_sbh))) { 1043 EXT4_SB(sb)->s_sbh))) {
1047 ext4_warning(sb, __FUNCTION__, 1044 ext4_warning(sb, __func__,
1048 "error %d on journal write access", err); 1045 "error %d on journal write access", err);
1049 unlock_super(sb); 1046 unlock_super(sb);
1050 ext4_journal_stop(handle); 1047 ext4_journal_stop(handle);
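
Most of the resize.c (and namei.c) churn above is the mechanical switch from the GCC-specific __FUNCTION__ to the standard C99 __func__ identifier passed into ext4_warning(). A quick sketch of how such a warning helper uses it, with ext4_warning() stubbed out by printf and all toy_* names invented:

#include <stdio.h>

/* Stand-in for ext4_warning(sb, __func__, ...); printf replaces the
 * real superblock plumbing. */
#define toy_warning(fn, fmt, ...) \
	printf("TOY-fs warning (%s): " fmt "\n", fn, ##__VA_ARGS__)

static void cannot_shrink(void)
{
	/* __func__ expands to "cannot_shrink" here, portably, per C99. */
	toy_warning(__func__, "can't shrink FS - resize aborted");
}

int main(void)
{
	cannot_shrink();
	return 0;
}
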
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c81a8e759bad..09d9359c8055 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -21,8 +21,6 @@
21#include <linux/fs.h> 21#include <linux/fs.h>
22#include <linux/time.h> 22#include <linux/time.h>
23#include <linux/jbd2.h> 23#include <linux/jbd2.h>
24#include <linux/ext4_fs.h>
25#include <linux/ext4_jbd2.h>
26#include <linux/slab.h> 24#include <linux/slab.h>
27#include <linux/init.h> 25#include <linux/init.h>
28#include <linux/blkdev.h> 26#include <linux/blkdev.h>
@@ -38,9 +36,10 @@
38#include <linux/seq_file.h> 36#include <linux/seq_file.h>
39#include <linux/log2.h> 37#include <linux/log2.h>
40#include <linux/crc16.h> 38#include <linux/crc16.h>
41
42#include <asm/uaccess.h> 39#include <asm/uaccess.h>
43 40
41#include "ext4.h"
42#include "ext4_jbd2.h"
44#include "xattr.h" 43#include "xattr.h"
45#include "acl.h" 44#include "acl.h"
46#include "namei.h" 45#include "namei.h"
@@ -135,7 +134,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
135 * take the FS itself readonly cleanly. */ 134 * take the FS itself readonly cleanly. */
136 journal = EXT4_SB(sb)->s_journal; 135 journal = EXT4_SB(sb)->s_journal;
137 if (is_journal_aborted(journal)) { 136 if (is_journal_aborted(journal)) {
138 ext4_abort(sb, __FUNCTION__, 137 ext4_abort(sb, __func__,
139 "Detected aborted journal"); 138 "Detected aborted journal");
140 return ERR_PTR(-EROFS); 139 return ERR_PTR(-EROFS);
141 } 140 }
@@ -355,7 +354,7 @@ void ext4_update_dynamic_rev(struct super_block *sb)
355 if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) 354 if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
356 return; 355 return;
357 356
358 ext4_warning(sb, __FUNCTION__, 357 ext4_warning(sb, __func__,
359 "updating to rev %d because of new feature flag, " 358 "updating to rev %d because of new feature flag, "
360 "running e2fsck is recommended", 359 "running e2fsck is recommended",
361 EXT4_DYNAMIC_REV); 360 EXT4_DYNAMIC_REV);
@@ -945,8 +944,8 @@ static match_table_t tokens = {
945 {Opt_mballoc, "mballoc"}, 944 {Opt_mballoc, "mballoc"},
946 {Opt_nomballoc, "nomballoc"}, 945 {Opt_nomballoc, "nomballoc"},
947 {Opt_stripe, "stripe=%u"}, 946 {Opt_stripe, "stripe=%u"},
948 {Opt_err, NULL},
949 {Opt_resize, "resize"}, 947 {Opt_resize, "resize"},
948 {Opt_err, NULL},
950}; 949};
951 950
952static ext4_fsblk_t get_sb_block(void **data) 951static ext4_fsblk_t get_sb_block(void **data)
@@ -980,7 +979,7 @@ static int parse_options (char *options, struct super_block *sb,
980 int data_opt = 0; 979 int data_opt = 0;
981 int option; 980 int option;
982#ifdef CONFIG_QUOTA 981#ifdef CONFIG_QUOTA
983 int qtype; 982 int qtype, qfmt;
984 char *qname; 983 char *qname;
985#endif 984#endif
986 985
@@ -1163,9 +1162,11 @@ static int parse_options (char *options, struct super_block *sb,
1163 case Opt_grpjquota: 1162 case Opt_grpjquota:
1164 qtype = GRPQUOTA; 1163 qtype = GRPQUOTA;
1165set_qf_name: 1164set_qf_name:
1166 if (sb_any_quota_enabled(sb)) { 1165 if ((sb_any_quota_enabled(sb) ||
1166 sb_any_quota_suspended(sb)) &&
1167 !sbi->s_qf_names[qtype]) {
1167 printk(KERN_ERR 1168 printk(KERN_ERR
1168 "EXT4-fs: Cannot change journalled " 1169 "EXT4-fs: Cannot change journaled "
1169 "quota options when quota turned on.\n"); 1170 "quota options when quota turned on.\n");
1170 return 0; 1171 return 0;
1171 } 1172 }
@@ -1201,9 +1202,11 @@ set_qf_name:
1201 case Opt_offgrpjquota: 1202 case Opt_offgrpjquota:
1202 qtype = GRPQUOTA; 1203 qtype = GRPQUOTA;
1203clear_qf_name: 1204clear_qf_name:
1204 if (sb_any_quota_enabled(sb)) { 1205 if ((sb_any_quota_enabled(sb) ||
1206 sb_any_quota_suspended(sb)) &&
1207 sbi->s_qf_names[qtype]) {
1205 printk(KERN_ERR "EXT4-fs: Cannot change " 1208 printk(KERN_ERR "EXT4-fs: Cannot change "
1206 "journalled quota options when " 1209 "journaled quota options when "
1207 "quota turned on.\n"); 1210 "quota turned on.\n");
1208 return 0; 1211 return 0;
1209 } 1212 }
@@ -1214,10 +1217,20 @@ clear_qf_name:
1214 sbi->s_qf_names[qtype] = NULL; 1217 sbi->s_qf_names[qtype] = NULL;
1215 break; 1218 break;
1216 case Opt_jqfmt_vfsold: 1219 case Opt_jqfmt_vfsold:
1217 sbi->s_jquota_fmt = QFMT_VFS_OLD; 1220 qfmt = QFMT_VFS_OLD;
1218 break; 1221 goto set_qf_format;
1219 case Opt_jqfmt_vfsv0: 1222 case Opt_jqfmt_vfsv0:
1220 sbi->s_jquota_fmt = QFMT_VFS_V0; 1223 qfmt = QFMT_VFS_V0;
1224set_qf_format:
1225 if ((sb_any_quota_enabled(sb) ||
1226 sb_any_quota_suspended(sb)) &&
1227 sbi->s_jquota_fmt != qfmt) {
1228 printk(KERN_ERR "EXT4-fs: Cannot change "
1229 "journaled quota options when "
1230 "quota turned on.\n");
1231 return 0;
1232 }
1233 sbi->s_jquota_fmt = qfmt;
1221 break; 1234 break;
1222 case Opt_quota: 1235 case Opt_quota:
1223 case Opt_usrquota: 1236 case Opt_usrquota:
@@ -1242,6 +1255,9 @@ clear_qf_name:
1242 case Opt_quota: 1255 case Opt_quota:
1243 case Opt_usrquota: 1256 case Opt_usrquota:
1244 case Opt_grpquota: 1257 case Opt_grpquota:
1258 printk(KERN_ERR
1259 "EXT4-fs: quota options not supported.\n");
1260 break;
1245 case Opt_usrjquota: 1261 case Opt_usrjquota:
1246 case Opt_grpjquota: 1262 case Opt_grpjquota:
1247 case Opt_offusrjquota: 1263 case Opt_offusrjquota:
@@ -1249,7 +1265,7 @@ clear_qf_name:
1249 case Opt_jqfmt_vfsold: 1265 case Opt_jqfmt_vfsold:
1250 case Opt_jqfmt_vfsv0: 1266 case Opt_jqfmt_vfsv0:
1251 printk(KERN_ERR 1267 printk(KERN_ERR
1252 "EXT4-fs: journalled quota options not " 1268 "EXT4-fs: journaled quota options not "
1253 "supported.\n"); 1269 "supported.\n");
1254 break; 1270 break;
1255 case Opt_noquota: 1271 case Opt_noquota:
@@ -1334,14 +1350,14 @@ clear_qf_name:
1334 } 1350 }
1335 1351
1336 if (!sbi->s_jquota_fmt) { 1352 if (!sbi->s_jquota_fmt) {
1337 printk(KERN_ERR "EXT4-fs: journalled quota format " 1353 printk(KERN_ERR "EXT4-fs: journaled quota format "
1338 "not specified.\n"); 1354 "not specified.\n");
1339 return 0; 1355 return 0;
1340 } 1356 }
1341 } else { 1357 } else {
1342 if (sbi->s_jquota_fmt) { 1358 if (sbi->s_jquota_fmt) {
1343 printk(KERN_ERR "EXT4-fs: journalled quota format " 1359 printk(KERN_ERR "EXT4-fs: journaled quota format "
1344 "specified with no journalling " 1360 "specified with no journaling "
1345 "enabled.\n"); 1361 "enabled.\n");
1346 return 0; 1362 return 0;
1347 } 1363 }
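
The parse_options() hunk above tightens the journaled-quota options: while quota is enabled or suspended, a quota file name or format change is refused unless it leaves the in-use settings untouched. A condensed, hypothetical model of those two guards (toy struct, not the real ext4 superblock info):

#include <stdio.h>

struct toy_sb_info {
	int quota_enabled;		/* sb_any_quota_enabled() */
	int quota_suspended;		/* sb_any_quota_suspended() */
	const char *qf_name;		/* s_qf_names[qtype] */
	int jquota_fmt;			/* s_jquota_fmt */
};

static int set_qf_name(struct toy_sb_info *sbi, const char *name)
{
	if ((sbi->quota_enabled || sbi->quota_suspended) && !sbi->qf_name) {
		printf("cannot change journaled quota options when quota is on\n");
		return 0;
	}
	sbi->qf_name = name;
	return 1;
}

static int set_qf_format(struct toy_sb_info *sbi, int fmt)
{
	if ((sbi->quota_enabled || sbi->quota_suspended) &&
	    sbi->jquota_fmt != fmt) {
		printf("cannot change journaled quota format when quota is on\n");
		return 0;
	}
	sbi->jquota_fmt = fmt;
	return 1;
}

int main(void)
{
	struct toy_sb_info sbi = { .quota_enabled = 1, .jquota_fmt = 2 };

	printf("set new name    -> %d\n", set_qf_name(&sbi, "aquota.user"));
	printf("keep format 2   -> %d\n", set_qf_format(&sbi, 2));
	printf("switch to fmt 1 -> %d\n", set_qf_format(&sbi, 1));
	return 0;
}
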
@@ -1388,11 +1404,11 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1388 * a plain journaled filesystem we can keep it set as 1404 * a plain journaled filesystem we can keep it set as
1389 * valid forever! :) 1405 * valid forever! :)
1390 */ 1406 */
1391 es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & ~EXT4_VALID_FS); 1407 es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
1392#endif 1408#endif
1393 if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) 1409 if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
1394 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); 1410 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
1395 es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1); 1411 le16_add_cpu(&es->s_mnt_count, 1);
1396 es->s_mtime = cpu_to_le32(get_seconds()); 1412 es->s_mtime = cpu_to_le32(get_seconds());
1397 ext4_update_dynamic_rev(sb); 1413 ext4_update_dynamic_rev(sb);
1398 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 1414 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
@@ -1485,36 +1501,33 @@ static int ext4_check_descriptors(struct super_block *sb)
1485 block_bitmap = ext4_block_bitmap(sb, gdp); 1501 block_bitmap = ext4_block_bitmap(sb, gdp);
1486 if (block_bitmap < first_block || block_bitmap > last_block) 1502 if (block_bitmap < first_block || block_bitmap > last_block)
1487 { 1503 {
1488 ext4_error (sb, "ext4_check_descriptors", 1504 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1489 "Block bitmap for group %lu" 1505 "Block bitmap for group %lu not in group "
1490 " not in group (block %llu)!", 1506 "(block %llu)!", i, block_bitmap);
1491 i, block_bitmap);
1492 return 0; 1507 return 0;
1493 } 1508 }
1494 inode_bitmap = ext4_inode_bitmap(sb, gdp); 1509 inode_bitmap = ext4_inode_bitmap(sb, gdp);
1495 if (inode_bitmap < first_block || inode_bitmap > last_block) 1510 if (inode_bitmap < first_block || inode_bitmap > last_block)
1496 { 1511 {
1497 ext4_error (sb, "ext4_check_descriptors", 1512 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1498 "Inode bitmap for group %lu" 1513 "Inode bitmap for group %lu not in group "
1499 " not in group (block %llu)!", 1514 "(block %llu)!", i, inode_bitmap);
1500 i, inode_bitmap);
1501 return 0; 1515 return 0;
1502 } 1516 }
1503 inode_table = ext4_inode_table(sb, gdp); 1517 inode_table = ext4_inode_table(sb, gdp);
1504 if (inode_table < first_block || 1518 if (inode_table < first_block ||
1505 inode_table + sbi->s_itb_per_group - 1 > last_block) 1519 inode_table + sbi->s_itb_per_group - 1 > last_block)
1506 { 1520 {
1507 ext4_error (sb, "ext4_check_descriptors", 1521 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1508 "Inode table for group %lu" 1522 "Inode table for group %lu not in group "
1509 " not in group (block %llu)!", 1523 "(block %llu)!", i, inode_table);
1510 i, inode_table);
1511 return 0; 1524 return 0;
1512 } 1525 }
1513 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { 1526 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
1514 ext4_error(sb, __FUNCTION__, 1527 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1515 "Checksum for group %lu failed (%u!=%u)\n", 1528 "Checksum for group %lu failed (%u!=%u)\n",
1516 i, le16_to_cpu(ext4_group_desc_csum(sbi, i, 1529 i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
1517 gdp)), le16_to_cpu(gdp->bg_checksum)); 1530 gdp)), le16_to_cpu(gdp->bg_checksum));
1518 return 0; 1531 return 0;
1519 } 1532 }
1520 if (!flexbg_flag) 1533 if (!flexbg_flag)
@@ -1585,7 +1598,7 @@ static void ext4_orphan_cleanup (struct super_block * sb,
1585 int ret = ext4_quota_on_mount(sb, i); 1598 int ret = ext4_quota_on_mount(sb, i);
1586 if (ret < 0) 1599 if (ret < 0)
1587 printk(KERN_ERR 1600 printk(KERN_ERR
1588 "EXT4-fs: Cannot turn on journalled " 1601 "EXT4-fs: Cannot turn on journaled "
1589 "quota: error %d\n", ret); 1602 "quota: error %d\n", ret);
1590 } 1603 }
1591 } 1604 }
@@ -1594,8 +1607,8 @@ static void ext4_orphan_cleanup (struct super_block * sb,
1594 while (es->s_last_orphan) { 1607 while (es->s_last_orphan) {
1595 struct inode *inode; 1608 struct inode *inode;
1596 1609
1597 if (!(inode = 1610 inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
1598 ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)))) { 1611 if (IS_ERR(inode)) {
1599 es->s_last_orphan = 0; 1612 es->s_last_orphan = 0;
1600 break; 1613 break;
1601 } 1614 }
@@ -1605,7 +1618,7 @@ static void ext4_orphan_cleanup (struct super_block * sb,
1605 if (inode->i_nlink) { 1618 if (inode->i_nlink) {
1606 printk(KERN_DEBUG 1619 printk(KERN_DEBUG
1607 "%s: truncating inode %lu to %Ld bytes\n", 1620 "%s: truncating inode %lu to %Ld bytes\n",
1608 __FUNCTION__, inode->i_ino, inode->i_size); 1621 __func__, inode->i_ino, inode->i_size);
1609 jbd_debug(2, "truncating inode %lu to %Ld bytes\n", 1622 jbd_debug(2, "truncating inode %lu to %Ld bytes\n",
1610 inode->i_ino, inode->i_size); 1623 inode->i_ino, inode->i_size);
1611 ext4_truncate(inode); 1624 ext4_truncate(inode);
@@ -1613,7 +1626,7 @@ static void ext4_orphan_cleanup (struct super_block * sb,
1613 } else { 1626 } else {
1614 printk(KERN_DEBUG 1627 printk(KERN_DEBUG
1615 "%s: deleting unreferenced inode %lu\n", 1628 "%s: deleting unreferenced inode %lu\n",
1616 __FUNCTION__, inode->i_ino); 1629 __func__, inode->i_ino);
1617 jbd_debug(2, "deleting unreferenced inode %lu\n", 1630 jbd_debug(2, "deleting unreferenced inode %lu\n",
1618 inode->i_ino); 1631 inode->i_ino);
1619 nr_orphans++; 1632 nr_orphans++;
@@ -2699,9 +2712,9 @@ static void ext4_clear_journal_err(struct super_block * sb,
2699 char nbuf[16]; 2712 char nbuf[16];
2700 2713
2701 errstr = ext4_decode_error(sb, j_errno, nbuf); 2714 errstr = ext4_decode_error(sb, j_errno, nbuf);
2702 ext4_warning(sb, __FUNCTION__, "Filesystem error recorded " 2715 ext4_warning(sb, __func__, "Filesystem error recorded "
2703 "from previous mount: %s", errstr); 2716 "from previous mount: %s", errstr);
2704 ext4_warning(sb, __FUNCTION__, "Marking fs in need of " 2717 ext4_warning(sb, __func__, "Marking fs in need of "
2705 "filesystem check."); 2718 "filesystem check.");
2706 2719
2707 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 2720 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
@@ -2828,7 +2841,7 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data)
2828 } 2841 }
2829 2842
2830 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) 2843 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT)
2831 ext4_abort(sb, __FUNCTION__, "Abort forced by user"); 2844 ext4_abort(sb, __func__, "Abort forced by user");
2832 2845
2833 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 2846 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2834 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 2847 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
@@ -3040,8 +3053,14 @@ static int ext4_dquot_drop(struct inode *inode)
3040 3053
3041 /* We may delete quota structure so we need to reserve enough blocks */ 3054 /* We may delete quota structure so we need to reserve enough blocks */
3042 handle = ext4_journal_start(inode, 2*EXT4_QUOTA_DEL_BLOCKS(inode->i_sb)); 3055 handle = ext4_journal_start(inode, 2*EXT4_QUOTA_DEL_BLOCKS(inode->i_sb));
3043 if (IS_ERR(handle)) 3056 if (IS_ERR(handle)) {
3057 /*
3058 * We call dquot_drop() anyway to at least release references
3059 * to quota structures so that umount does not hang.
3060 */
3061 dquot_drop(inode);
3044 return PTR_ERR(handle); 3062 return PTR_ERR(handle);
3063 }
3045 ret = dquot_drop(inode); 3064 ret = dquot_drop(inode);
3046 err = ext4_journal_stop(handle); 3065 err = ext4_journal_stop(handle);
3047 if (!ret) 3066 if (!ret)
@@ -3104,7 +3123,7 @@ static int ext4_release_dquot(struct dquot *dquot)
3104 3123
3105static int ext4_mark_dquot_dirty(struct dquot *dquot) 3124static int ext4_mark_dquot_dirty(struct dquot *dquot)
3106{ 3125{
3107 /* Are we journalling quotas? */ 3126 /* Are we journaling quotas? */
3108 if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || 3127 if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
3109 EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { 3128 EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
3110 dquot_mark_dquot_dirty(dquot); 3129 dquot_mark_dquot_dirty(dquot);
@@ -3151,23 +3170,42 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3151 3170
3152 if (!test_opt(sb, QUOTA)) 3171 if (!test_opt(sb, QUOTA))
3153 return -EINVAL; 3172 return -EINVAL;
3154 /* Not journalling quota? */ 3173 /* When remounting, no checks are needed and in fact, path is NULL */
3155 if ((!EXT4_SB(sb)->s_qf_names[USRQUOTA] && 3174 if (remount)
3156 !EXT4_SB(sb)->s_qf_names[GRPQUOTA]) || remount)
3157 return vfs_quota_on(sb, type, format_id, path, remount); 3175 return vfs_quota_on(sb, type, format_id, path, remount);
3176
3158 err = path_lookup(path, LOOKUP_FOLLOW, &nd); 3177 err = path_lookup(path, LOOKUP_FOLLOW, &nd);
3159 if (err) 3178 if (err)
3160 return err; 3179 return err;
3180
3161 /* Quotafile not on the same filesystem? */ 3181 /* Quotafile not on the same filesystem? */
3162 if (nd.path.mnt->mnt_sb != sb) { 3182 if (nd.path.mnt->mnt_sb != sb) {
3163 path_put(&nd.path); 3183 path_put(&nd.path);
3164 return -EXDEV; 3184 return -EXDEV;
3165 } 3185 }
3166 /* Quotafile not of fs root? */ 3186 /* Journaling quota? */
3167 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) 3187 if (EXT4_SB(sb)->s_qf_names[type]) {
3168 printk(KERN_WARNING 3188 /* Quotafile not of fs root? */
3169 "EXT4-fs: Quota file not on filesystem root. " 3189 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
3170 "Journalled quota will not work.\n"); 3190 printk(KERN_WARNING
3191 "EXT4-fs: Quota file not on filesystem root. "
3192 "Journaled quota will not work.\n");
3193 }
3194
3195 /*
3196 * When we journal data on quota file, we have to flush journal to see
3197 * all updates to the file when we bypass pagecache...
3198 */
3199 if (ext4_should_journal_data(nd.path.dentry->d_inode)) {
3200 /*
3201 * We don't need to lock updates but journal_flush() could
3202 * otherwise be livelocked...
3203 */
3204 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
3205 jbd2_journal_flush(EXT4_SB(sb)->s_journal);
3206 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
3207 }
3208
3171 path_put(&nd.path); 3209 path_put(&nd.path);
3172 return vfs_quota_on(sb, type, format_id, path, remount); 3210 return vfs_quota_on(sb, type, format_id, path, remount);
3173} 3211}
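Editor's illustration (not part of the patch): the ext4_setup_super hunk above replaces the open-coded read/modify/write of the little-endian mount counter with le16_add_cpu(). The following minimal user-space sketch shows why the two forms are equivalent; the demo_* names, the byte-swap stand-ins for cpu_to_le16()/le16_to_cpu(), and the main() driver are invented for the demo and are not kernel APIs.

/*
 * User-space approximation of the le16_add_cpu() pattern: read the
 * little-endian on-disk field, add in CPU byte order, store it back
 * as little-endian.  Self-consistent on any host.
 */
#include <stdint.h>
#include <stdio.h>

static uint16_t demo_swap16(uint16_t v)
{
	return (uint16_t)((v << 8) | (v >> 8));
}

/* Stand-ins for le16_to_cpu()/cpu_to_le16() on a big-endian host. */
static uint16_t demo_le16_to_cpu(uint16_t le) { return demo_swap16(le); }
static uint16_t demo_cpu_to_le16(uint16_t v)  { return demo_swap16(v); }

/* The helper the patch switches to, modelled in user space. */
static void demo_le16_add_cpu(uint16_t *var, uint16_t val)
{
	*var = demo_cpu_to_le16((uint16_t)(demo_le16_to_cpu(*var) + val));
}

int main(void)
{
	uint16_t mnt_count = demo_cpu_to_le16(41);	/* on-disk field */

	/* Equivalent to: es->s_mnt_count = cpu_to_le16(le16_to_cpu(...) + 1) */
	demo_le16_add_cpu(&mnt_count, 1);

	printf("mount count is now %u\n", (unsigned)demo_le16_to_cpu(mnt_count));
	return 0;
}

The helper keeps the endianness conversion in one place, which is the point of the conversion above.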
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index e6f9da4287c4..e9178643dc01 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -19,8 +19,8 @@
19 19
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/jbd2.h> 21#include <linux/jbd2.h>
22#include <linux/ext4_fs.h>
23#include <linux/namei.h> 22#include <linux/namei.h>
23#include "ext4.h"
24#include "xattr.h" 24#include "xattr.h"
25 25
26static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd) 26static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index e9054c1c7d93..ff08633f398e 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -53,11 +53,11 @@
53#include <linux/init.h> 53#include <linux/init.h>
54#include <linux/fs.h> 54#include <linux/fs.h>
55#include <linux/slab.h> 55#include <linux/slab.h>
56#include <linux/ext4_jbd2.h>
57#include <linux/ext4_fs.h>
58#include <linux/mbcache.h> 56#include <linux/mbcache.h>
59#include <linux/quotaops.h> 57#include <linux/quotaops.h>
60#include <linux/rwsem.h> 58#include <linux/rwsem.h>
59#include "ext4_jbd2.h"
60#include "ext4.h"
61#include "xattr.h" 61#include "xattr.h"
62#include "acl.h" 62#include "acl.h"
63 63
@@ -92,6 +92,8 @@ static struct buffer_head *ext4_xattr_cache_find(struct inode *,
92 struct mb_cache_entry **); 92 struct mb_cache_entry **);
93static void ext4_xattr_rehash(struct ext4_xattr_header *, 93static void ext4_xattr_rehash(struct ext4_xattr_header *,
94 struct ext4_xattr_entry *); 94 struct ext4_xattr_entry *);
95static int ext4_xattr_list(struct inode *inode, char *buffer,
96 size_t buffer_size);
95 97
96static struct mb_cache *ext4_xattr_cache; 98static struct mb_cache *ext4_xattr_cache;
97 99
@@ -225,7 +227,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
225 ea_bdebug(bh, "b_count=%d, refcount=%d", 227 ea_bdebug(bh, "b_count=%d, refcount=%d",
226 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); 228 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
227 if (ext4_xattr_check_block(bh)) { 229 if (ext4_xattr_check_block(bh)) {
228bad_block: ext4_error(inode->i_sb, __FUNCTION__, 230bad_block: ext4_error(inode->i_sb, __func__,
229 "inode %lu: bad block %llu", inode->i_ino, 231 "inode %lu: bad block %llu", inode->i_ino,
230 EXT4_I(inode)->i_file_acl); 232 EXT4_I(inode)->i_file_acl);
231 error = -EIO; 233 error = -EIO;
@@ -367,7 +369,7 @@ ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
367 ea_bdebug(bh, "b_count=%d, refcount=%d", 369 ea_bdebug(bh, "b_count=%d, refcount=%d",
368 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); 370 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
369 if (ext4_xattr_check_block(bh)) { 371 if (ext4_xattr_check_block(bh)) {
370 ext4_error(inode->i_sb, __FUNCTION__, 372 ext4_error(inode->i_sb, __func__,
371 "inode %lu: bad block %llu", inode->i_ino, 373 "inode %lu: bad block %llu", inode->i_ino,
372 EXT4_I(inode)->i_file_acl); 374 EXT4_I(inode)->i_file_acl);
373 error = -EIO; 375 error = -EIO;
@@ -420,7 +422,7 @@ cleanup:
420 * Returns a negative error number on failure, or the number of bytes 422 * Returns a negative error number on failure, or the number of bytes
421 * used / required on success. 423 * used / required on success.
422 */ 424 */
423int 425static int
424ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) 426ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
425{ 427{
426 int i_error, b_error; 428 int i_error, b_error;
@@ -484,8 +486,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
484 get_bh(bh); 486 get_bh(bh);
485 ext4_forget(handle, 1, inode, bh, bh->b_blocknr); 487 ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
486 } else { 488 } else {
487 BHDR(bh)->h_refcount = cpu_to_le32( 489 le32_add_cpu(&BHDR(bh)->h_refcount, -1);
488 le32_to_cpu(BHDR(bh)->h_refcount) - 1);
489 error = ext4_journal_dirty_metadata(handle, bh); 490 error = ext4_journal_dirty_metadata(handle, bh);
490 if (IS_SYNC(inode)) 491 if (IS_SYNC(inode))
491 handle->h_sync = 1; 492 handle->h_sync = 1;
@@ -660,7 +661,7 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
660 atomic_read(&(bs->bh->b_count)), 661 atomic_read(&(bs->bh->b_count)),
661 le32_to_cpu(BHDR(bs->bh)->h_refcount)); 662 le32_to_cpu(BHDR(bs->bh)->h_refcount));
662 if (ext4_xattr_check_block(bs->bh)) { 663 if (ext4_xattr_check_block(bs->bh)) {
663 ext4_error(sb, __FUNCTION__, 664 ext4_error(sb, __func__,
664 "inode %lu: bad block %llu", inode->i_ino, 665 "inode %lu: bad block %llu", inode->i_ino,
665 EXT4_I(inode)->i_file_acl); 666 EXT4_I(inode)->i_file_acl);
666 error = -EIO; 667 error = -EIO;
@@ -738,7 +739,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
738 ce = NULL; 739 ce = NULL;
739 } 740 }
740 ea_bdebug(bs->bh, "cloning"); 741 ea_bdebug(bs->bh, "cloning");
741 s->base = kmalloc(bs->bh->b_size, GFP_KERNEL); 742 s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
742 error = -ENOMEM; 743 error = -ENOMEM;
743 if (s->base == NULL) 744 if (s->base == NULL)
744 goto cleanup; 745 goto cleanup;
@@ -750,7 +751,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
750 } 751 }
751 } else { 752 } else {
752 /* Allocate a buffer where we construct the new block. */ 753 /* Allocate a buffer where we construct the new block. */
753 s->base = kzalloc(sb->s_blocksize, GFP_KERNEL); 754 s->base = kzalloc(sb->s_blocksize, GFP_NOFS);
754 /* assert(header == s->base) */ 755 /* assert(header == s->base) */
755 error = -ENOMEM; 756 error = -ENOMEM;
756 if (s->base == NULL) 757 if (s->base == NULL)
@@ -789,8 +790,7 @@ inserted:
789 if (error) 790 if (error)
790 goto cleanup_dquot; 791 goto cleanup_dquot;
791 lock_buffer(new_bh); 792 lock_buffer(new_bh);
792 BHDR(new_bh)->h_refcount = cpu_to_le32(1 + 793 le32_add_cpu(&BHDR(new_bh)->h_refcount, 1);
793 le32_to_cpu(BHDR(new_bh)->h_refcount));
794 ea_bdebug(new_bh, "reusing; refcount now=%d", 794 ea_bdebug(new_bh, "reusing; refcount now=%d",
795 le32_to_cpu(BHDR(new_bh)->h_refcount)); 795 le32_to_cpu(BHDR(new_bh)->h_refcount));
796 unlock_buffer(new_bh); 796 unlock_buffer(new_bh);
@@ -808,10 +808,8 @@ inserted:
808 get_bh(new_bh); 808 get_bh(new_bh);
809 } else { 809 } else {
810 /* We need to allocate a new block */ 810 /* We need to allocate a new block */
811 ext4_fsblk_t goal = le32_to_cpu( 811 ext4_fsblk_t goal = ext4_group_first_block_no(sb,
812 EXT4_SB(sb)->s_es->s_first_data_block) + 812 EXT4_I(inode)->i_block_group);
813 (ext4_fsblk_t)EXT4_I(inode)->i_block_group *
814 EXT4_BLOCKS_PER_GROUP(sb);
815 ext4_fsblk_t block = ext4_new_block(handle, inode, 813 ext4_fsblk_t block = ext4_new_block(handle, inode,
816 goal, &error); 814 goal, &error);
817 if (error) 815 if (error)
@@ -863,7 +861,7 @@ cleanup_dquot:
863 goto cleanup; 861 goto cleanup;
864 862
865bad_block: 863bad_block:
866 ext4_error(inode->i_sb, __FUNCTION__, 864 ext4_error(inode->i_sb, __func__,
867 "inode %lu: bad block %llu", inode->i_ino, 865 "inode %lu: bad block %llu", inode->i_ino,
868 EXT4_I(inode)->i_file_acl); 866 EXT4_I(inode)->i_file_acl);
869 goto cleanup; 867 goto cleanup;
@@ -1011,6 +1009,11 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
1011 i.value = NULL; 1009 i.value = NULL;
1012 error = ext4_xattr_block_set(handle, inode, &i, &bs); 1010 error = ext4_xattr_block_set(handle, inode, &i, &bs);
1013 } else if (error == -ENOSPC) { 1011 } else if (error == -ENOSPC) {
1012 if (EXT4_I(inode)->i_file_acl && !bs.s.base) {
1013 error = ext4_xattr_block_find(inode, &i, &bs);
1014 if (error)
1015 goto cleanup;
1016 }
1014 error = ext4_xattr_block_set(handle, inode, &i, &bs); 1017 error = ext4_xattr_block_set(handle, inode, &i, &bs);
1015 if (error) 1018 if (error)
1016 goto cleanup; 1019 goto cleanup;
@@ -1166,7 +1169,7 @@ retry:
1166 if (!bh) 1169 if (!bh)
1167 goto cleanup; 1170 goto cleanup;
1168 if (ext4_xattr_check_block(bh)) { 1171 if (ext4_xattr_check_block(bh)) {
1169 ext4_error(inode->i_sb, __FUNCTION__, 1172 ext4_error(inode->i_sb, __func__,
1170 "inode %lu: bad block %llu", inode->i_ino, 1173 "inode %lu: bad block %llu", inode->i_ino,
1171 EXT4_I(inode)->i_file_acl); 1174 EXT4_I(inode)->i_file_acl);
1172 error = -EIO; 1175 error = -EIO;
@@ -1341,14 +1344,14 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
1341 goto cleanup; 1344 goto cleanup;
1342 bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); 1345 bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
1343 if (!bh) { 1346 if (!bh) {
1344 ext4_error(inode->i_sb, __FUNCTION__, 1347 ext4_error(inode->i_sb, __func__,
1345 "inode %lu: block %llu read error", inode->i_ino, 1348 "inode %lu: block %llu read error", inode->i_ino,
1346 EXT4_I(inode)->i_file_acl); 1349 EXT4_I(inode)->i_file_acl);
1347 goto cleanup; 1350 goto cleanup;
1348 } 1351 }
1349 if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || 1352 if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
1350 BHDR(bh)->h_blocks != cpu_to_le32(1)) { 1353 BHDR(bh)->h_blocks != cpu_to_le32(1)) {
1351 ext4_error(inode->i_sb, __FUNCTION__, 1354 ext4_error(inode->i_sb, __func__,
1352 "inode %lu: bad block %llu", inode->i_ino, 1355 "inode %lu: bad block %llu", inode->i_ino,
1353 EXT4_I(inode)->i_file_acl); 1356 EXT4_I(inode)->i_file_acl);
1354 goto cleanup; 1357 goto cleanup;
@@ -1475,7 +1478,7 @@ again:
1475 } 1478 }
1476 bh = sb_bread(inode->i_sb, ce->e_block); 1479 bh = sb_bread(inode->i_sb, ce->e_block);
1477 if (!bh) { 1480 if (!bh) {
1478 ext4_error(inode->i_sb, __FUNCTION__, 1481 ext4_error(inode->i_sb, __func__,
1479 "inode %lu: block %lu read error", 1482 "inode %lu: block %lu read error",
1480 inode->i_ino, (unsigned long) ce->e_block); 1483 inode->i_ino, (unsigned long) ce->e_block);
1481 } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= 1484 } else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index d7f5d6a12651..5992fe979bb9 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -74,7 +74,6 @@ extern struct xattr_handler ext4_xattr_security_handler;
74extern ssize_t ext4_listxattr(struct dentry *, char *, size_t); 74extern ssize_t ext4_listxattr(struct dentry *, char *, size_t);
75 75
76extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t); 76extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t);
77extern int ext4_xattr_list(struct inode *, char *, size_t);
78extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int); 77extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
79extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int); 78extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
80 79
@@ -99,12 +98,6 @@ ext4_xattr_get(struct inode *inode, int name_index, const char *name,
99} 98}
100 99
101static inline int 100static inline int
102ext4_xattr_list(struct inode *inode, void *buffer, size_t size)
103{
104 return -EOPNOTSUPP;
105}
106
107static inline int
108ext4_xattr_set(struct inode *inode, int name_index, const char *name, 101ext4_xattr_set(struct inode *inode, int name_index, const char *name,
109 const void *value, size_t size, int flags) 102 const void *value, size_t size, int flags)
110{ 103{
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index f17eaf2321b9..ca5f89fc6cae 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -6,9 +6,9 @@
6#include <linux/module.h> 6#include <linux/module.h>
7#include <linux/string.h> 7#include <linux/string.h>
8#include <linux/fs.h> 8#include <linux/fs.h>
9#include <linux/ext4_jbd2.h>
10#include <linux/ext4_fs.h>
11#include <linux/security.h> 9#include <linux/security.h>
10#include "ext4_jbd2.h"
11#include "ext4.h"
12#include "xattr.h" 12#include "xattr.h"
13 13
14static size_t 14static size_t
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c
index e0f05acdafec..fff33382cadc 100644
--- a/fs/ext4/xattr_trusted.c
+++ b/fs/ext4/xattr_trusted.c
@@ -9,8 +9,8 @@
9#include <linux/string.h> 9#include <linux/string.h>
10#include <linux/capability.h> 10#include <linux/capability.h>
11#include <linux/fs.h> 11#include <linux/fs.h>
12#include <linux/ext4_jbd2.h> 12#include "ext4_jbd2.h"
13#include <linux/ext4_fs.h> 13#include "ext4.h"
14#include "xattr.h" 14#include "xattr.h"
15 15
16#define XATTR_TRUSTED_PREFIX "trusted." 16#define XATTR_TRUSTED_PREFIX "trusted."
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c
index 7ed3d8ebf096..67be723fcc4e 100644
--- a/fs/ext4/xattr_user.c
+++ b/fs/ext4/xattr_user.c
@@ -8,8 +8,8 @@
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/string.h> 9#include <linux/string.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/ext4_jbd2.h> 11#include "ext4_jbd2.h"
12#include <linux/ext4_fs.h> 12#include "ext4.h"
13#include "xattr.h" 13#include "xattr.h"
14 14
15#define XATTR_USER_PREFIX "user." 15#define XATTR_USER_PREFIX "user."
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 639b3b4f86d1..fda25479af26 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -242,7 +242,7 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus)
242 /* prevent the infinite loop of cluster chain */ 242 /* prevent the infinite loop of cluster chain */
243 if (*fclus > limit) { 243 if (*fclus > limit) {
244 fat_fs_panic(sb, "%s: detected the cluster chain loop" 244 fat_fs_panic(sb, "%s: detected the cluster chain loop"
245 " (i_pos %lld)", __FUNCTION__, 245 " (i_pos %lld)", __func__,
246 MSDOS_I(inode)->i_pos); 246 MSDOS_I(inode)->i_pos);
247 nr = -EIO; 247 nr = -EIO;
248 goto out; 248 goto out;
@@ -253,7 +253,7 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus)
253 goto out; 253 goto out;
254 else if (nr == FAT_ENT_FREE) { 254 else if (nr == FAT_ENT_FREE) {
255 fat_fs_panic(sb, "%s: invalid cluster chain" 255 fat_fs_panic(sb, "%s: invalid cluster chain"
256 " (i_pos %lld)", __FUNCTION__, 256 " (i_pos %lld)", __func__,
257 MSDOS_I(inode)->i_pos); 257 MSDOS_I(inode)->i_pos);
258 nr = -EIO; 258 nr = -EIO;
259 goto out; 259 goto out;
@@ -286,7 +286,7 @@ static int fat_bmap_cluster(struct inode *inode, int cluster)
286 return ret; 286 return ret;
287 else if (ret == FAT_ENT_EOF) { 287 else if (ret == FAT_ENT_EOF) {
288 fat_fs_panic(sb, "%s: request beyond EOF (i_pos %lld)", 288 fat_fs_panic(sb, "%s: request beyond EOF (i_pos %lld)",
289 __FUNCTION__, MSDOS_I(inode)->i_pos); 289 __func__, MSDOS_I(inode)->i_pos);
290 return -EIO; 290 return -EIO;
291 } 291 }
292 return dclus; 292 return dclus;
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index 13ab763cc510..302e95c4af7e 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -546,7 +546,7 @@ int fat_free_clusters(struct inode *inode, int cluster)
546 goto error; 546 goto error;
547 } else if (cluster == FAT_ENT_FREE) { 547 } else if (cluster == FAT_ENT_FREE) {
548 fat_fs_panic(sb, "%s: deleting FAT entry beyond EOF", 548 fat_fs_panic(sb, "%s: deleting FAT entry beyond EOF",
549 __FUNCTION__); 549 __func__);
550 err = -EIO; 550 err = -EIO;
551 goto error; 551 goto error;
552 } 552 }
diff --git a/fs/fat/file.c b/fs/fat/file.c
index d604bb132422..27cc1164ec36 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -208,7 +208,7 @@ static int fat_free(struct inode *inode, int skip)
208 } else if (ret == FAT_ENT_FREE) { 208 } else if (ret == FAT_ENT_FREE) {
209 fat_fs_panic(sb, 209 fat_fs_panic(sb,
210 "%s: invalid cluster chain (i_pos %lld)", 210 "%s: invalid cluster chain (i_pos %lld)",
211 __FUNCTION__, MSDOS_I(inode)->i_pos); 211 __func__, MSDOS_I(inode)->i_pos);
212 ret = -EIO; 212 ret = -EIO;
213 } else if (ret > 0) { 213 } else if (ret > 0) {
214 err = fat_ent_write(inode, &fatent, FAT_ENT_EOF, wait); 214 err = fat_ent_write(inode, &fatent, FAT_ENT_EOF, wait);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 5f522a55b596..4e0a3dd9d677 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1222,8 +1222,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1222 brelse(bh); 1222 brelse(bh);
1223 goto out_invalid; 1223 goto out_invalid;
1224 } 1224 }
1225 logical_sector_size = 1225 logical_sector_size = get_unaligned_le16(&b->sector_size);
1226 le16_to_cpu(get_unaligned((__le16 *)&b->sector_size));
1227 if (!is_power_of_2(logical_sector_size) 1226 if (!is_power_of_2(logical_sector_size)
1228 || (logical_sector_size < 512) 1227 || (logical_sector_size < 512)
1229 || (logical_sector_size > 4096)) { 1228 || (logical_sector_size > 4096)) {
@@ -1322,8 +1321,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1322 sbi->dir_per_block_bits = ffs(sbi->dir_per_block) - 1; 1321 sbi->dir_per_block_bits = ffs(sbi->dir_per_block) - 1;
1323 1322
1324 sbi->dir_start = sbi->fat_start + sbi->fats * sbi->fat_length; 1323 sbi->dir_start = sbi->fat_start + sbi->fats * sbi->fat_length;
1325 sbi->dir_entries = 1324 sbi->dir_entries = get_unaligned_le16(&b->dir_entries);
1326 le16_to_cpu(get_unaligned((__le16 *)&b->dir_entries));
1327 if (sbi->dir_entries & (sbi->dir_per_block - 1)) { 1325 if (sbi->dir_entries & (sbi->dir_per_block - 1)) {
1328 if (!silent) 1326 if (!silent)
1329 printk(KERN_ERR "FAT: bogus directroy-entries per block" 1327 printk(KERN_ERR "FAT: bogus directroy-entries per block"
@@ -1335,7 +1333,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1335 rootdir_sectors = sbi->dir_entries 1333 rootdir_sectors = sbi->dir_entries
1336 * sizeof(struct msdos_dir_entry) / sb->s_blocksize; 1334 * sizeof(struct msdos_dir_entry) / sb->s_blocksize;
1337 sbi->data_start = sbi->dir_start + rootdir_sectors; 1335 sbi->data_start = sbi->dir_start + rootdir_sectors;
1338 total_sectors = le16_to_cpu(get_unaligned((__le16 *)&b->sectors)); 1336 total_sectors = get_unaligned_le16(&b->sectors);
1339 if (total_sectors == 0) 1337 if (total_sectors == 0)
1340 total_sectors = le32_to_cpu(b->total_sect); 1338 total_sectors = le32_to_cpu(b->total_sect);
1341 1339
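Editor's illustration (not part of the patch): the fs/fat/inode.c hunks above switch to get_unaligned_le16() for packed boot-sector fields. A minimal sketch of what such an accessor does, assuming a plain byte buffer; demo_get_unaligned_le16() and the sample data are invented for the demo, not the kernel implementation.

/*
 * Read a 16-bit little-endian value from an arbitrarily aligned buffer
 * by assembling it byte-by-byte instead of casting to a __le16 pointer.
 */
#include <stdint.h>
#include <stdio.h>

static uint16_t demo_get_unaligned_le16(const void *p)
{
	const uint8_t *b = p;

	/* Low byte first: little-endian, no alignment requirement. */
	return (uint16_t)(b[0] | (b[1] << 8));
}

int main(void)
{
	/* Fragment of a packed on-disk structure: 512 stored at an odd offset. */
	uint8_t raw[] = { 0xeb, 0x00, 0x02, 0x00 };

	printf("sector size: %u\n", (unsigned)demo_get_unaligned_le16(&raw[1]));
	return 0;
}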
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 3f3ac630ccde..bfd776509a72 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -9,6 +9,7 @@
9#include <linux/mm.h> 9#include <linux/mm.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/file.h> 11#include <linux/file.h>
12#include <linux/fdtable.h>
12#include <linux/capability.h> 13#include <linux/capability.h>
13#include <linux/dnotify.h> 14#include <linux/dnotify.h>
14#include <linux/smp_lock.h> 15#include <linux/smp_lock.h>
diff --git a/fs/file.c b/fs/file.c
index 5110acb1c9ef..4c6f0ea12c41 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -12,6 +12,7 @@
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/vmalloc.h> 13#include <linux/vmalloc.h>
14#include <linux/file.h> 14#include <linux/file.h>
15#include <linux/fdtable.h>
15#include <linux/bitops.h> 16#include <linux/bitops.h>
16#include <linux/interrupt.h> 17#include <linux/interrupt.h>
17#include <linux/spinlock.h> 18#include <linux/spinlock.h>
@@ -149,8 +150,16 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
149 nr /= (1024 / sizeof(struct file *)); 150 nr /= (1024 / sizeof(struct file *));
150 nr = roundup_pow_of_two(nr + 1); 151 nr = roundup_pow_of_two(nr + 1);
151 nr *= (1024 / sizeof(struct file *)); 152 nr *= (1024 / sizeof(struct file *));
152 if (nr > sysctl_nr_open) 153 /*
153 nr = sysctl_nr_open; 154 * Note that this can drive nr *below* what we had passed if sysctl_nr_open
155 * had been set lower between the check in expand_files() and here. Deal
156 * with that in caller, it's cheaper that way.
157 *
158 * We make sure that nr remains a multiple of BITS_PER_LONG - otherwise
159 * bitmaps handling below becomes unpleasant, to put it mildly...
160 */
161 if (unlikely(nr > sysctl_nr_open))
162 nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1;
154 163
155 fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL); 164 fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL);
156 if (!fdt) 165 if (!fdt)
@@ -199,6 +208,16 @@ static int expand_fdtable(struct files_struct *files, int nr)
199 if (!new_fdt) 208 if (!new_fdt)
200 return -ENOMEM; 209 return -ENOMEM;
201 /* 210 /*
211 * extremely unlikely race - sysctl_nr_open decreased between the check in
212 * caller and alloc_fdtable(). Cheaper to catch it here...
213 */
214 if (unlikely(new_fdt->max_fds <= nr)) {
215 free_fdarr(new_fdt);
216 free_fdset(new_fdt);
217 kfree(new_fdt);
218 return -EMFILE;
219 }
220 /*
202 * Check again since another task may have expanded the fd table while 221 * Check again since another task may have expanded the fd table while
203 * we dropped the lock 222 * we dropped the lock
204 */ 223 */
diff --git a/fs/file_table.c b/fs/file_table.c
index 7a0a9b872251..83084225b4c3 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -8,6 +8,7 @@
8#include <linux/string.h> 8#include <linux/string.h>
9#include <linux/slab.h> 9#include <linux/slab.h>
10#include <linux/file.h> 10#include <linux/file.h>
11#include <linux/fdtable.h>
11#include <linux/init.h> 12#include <linux/init.h>
12#include <linux/module.h> 13#include <linux/module.h>
13#include <linux/fs.h> 14#include <linux/fs.h>
diff --git a/fs/freevxfs/vxfs_extern.h b/fs/freevxfs/vxfs_extern.h
index 2b46064f66b2..50ab5eecb99b 100644
--- a/fs/freevxfs/vxfs_extern.h
+++ b/fs/freevxfs/vxfs_extern.h
@@ -50,7 +50,11 @@ extern daddr_t vxfs_bmap1(struct inode *, long);
50/* vxfs_fshead.c */ 50/* vxfs_fshead.c */
51extern int vxfs_read_fshead(struct super_block *); 51extern int vxfs_read_fshead(struct super_block *);
52 52
53/* vxfs_immed.c */
54extern const struct inode_operations vxfs_immed_symlink_iops;
55
53/* vxfs_inode.c */ 56/* vxfs_inode.c */
57extern const struct address_space_operations vxfs_immed_aops;
54extern struct kmem_cache *vxfs_inode_cachep; 58extern struct kmem_cache *vxfs_inode_cachep;
55extern void vxfs_dumpi(struct vxfs_inode_info *, ino_t); 59extern void vxfs_dumpi(struct vxfs_inode_info *, ino_t);
56extern struct inode * vxfs_get_fake_inode(struct super_block *, 60extern struct inode * vxfs_get_fake_inode(struct super_block *,
@@ -69,6 +73,7 @@ extern const struct file_operations vxfs_dir_operations;
69extern int vxfs_read_olt(struct super_block *, u_long); 73extern int vxfs_read_olt(struct super_block *, u_long);
70 74
71/* vxfs_subr.c */ 75/* vxfs_subr.c */
76extern const struct address_space_operations vxfs_aops;
72extern struct page * vxfs_get_page(struct address_space *, u_long); 77extern struct page * vxfs_get_page(struct address_space *, u_long);
73extern void vxfs_put_page(struct page *); 78extern void vxfs_put_page(struct page *);
74extern struct buffer_head * vxfs_bread(struct inode *, int); 79extern struct buffer_head * vxfs_bread(struct inode *, int);
diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c
index 8a5959a61ba9..c36aeaf92e41 100644
--- a/fs/freevxfs/vxfs_immed.c
+++ b/fs/freevxfs/vxfs_immed.c
@@ -35,6 +35,7 @@
35#include <linux/namei.h> 35#include <linux/namei.h>
36 36
37#include "vxfs.h" 37#include "vxfs.h"
38#include "vxfs_extern.h"
38#include "vxfs_inode.h" 39#include "vxfs_inode.h"
39 40
40 41
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index ad88d2364bc2..9f3f2ceb73f0 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -41,11 +41,6 @@
41#include "vxfs_extern.h" 41#include "vxfs_extern.h"
42 42
43 43
44extern const struct address_space_operations vxfs_aops;
45extern const struct address_space_operations vxfs_immed_aops;
46
47extern const struct inode_operations vxfs_immed_symlink_iops;
48
49struct kmem_cache *vxfs_inode_cachep; 44struct kmem_cache *vxfs_inode_cachep;
50 45
51 46
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 06557679ca41..ae45f77765c0 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -25,6 +25,45 @@
25#include <linux/buffer_head.h> 25#include <linux/buffer_head.h>
26#include "internal.h" 26#include "internal.h"
27 27
28
29/**
30 * writeback_acquire - attempt to get exclusive writeback access to a device
31 * @bdi: the device's backing_dev_info structure
32 *
33 * It is a waste of resources to have more than one pdflush thread blocked on
34 * a single request queue. Exclusion at the request_queue level is obtained
35 * via a flag in the request_queue's backing_dev_info.state.
36 *
37 * Non-request_queue-backed address_spaces will share default_backing_dev_info,
38 * unless they implement their own. Which is somewhat inefficient, as this
39 * may prevent concurrent writeback against multiple devices.
40 */
41static int writeback_acquire(struct backing_dev_info *bdi)
42{
43 return !test_and_set_bit(BDI_pdflush, &bdi->state);
44}
45
46/**
47 * writeback_in_progress - determine whether there is writeback in progress
48 * @bdi: the device's backing_dev_info structure.
49 *
50 * Determine whether there is writeback in progress against a backing device.
51 */
52int writeback_in_progress(struct backing_dev_info *bdi)
53{
54 return test_bit(BDI_pdflush, &bdi->state);
55}
56
57/**
58 * writeback_release - relinquish exclusive writeback access against a device.
59 * @bdi: the device's backing_dev_info structure
60 */
61static void writeback_release(struct backing_dev_info *bdi)
62{
63 BUG_ON(!writeback_in_progress(bdi));
64 clear_bit(BDI_pdflush, &bdi->state);
65}
66
28/** 67/**
29 * __mark_inode_dirty - internal function 68 * __mark_inode_dirty - internal function
30 * @inode: inode to mark 69 * @inode: inode to mark
@@ -747,43 +786,4 @@ int generic_osync_inode(struct inode *inode, struct address_space *mapping, int
747 786
748 return err; 787 return err;
749} 788}
750
751EXPORT_SYMBOL(generic_osync_inode); 789EXPORT_SYMBOL(generic_osync_inode);
752
753/**
754 * writeback_acquire - attempt to get exclusive writeback access to a device
755 * @bdi: the device's backing_dev_info structure
756 *
757 * It is a waste of resources to have more than one pdflush thread blocked on
758 * a single request queue. Exclusion at the request_queue level is obtained
759 * via a flag in the request_queue's backing_dev_info.state.
760 *
761 * Non-request_queue-backed address_spaces will share default_backing_dev_info,
762 * unless they implement their own. Which is somewhat inefficient, as this
763 * may prevent concurrent writeback against multiple devices.
764 */
765int writeback_acquire(struct backing_dev_info *bdi)
766{
767 return !test_and_set_bit(BDI_pdflush, &bdi->state);
768}
769
770/**
771 * writeback_in_progress - determine whether there is writeback in progress
772 * @bdi: the device's backing_dev_info structure.
773 *
774 * Determine whether there is writeback in progress against a backing device.
775 */
776int writeback_in_progress(struct backing_dev_info *bdi)
777{
778 return test_bit(BDI_pdflush, &bdi->state);
779}
780
781/**
782 * writeback_release - relinquish exclusive writeback access against a device.
783 * @bdi: the device's backing_dev_info structure
784 */
785void writeback_release(struct backing_dev_info *bdi)
786{
787 BUG_ON(!writeback_in_progress(bdi));
788 clear_bit(BDI_pdflush, &bdi->state);
789}
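Editor's illustration (not part of the patch): writeback_acquire()/writeback_release() above implement a per-device try-lock with a single flag bit so at most one flusher works a backing device at a time. A sketch of the same pattern using C11 atomics; the kernel uses test_and_set_bit()/clear_bit() on bdi->state instead, and the demo_* names are invented for the example.

/*
 * Try-lock on a single flag: test-and-set to acquire, clear to release.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct demo_bdi {
	atomic_flag pdflush_busy;	/* stands in for the BDI_pdflush bit */
};

/* Returns true if exclusive writeback access was obtained. */
static bool demo_writeback_acquire(struct demo_bdi *bdi)
{
	return !atomic_flag_test_and_set(&bdi->pdflush_busy);
}

static void demo_writeback_release(struct demo_bdi *bdi)
{
	atomic_flag_clear(&bdi->pdflush_busy);
}

int main(void)
{
	struct demo_bdi bdi = { .pdflush_busy = ATOMIC_FLAG_INIT };

	printf("first acquire:  %d\n", demo_writeback_acquire(&bdi));	/* 1 */
	printf("second acquire: %d\n", demo_writeback_acquire(&bdi));	/* 0 */
	demo_writeback_release(&bdi);
	printf("after release:  %d\n", demo_writeback_acquire(&bdi));	/* 1 */
	return 0;
}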
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 105d4a271e07..4f3cab321415 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -117,7 +117,7 @@ int fuse_ctl_add_conn(struct fuse_conn *fc)
117 117
118 parent = fuse_control_sb->s_root; 118 parent = fuse_control_sb->s_root;
119 inc_nlink(parent->d_inode); 119 inc_nlink(parent->d_inode);
120 sprintf(name, "%llu", (unsigned long long) fc->id); 120 sprintf(name, "%u", fc->dev);
121 parent = fuse_ctl_add_dentry(parent, fc, name, S_IFDIR | 0500, 2, 121 parent = fuse_ctl_add_dentry(parent, fc, name, S_IFDIR | 0500, 2,
122 &simple_dir_inode_operations, 122 &simple_dir_inode_operations,
123 &simple_dir_operations); 123 &simple_dir_operations);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index af639807524e..87250b6a8682 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -47,6 +47,14 @@ struct fuse_req *fuse_request_alloc(void)
47 return req; 47 return req;
48} 48}
49 49
50struct fuse_req *fuse_request_alloc_nofs(void)
51{
52 struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS);
53 if (req)
54 fuse_request_init(req);
55 return req;
56}
57
50void fuse_request_free(struct fuse_req *req) 58void fuse_request_free(struct fuse_req *req)
51{ 59{
52 kmem_cache_free(fuse_req_cachep, req); 60 kmem_cache_free(fuse_req_cachep, req);
@@ -291,6 +299,7 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
291 299
292static void wait_answer_interruptible(struct fuse_conn *fc, 300static void wait_answer_interruptible(struct fuse_conn *fc,
293 struct fuse_req *req) 301 struct fuse_req *req)
302 __releases(fc->lock) __acquires(fc->lock)
294{ 303{
295 if (signal_pending(current)) 304 if (signal_pending(current))
296 return; 305 return;
@@ -307,8 +316,8 @@ static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req)
307 kill_fasync(&fc->fasync, SIGIO, POLL_IN); 316 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
308} 317}
309 318
310/* Called with fc->lock held. Releases, and then reacquires it. */
311static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) 319static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
320 __releases(fc->lock) __acquires(fc->lock)
312{ 321{
313 if (!fc->no_interrupt) { 322 if (!fc->no_interrupt) {
314 /* Any signal may interrupt this */ 323 /* Any signal may interrupt this */
@@ -430,6 +439,17 @@ void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
430} 439}
431 440
432/* 441/*
442 * Called under fc->lock
443 *
444 * fc->connected must have been checked previously
445 */
446void request_send_background_locked(struct fuse_conn *fc, struct fuse_req *req)
447{
448 req->isreply = 1;
449 request_send_nowait_locked(fc, req);
450}
451
452/*
433 * Lock the request. Up to the next unlock_request() there mustn't be 453 * Lock the request. Up to the next unlock_request() there mustn't be
434 * anything that could cause a page-fault. If the request was already 454 * anything that could cause a page-fault. If the request was already
435 * aborted bail out. 455 * aborted bail out.
@@ -968,6 +988,7 @@ static void end_requests(struct fuse_conn *fc, struct list_head *head)
968 * locked). 988 * locked).
969 */ 989 */
970static void end_io_requests(struct fuse_conn *fc) 990static void end_io_requests(struct fuse_conn *fc)
991 __releases(fc->lock) __acquires(fc->lock)
971{ 992{
972 while (!list_empty(&fc->io)) { 993 while (!list_empty(&fc->io)) {
973 struct fuse_req *req = 994 struct fuse_req *req =
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index c4807b3fc8a3..2060bf06b906 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -132,7 +132,7 @@ static void fuse_lookup_init(struct fuse_req *req, struct inode *dir,
132 req->out.args[0].value = outarg; 132 req->out.args[0].value = outarg;
133} 133}
134 134
135static u64 fuse_get_attr_version(struct fuse_conn *fc) 135u64 fuse_get_attr_version(struct fuse_conn *fc)
136{ 136{
137 u64 curr_version; 137 u64 curr_version;
138 138
@@ -1107,6 +1107,50 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg)
1107} 1107}
1108 1108
1109/* 1109/*
1110 * Prevent concurrent writepages on inode
1111 *
1112 * This is done by adding a negative bias to the inode write counter
1113 * and waiting for all pending writes to finish.
1114 */
1115void fuse_set_nowrite(struct inode *inode)
1116{
1117 struct fuse_conn *fc = get_fuse_conn(inode);
1118 struct fuse_inode *fi = get_fuse_inode(inode);
1119
1120 BUG_ON(!mutex_is_locked(&inode->i_mutex));
1121
1122 spin_lock(&fc->lock);
1123 BUG_ON(fi->writectr < 0);
1124 fi->writectr += FUSE_NOWRITE;
1125 spin_unlock(&fc->lock);
1126 wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
1127}
1128
1129/*
1130 * Allow writepages on inode
1131 *
1132 * Remove the bias from the writecounter and send any queued
1133 * writepages.
1134 */
1135static void __fuse_release_nowrite(struct inode *inode)
1136{
1137 struct fuse_inode *fi = get_fuse_inode(inode);
1138
1139 BUG_ON(fi->writectr != FUSE_NOWRITE);
1140 fi->writectr = 0;
1141 fuse_flush_writepages(inode);
1142}
1143
1144void fuse_release_nowrite(struct inode *inode)
1145{
1146 struct fuse_conn *fc = get_fuse_conn(inode);
1147
1148 spin_lock(&fc->lock);
1149 __fuse_release_nowrite(inode);
1150 spin_unlock(&fc->lock);
1151}
1152
1153/*
1110 * Set attributes, and at the same time refresh them. 1154 * Set attributes, and at the same time refresh them.
1111 * 1155 *
1112 * Truncation is slightly complicated, because the 'truncate' request 1156 * Truncation is slightly complicated, because the 'truncate' request
@@ -1122,6 +1166,8 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1122 struct fuse_req *req; 1166 struct fuse_req *req;
1123 struct fuse_setattr_in inarg; 1167 struct fuse_setattr_in inarg;
1124 struct fuse_attr_out outarg; 1168 struct fuse_attr_out outarg;
1169 bool is_truncate = false;
1170 loff_t oldsize;
1125 int err; 1171 int err;
1126 1172
1127 if (!fuse_allow_task(fc, current)) 1173 if (!fuse_allow_task(fc, current))
@@ -1145,12 +1191,16 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1145 send_sig(SIGXFSZ, current, 0); 1191 send_sig(SIGXFSZ, current, 0);
1146 return -EFBIG; 1192 return -EFBIG;
1147 } 1193 }
1194 is_truncate = true;
1148 } 1195 }
1149 1196
1150 req = fuse_get_req(fc); 1197 req = fuse_get_req(fc);
1151 if (IS_ERR(req)) 1198 if (IS_ERR(req))
1152 return PTR_ERR(req); 1199 return PTR_ERR(req);
1153 1200
1201 if (is_truncate)
1202 fuse_set_nowrite(inode);
1203
1154 memset(&inarg, 0, sizeof(inarg)); 1204 memset(&inarg, 0, sizeof(inarg));
1155 memset(&outarg, 0, sizeof(outarg)); 1205 memset(&outarg, 0, sizeof(outarg));
1156 iattr_to_fattr(attr, &inarg); 1206 iattr_to_fattr(attr, &inarg);
@@ -1181,16 +1231,44 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1181 if (err) { 1231 if (err) {
1182 if (err == -EINTR) 1232 if (err == -EINTR)
1183 fuse_invalidate_attr(inode); 1233 fuse_invalidate_attr(inode);
1184 return err; 1234 goto error;
1185 } 1235 }
1186 1236
1187 if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) { 1237 if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
1188 make_bad_inode(inode); 1238 make_bad_inode(inode);
1189 return -EIO; 1239 err = -EIO;
1240 goto error;
1241 }
1242
1243 spin_lock(&fc->lock);
1244 fuse_change_attributes_common(inode, &outarg.attr,
1245 attr_timeout(&outarg));
1246 oldsize = inode->i_size;
1247 i_size_write(inode, outarg.attr.size);
1248
1249 if (is_truncate) {
1250 /* NOTE: this may release/reacquire fc->lock */
1251 __fuse_release_nowrite(inode);
1252 }
1253 spin_unlock(&fc->lock);
1254
1255 /*
1256 * Only call invalidate_inode_pages2() after removing
1257 * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
1258 */
1259 if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
1260 if (outarg.attr.size < oldsize)
1261 fuse_truncate(inode->i_mapping, outarg.attr.size);
1262 invalidate_inode_pages2(inode->i_mapping);
1190 } 1263 }
1191 1264
1192 fuse_change_attributes(inode, &outarg.attr, attr_timeout(&outarg), 0);
1193 return 0; 1265 return 0;
1266
1267error:
1268 if (is_truncate)
1269 fuse_release_nowrite(inode);
1270
1271 return err;
1194} 1272}
1195 1273
1196static int fuse_setattr(struct dentry *entry, struct iattr *attr) 1274static int fuse_setattr(struct dentry *entry, struct iattr *attr)
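Editor's illustration (not part of the patch): fuse_set_nowrite() above blocks further writepages by adding a large negative bias to the inode's write counter and waiting until the counter sinks back to exactly the bias, i.e. until every in-flight write has finished. Below is a simplified pthread model of that idea, not the FUSE code itself; the demo_* names, DEMO_NOWRITE, and the writer thread are invented for the example (real FUSE queues writepages rather than blocking the submitter).

/*
 * Negative-bias gate: in-flight writers keep the counter positive; a
 * "nowrite" phase adds a big negative bias and waits for the counter to
 * drain to the bias value before proceeding.
 */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

#define DEMO_NOWRITE (-1000000)

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t waitq = PTHREAD_COND_INITIALIZER;
static long writectr;	/* > 0: writes in flight, < 0: writes blocked */

static void demo_write_begin(void)
{
	pthread_mutex_lock(&lock);
	while (writectr < 0)		/* a nowrite phase is active */
		pthread_cond_wait(&waitq, &lock);
	writectr++;
	pthread_mutex_unlock(&lock);
}

static void demo_write_end(void)
{
	pthread_mutex_lock(&lock);
	writectr--;
	pthread_cond_broadcast(&waitq);
	pthread_mutex_unlock(&lock);
}

static void demo_set_nowrite(void)
{
	pthread_mutex_lock(&lock);
	writectr += DEMO_NOWRITE;
	while (writectr != DEMO_NOWRITE)	/* wait for in-flight writes */
		pthread_cond_wait(&waitq, &lock);
	pthread_mutex_unlock(&lock);
}

static void demo_release_nowrite(void)
{
	pthread_mutex_lock(&lock);
	writectr -= DEMO_NOWRITE;
	pthread_cond_broadcast(&waitq);
	pthread_mutex_unlock(&lock);
}

static void *writer(void *arg)
{
	(void)arg;
	demo_write_begin();
	usleep(1000);			/* pretend to send a WRITE request */
	demo_write_end();
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, writer, NULL);
	demo_set_nowrite();		/* drains the writer, blocks new ones */
	printf("all writes drained; truncate-like work would run here\n");
	demo_release_nowrite();
	pthread_join(t, NULL);
	return 0;
}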
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 676b0bc8a86d..8092f0d9fd1f 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -210,6 +210,49 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
210 return (u64) v0 + ((u64) v1 << 32); 210 return (u64) v0 + ((u64) v1 << 32);
211} 211}
212 212
213/*
214 * Check if page is under writeback
215 *
216 * This is currently done by walking the list of writepage requests
217 * for the inode, which can be pretty inefficient.
218 */
219static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
220{
221 struct fuse_conn *fc = get_fuse_conn(inode);
222 struct fuse_inode *fi = get_fuse_inode(inode);
223 struct fuse_req *req;
224 bool found = false;
225
226 spin_lock(&fc->lock);
227 list_for_each_entry(req, &fi->writepages, writepages_entry) {
228 pgoff_t curr_index;
229
230 BUG_ON(req->inode != inode);
231 curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
232 if (curr_index == index) {
233 found = true;
234 break;
235 }
236 }
237 spin_unlock(&fc->lock);
238
239 return found;
240}
241
242/*
243 * Wait for page writeback to be completed.
244 *
245 * Since fuse doesn't rely on the VM writeback tracking, this has to
246 * use some other means.
247 */
248static int fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
249{
250 struct fuse_inode *fi = get_fuse_inode(inode);
251
252 wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index));
253 return 0;
254}
255
213static int fuse_flush(struct file *file, fl_owner_t id) 256static int fuse_flush(struct file *file, fl_owner_t id)
214{ 257{
215 struct inode *inode = file->f_path.dentry->d_inode; 258 struct inode *inode = file->f_path.dentry->d_inode;
@@ -245,6 +288,21 @@ static int fuse_flush(struct file *file, fl_owner_t id)
245 return err; 288 return err;
246} 289}
247 290
291/*
292 * Wait for all pending writepages on the inode to finish.
293 *
294 * This is currently done by blocking further writes with FUSE_NOWRITE
295 * and waiting for all sent writes to complete.
296 *
297 * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage
298 * could conflict with truncation.
299 */
300static void fuse_sync_writes(struct inode *inode)
301{
302 fuse_set_nowrite(inode);
303 fuse_release_nowrite(inode);
304}
305
248int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, 306int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
249 int isdir) 307 int isdir)
250{ 308{
@@ -261,6 +319,17 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
261 if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir)) 319 if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
262 return 0; 320 return 0;
263 321
322 /*
323 * Start writeback against all dirty pages of the inode, then
324 * wait for all outstanding writes, before sending the FSYNC
325 * request.
326 */
327 err = write_inode_now(inode, 0);
328 if (err)
329 return err;
330
331 fuse_sync_writes(inode);
332
264 req = fuse_get_req(fc); 333 req = fuse_get_req(fc);
265 if (IS_ERR(req)) 334 if (IS_ERR(req))
266 return PTR_ERR(req); 335 return PTR_ERR(req);
@@ -294,7 +363,7 @@ static int fuse_fsync(struct file *file, struct dentry *de, int datasync)
294void fuse_read_fill(struct fuse_req *req, struct file *file, 363void fuse_read_fill(struct fuse_req *req, struct file *file,
295 struct inode *inode, loff_t pos, size_t count, int opcode) 364 struct inode *inode, loff_t pos, size_t count, int opcode)
296{ 365{
297 struct fuse_read_in *inarg = &req->misc.read_in; 366 struct fuse_read_in *inarg = &req->misc.read.in;
298 struct fuse_file *ff = file->private_data; 367 struct fuse_file *ff = file->private_data;
299 368
300 inarg->fh = ff->fh; 369 inarg->fh = ff->fh;
@@ -320,7 +389,7 @@ static size_t fuse_send_read(struct fuse_req *req, struct file *file,
320 389
321 fuse_read_fill(req, file, inode, pos, count, FUSE_READ); 390 fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
322 if (owner != NULL) { 391 if (owner != NULL) {
323 struct fuse_read_in *inarg = &req->misc.read_in; 392 struct fuse_read_in *inarg = &req->misc.read.in;
324 393
325 inarg->read_flags |= FUSE_READ_LOCKOWNER; 394 inarg->read_flags |= FUSE_READ_LOCKOWNER;
326 inarg->lock_owner = fuse_lock_owner_id(fc, owner); 395 inarg->lock_owner = fuse_lock_owner_id(fc, owner);
@@ -329,31 +398,66 @@ static size_t fuse_send_read(struct fuse_req *req, struct file *file,
329 return req->out.args[0].size; 398 return req->out.args[0].size;
330} 399}
331 400
401static void fuse_read_update_size(struct inode *inode, loff_t size,
402 u64 attr_ver)
403{
404 struct fuse_conn *fc = get_fuse_conn(inode);
405 struct fuse_inode *fi = get_fuse_inode(inode);
406
407 spin_lock(&fc->lock);
408 if (attr_ver == fi->attr_version && size < inode->i_size) {
409 fi->attr_version = ++fc->attr_version;
410 i_size_write(inode, size);
411 }
412 spin_unlock(&fc->lock);
413}
414
332static int fuse_readpage(struct file *file, struct page *page) 415static int fuse_readpage(struct file *file, struct page *page)
333{ 416{
334 struct inode *inode = page->mapping->host; 417 struct inode *inode = page->mapping->host;
335 struct fuse_conn *fc = get_fuse_conn(inode); 418 struct fuse_conn *fc = get_fuse_conn(inode);
336 struct fuse_req *req; 419 struct fuse_req *req;
420 size_t num_read;
421 loff_t pos = page_offset(page);
422 size_t count = PAGE_CACHE_SIZE;
423 u64 attr_ver;
337 int err; 424 int err;
338 425
339 err = -EIO; 426 err = -EIO;
340 if (is_bad_inode(inode)) 427 if (is_bad_inode(inode))
341 goto out; 428 goto out;
342 429
430 /*
431 * Page writeback can extend beyond the lifetime of the
432 * page-cache page, so make sure we read a properly synced
433 * page.
434 */
435 fuse_wait_on_page_writeback(inode, page->index);
436
343 req = fuse_get_req(fc); 437 req = fuse_get_req(fc);
344 err = PTR_ERR(req); 438 err = PTR_ERR(req);
345 if (IS_ERR(req)) 439 if (IS_ERR(req))
346 goto out; 440 goto out;
347 441
442 attr_ver = fuse_get_attr_version(fc);
443
348 req->out.page_zeroing = 1; 444 req->out.page_zeroing = 1;
349 req->num_pages = 1; 445 req->num_pages = 1;
350 req->pages[0] = page; 446 req->pages[0] = page;
351 fuse_send_read(req, file, inode, page_offset(page), PAGE_CACHE_SIZE, 447 num_read = fuse_send_read(req, file, inode, pos, count, NULL);
352 NULL);
353 err = req->out.h.error; 448 err = req->out.h.error;
354 fuse_put_request(fc, req); 449 fuse_put_request(fc, req);
355 if (!err) 450
451 if (!err) {
452 /*
453 * Short read means EOF. If file size is larger, truncate it
454 */
455 if (num_read < count)
456 fuse_read_update_size(inode, pos + num_read, attr_ver);
457
356 SetPageUptodate(page); 458 SetPageUptodate(page);
459 }
460
357 fuse_invalidate_attr(inode); /* atime changed */ 461 fuse_invalidate_attr(inode); /* atime changed */
358 out: 462 out:
359 unlock_page(page); 463 unlock_page(page);
@@ -363,8 +467,19 @@ static int fuse_readpage(struct file *file, struct page *page)
363static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req) 467static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
364{ 468{
365 int i; 469 int i;
470 size_t count = req->misc.read.in.size;
471 size_t num_read = req->out.args[0].size;
472 struct inode *inode = req->pages[0]->mapping->host;
473
474 /*
475 * Short read means EOF. If file size is larger, truncate it
476 */
477 if (!req->out.h.error && num_read < count) {
478 loff_t pos = page_offset(req->pages[0]) + num_read;
479 fuse_read_update_size(inode, pos, req->misc.read.attr_ver);
480 }
366 481
367 fuse_invalidate_attr(req->pages[0]->mapping->host); /* atime changed */ 482 fuse_invalidate_attr(inode); /* atime changed */
368 483
369 for (i = 0; i < req->num_pages; i++) { 484 for (i = 0; i < req->num_pages; i++) {
370 struct page *page = req->pages[i]; 485 struct page *page = req->pages[i];
@@ -387,6 +502,7 @@ static void fuse_send_readpages(struct fuse_req *req, struct file *file,
387 size_t count = req->num_pages << PAGE_CACHE_SHIFT; 502 size_t count = req->num_pages << PAGE_CACHE_SHIFT;
388 req->out.page_zeroing = 1; 503 req->out.page_zeroing = 1;
389 fuse_read_fill(req, file, inode, pos, count, FUSE_READ); 504 fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
505 req->misc.read.attr_ver = fuse_get_attr_version(fc);
390 if (fc->async_read) { 506 if (fc->async_read) {
391 struct fuse_file *ff = file->private_data; 507 struct fuse_file *ff = file->private_data;
392 req->ff = fuse_file_get(ff); 508 req->ff = fuse_file_get(ff);
@@ -411,6 +527,8 @@ static int fuse_readpages_fill(void *_data, struct page *page)
411 struct inode *inode = data->inode; 527 struct inode *inode = data->inode;
412 struct fuse_conn *fc = get_fuse_conn(inode); 528 struct fuse_conn *fc = get_fuse_conn(inode);
413 529
530 fuse_wait_on_page_writeback(inode, page->index);
531
414 if (req->num_pages && 532 if (req->num_pages &&
415 (req->num_pages == FUSE_MAX_PAGES_PER_REQ || 533 (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
416 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || 534 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
@@ -477,11 +595,10 @@ static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
477} 595}
478 596
479static void fuse_write_fill(struct fuse_req *req, struct file *file, 597static void fuse_write_fill(struct fuse_req *req, struct file *file,
480 struct inode *inode, loff_t pos, size_t count, 598 struct fuse_file *ff, struct inode *inode,
481 int writepage) 599 loff_t pos, size_t count, int writepage)
482{ 600{
483 struct fuse_conn *fc = get_fuse_conn(inode); 601 struct fuse_conn *fc = get_fuse_conn(inode);
484 struct fuse_file *ff = file->private_data;
485 struct fuse_write_in *inarg = &req->misc.write.in; 602 struct fuse_write_in *inarg = &req->misc.write.in;
486 struct fuse_write_out *outarg = &req->misc.write.out; 603 struct fuse_write_out *outarg = &req->misc.write.out;
487 604
@@ -490,7 +607,7 @@ static void fuse_write_fill(struct fuse_req *req, struct file *file,
490 inarg->offset = pos; 607 inarg->offset = pos;
491 inarg->size = count; 608 inarg->size = count;
492 inarg->write_flags = writepage ? FUSE_WRITE_CACHE : 0; 609 inarg->write_flags = writepage ? FUSE_WRITE_CACHE : 0;
493 inarg->flags = file->f_flags; 610 inarg->flags = file ? file->f_flags : 0;
494 req->in.h.opcode = FUSE_WRITE; 611 req->in.h.opcode = FUSE_WRITE;
495 req->in.h.nodeid = get_node_id(inode); 612 req->in.h.nodeid = get_node_id(inode);
496 req->in.argpages = 1; 613 req->in.argpages = 1;
@@ -511,7 +628,7 @@ static size_t fuse_send_write(struct fuse_req *req, struct file *file,
511 fl_owner_t owner) 628 fl_owner_t owner)
512{ 629{
513 struct fuse_conn *fc = get_fuse_conn(inode); 630 struct fuse_conn *fc = get_fuse_conn(inode);
514 fuse_write_fill(req, file, inode, pos, count, 0); 631 fuse_write_fill(req, file, file->private_data, inode, pos, count, 0);
515 if (owner != NULL) { 632 if (owner != NULL) {
516 struct fuse_write_in *inarg = &req->misc.write.in; 633 struct fuse_write_in *inarg = &req->misc.write.in;
517 inarg->write_flags |= FUSE_WRITE_LOCKOWNER; 634 inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
@@ -533,19 +650,36 @@ static int fuse_write_begin(struct file *file, struct address_space *mapping,
533 return 0; 650 return 0;
534} 651}
535 652
653static void fuse_write_update_size(struct inode *inode, loff_t pos)
654{
655 struct fuse_conn *fc = get_fuse_conn(inode);
656 struct fuse_inode *fi = get_fuse_inode(inode);
657
658 spin_lock(&fc->lock);
659 fi->attr_version = ++fc->attr_version;
660 if (pos > inode->i_size)
661 i_size_write(inode, pos);
662 spin_unlock(&fc->lock);
663}
664
536static int fuse_buffered_write(struct file *file, struct inode *inode, 665static int fuse_buffered_write(struct file *file, struct inode *inode,
537 loff_t pos, unsigned count, struct page *page) 666 loff_t pos, unsigned count, struct page *page)
538{ 667{
539 int err; 668 int err;
540 size_t nres; 669 size_t nres;
541 struct fuse_conn *fc = get_fuse_conn(inode); 670 struct fuse_conn *fc = get_fuse_conn(inode);
542 struct fuse_inode *fi = get_fuse_inode(inode);
543 unsigned offset = pos & (PAGE_CACHE_SIZE - 1); 671 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
544 struct fuse_req *req; 672 struct fuse_req *req;
545 673
546 if (is_bad_inode(inode)) 674 if (is_bad_inode(inode))
547 return -EIO; 675 return -EIO;
548 676
677 /*
678 * Make sure writepages on the same page are not mixed up with
679 * plain writes.
680 */
681 fuse_wait_on_page_writeback(inode, page->index);
682
549 req = fuse_get_req(fc); 683 req = fuse_get_req(fc);
550 if (IS_ERR(req)) 684 if (IS_ERR(req))
551 return PTR_ERR(req); 685 return PTR_ERR(req);
@@ -560,12 +694,7 @@ static int fuse_buffered_write(struct file *file, struct inode *inode,
560 err = -EIO; 694 err = -EIO;
561 if (!err) { 695 if (!err) {
562 pos += nres; 696 pos += nres;
563 spin_lock(&fc->lock); 697 fuse_write_update_size(inode, pos);
564 fi->attr_version = ++fc->attr_version;
565 if (pos > inode->i_size)
566 i_size_write(inode, pos);
567 spin_unlock(&fc->lock);
568
569 if (count == PAGE_CACHE_SIZE) 698 if (count == PAGE_CACHE_SIZE)
570 SetPageUptodate(page); 699 SetPageUptodate(page);
571 } 700 }
@@ -588,6 +717,200 @@ static int fuse_write_end(struct file *file, struct address_space *mapping,
588 return res; 717 return res;
589} 718}
590 719
720static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
721 struct inode *inode, loff_t pos,
722 size_t count)
723{
724 size_t res;
725 unsigned offset;
726 unsigned i;
727
728 for (i = 0; i < req->num_pages; i++)
729 fuse_wait_on_page_writeback(inode, req->pages[i]->index);
730
731 res = fuse_send_write(req, file, inode, pos, count, NULL);
732
733 offset = req->page_offset;
734 count = res;
735 for (i = 0; i < req->num_pages; i++) {
736 struct page *page = req->pages[i];
737
738 if (!req->out.h.error && !offset && count >= PAGE_CACHE_SIZE)
739 SetPageUptodate(page);
740
741 if (count > PAGE_CACHE_SIZE - offset)
742 count -= PAGE_CACHE_SIZE - offset;
743 else
744 count = 0;
745 offset = 0;
746
747 unlock_page(page);
748 page_cache_release(page);
749 }
750
751 return res;
752}
753
754static ssize_t fuse_fill_write_pages(struct fuse_req *req,
755 struct address_space *mapping,
756 struct iov_iter *ii, loff_t pos)
757{
758 struct fuse_conn *fc = get_fuse_conn(mapping->host);
759 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
760 size_t count = 0;
761 int err;
762
763 req->page_offset = offset;
764
765 do {
766 size_t tmp;
767 struct page *page;
768 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
769 size_t bytes = min_t(size_t, PAGE_CACHE_SIZE - offset,
770 iov_iter_count(ii));
771
772 bytes = min_t(size_t, bytes, fc->max_write - count);
773
774 again:
775 err = -EFAULT;
776 if (iov_iter_fault_in_readable(ii, bytes))
777 break;
778
779 err = -ENOMEM;
780 page = __grab_cache_page(mapping, index);
781 if (!page)
782 break;
783
784 pagefault_disable();
785 tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes);
786 pagefault_enable();
787 flush_dcache_page(page);
788
789 if (!tmp) {
790 unlock_page(page);
791 page_cache_release(page);
792 bytes = min(bytes, iov_iter_single_seg_count(ii));
793 goto again;
794 }
795
796 err = 0;
797 req->pages[req->num_pages] = page;
798 req->num_pages++;
799
800 iov_iter_advance(ii, tmp);
801 count += tmp;
802 pos += tmp;
803 offset += tmp;
804 if (offset == PAGE_CACHE_SIZE)
805 offset = 0;
806
807 if (!fc->big_writes)
808 break;
809 } while (iov_iter_count(ii) && count < fc->max_write &&
810 req->num_pages < FUSE_MAX_PAGES_PER_REQ && offset == 0);
811
812 return count > 0 ? count : err;
813}
814
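[Editor's note] To make the loop bounds above concrete with illustrative values: with 4 KiB pages and an fc->max_write of 32768 negotiated through FUSE_BIG_WRITES, a page-aligned 128 KiB write is packed into four requests of 8 pages (32 KiB) each; the offset == 0 test ends a request at the first partially filled page, and without big_writes the loop breaks after a single page, so each request then carries at most PAGE_CACHE_SIZE bytes.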
815static ssize_t fuse_perform_write(struct file *file,
816 struct address_space *mapping,
817 struct iov_iter *ii, loff_t pos)
818{
819 struct inode *inode = mapping->host;
820 struct fuse_conn *fc = get_fuse_conn(inode);
821 int err = 0;
822 ssize_t res = 0;
823
824 if (is_bad_inode(inode))
825 return -EIO;
826
827 do {
828 struct fuse_req *req;
829 ssize_t count;
830
831 req = fuse_get_req(fc);
832 if (IS_ERR(req)) {
833 err = PTR_ERR(req);
834 break;
835 }
836
837 count = fuse_fill_write_pages(req, mapping, ii, pos);
838 if (count <= 0) {
839 err = count;
840 } else {
841 size_t num_written;
842
843 num_written = fuse_send_write_pages(req, file, inode,
844 pos, count);
845 err = req->out.h.error;
846 if (!err) {
847 res += num_written;
848 pos += num_written;
849
850 /* break out of the loop on short write */
851 if (num_written != count)
852 err = -EIO;
853 }
854 }
855 fuse_put_request(fc, req);
856 } while (!err && iov_iter_count(ii));
857
858 if (res > 0)
859 fuse_write_update_size(inode, pos);
860
861 fuse_invalidate_attr(inode);
862
863 return res > 0 ? res : err;
864}
865
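[Editor's note] A concrete case for the short-write branch above (numbers illustrative): if the filesystem acknowledges only 16384 bytes of a 32768-byte FUSE_WRITE, res still advances by the 16384 bytes that were committed, err is set to -EIO so no further requests are sent, and because res > 0 the caller is returned the partial byte count rather than the error.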
866static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
867 unsigned long nr_segs, loff_t pos)
868{
869 struct file *file = iocb->ki_filp;
870 struct address_space *mapping = file->f_mapping;
871 size_t count = 0;
872 ssize_t written = 0;
873 struct inode *inode = mapping->host;
874 ssize_t err;
875 struct iov_iter i;
876
877 WARN_ON(iocb->ki_pos != pos);
878
879 err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
880 if (err)
881 return err;
882
883 mutex_lock(&inode->i_mutex);
884 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
885
886 /* We can write back this queue in page reclaim */
887 current->backing_dev_info = mapping->backing_dev_info;
888
889 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
890 if (err)
891 goto out;
892
893 if (count == 0)
894 goto out;
895
896 err = remove_suid(file->f_path.dentry);
897 if (err)
898 goto out;
899
900 file_update_time(file);
901
902 iov_iter_init(&i, iov, nr_segs, count, 0);
903 written = fuse_perform_write(file, mapping, &i, pos);
904 if (written >= 0)
905 iocb->ki_pos = pos + written;
906
907out:
908 current->backing_dev_info = NULL;
909 mutex_unlock(&inode->i_mutex);
910
911 return written ? written : err;
912}
913
591static void fuse_release_user_pages(struct fuse_req *req, int write) 914static void fuse_release_user_pages(struct fuse_req *req, int write)
592{ 915{
593 unsigned i; 916 unsigned i;
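[Editor's note] As a user-space illustration of the new path (not part of the patch; the mount point and file name are hypothetical), a single large write on a FUSE mount whose daemon negotiated FUSE_BIG_WRITES now reaches the filesystem as multi-page FUSE_WRITE requests built by fuse_perform_write():

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* hypothetical file on a FUSE mount */
	int fd = open("/mnt/fuse/bigfile", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	size_t len = 128 * 1024;	/* well beyond one page */
	char *buf;
	ssize_t n;

	if (fd < 0) {
		perror("open");
		return 1;
	}
	buf = malloc(len);
	if (!buf) {
		close(fd);
		return 1;
	}
	memset(buf, 'x', len);
	n = pwrite(fd, buf, len, 0);	/* packed into multi-page requests */
	printf("wrote %zd of %zu bytes\n", n, len);
	free(buf);
	close(fd);
	return n == (ssize_t)len ? 0 : 1;
}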
@@ -613,7 +936,7 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
613 936
614 nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); 937 nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
615 npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; 938 npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
616 npages = min(max(npages, 1), FUSE_MAX_PAGES_PER_REQ); 939 npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ);
617 down_read(&current->mm->mmap_sem); 940 down_read(&current->mm->mmap_sem);
618 npages = get_user_pages(current, current->mm, user_addr, npages, write, 941 npages = get_user_pages(current, current->mm, user_addr, npages, write,
619 0, req->pages, NULL); 942 0, req->pages, NULL);
@@ -645,14 +968,15 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
645 968
646 while (count) { 969 while (count) {
647 size_t nres; 970 size_t nres;
648 size_t nbytes = min(count, nmax); 971 size_t nbytes_limit = min(count, nmax);
649 int err = fuse_get_user_pages(req, buf, nbytes, !write); 972 size_t nbytes;
973 int err = fuse_get_user_pages(req, buf, nbytes_limit, !write);
650 if (err) { 974 if (err) {
651 res = err; 975 res = err;
652 break; 976 break;
653 } 977 }
654 nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset; 978 nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset;
655 nbytes = min(count, nbytes); 979 nbytes = min(nbytes_limit, nbytes);
656 if (write) 980 if (write)
657 nres = fuse_send_write(req, file, inode, pos, nbytes, 981 nres = fuse_send_write(req, file, inode, pos, nbytes,
658 current->files); 982 current->files);
@@ -683,12 +1007,8 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
683 } 1007 }
684 fuse_put_request(fc, req); 1008 fuse_put_request(fc, req);
685 if (res > 0) { 1009 if (res > 0) {
686 if (write) { 1010 if (write)
687 spin_lock(&fc->lock); 1011 fuse_write_update_size(inode, pos);
688 if (pos > inode->i_size)
689 i_size_write(inode, pos);
690 spin_unlock(&fc->lock);
691 }
692 *ppos = pos; 1012 *ppos = pos;
693 } 1013 }
694 fuse_invalidate_attr(inode); 1014 fuse_invalidate_attr(inode);
@@ -716,21 +1036,225 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
716 return res; 1036 return res;
717} 1037}
718 1038
719static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) 1039static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
720{ 1040{
721 if ((vma->vm_flags & VM_SHARED)) { 1041 __free_page(req->pages[0]);
722 if ((vma->vm_flags & VM_WRITE)) 1042 fuse_file_put(req->ff);
723 return -ENODEV; 1043 fuse_put_request(fc, req);
724 else 1044}
725 vma->vm_flags &= ~VM_MAYWRITE; 1045
1046static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
1047{
1048 struct inode *inode = req->inode;
1049 struct fuse_inode *fi = get_fuse_inode(inode);
1050 struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info;
1051
1052 list_del(&req->writepages_entry);
1053 dec_bdi_stat(bdi, BDI_WRITEBACK);
1054 dec_zone_page_state(req->pages[0], NR_WRITEBACK_TEMP);
1055 bdi_writeout_inc(bdi);
1056 wake_up(&fi->page_waitq);
1057}
1058
1059/* Called under fc->lock, may release and reacquire it */
1060static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req)
1061{
1062 struct fuse_inode *fi = get_fuse_inode(req->inode);
1063 loff_t size = i_size_read(req->inode);
1064 struct fuse_write_in *inarg = &req->misc.write.in;
1065
1066 if (!fc->connected)
1067 goto out_free;
1068
1069 if (inarg->offset + PAGE_CACHE_SIZE <= size) {
1070 inarg->size = PAGE_CACHE_SIZE;
1071 } else if (inarg->offset < size) {
1072 inarg->size = size & (PAGE_CACHE_SIZE - 1);
1073 } else {
1074 /* Got truncated off completely */
1075 goto out_free;
1076 }
1077
1078 req->in.args[1].size = inarg->size;
1079 fi->writectr++;
1080 request_send_background_locked(fc, req);
1081 return;
1082
1083 out_free:
1084 fuse_writepage_finish(fc, req);
1085 spin_unlock(&fc->lock);
1086 fuse_writepage_free(fc, req);
1087 spin_lock(&fc->lock);
1088}
1089
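[Editor's note] Putting numbers to the size clamping above (illustrative, 4096-byte pages): with i_size at 10000, a queued request for the page at offset 4096 goes out with size 4096, the page at offset 8192 goes out with size 10000 & 4095 = 1808 (the EOF tail), and a request for offset 12288 hits the "truncated off completely" branch and is freed without being sent.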
1090/*
1091 * If fi->writectr is positive (no truncate or fsync going on) send
1092 * all queued writepage requests.
1093 *
1094 * Called with fc->lock held
1095 */
1096void fuse_flush_writepages(struct inode *inode)
1097{
1098 struct fuse_conn *fc = get_fuse_conn(inode);
1099 struct fuse_inode *fi = get_fuse_inode(inode);
1100 struct fuse_req *req;
1101
1102 while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) {
1103 req = list_entry(fi->queued_writes.next, struct fuse_req, list);
1104 list_del_init(&req->list);
1105 fuse_send_writepage(fc, req);
1106 }
1107}
1108
1109static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
1110{
1111 struct inode *inode = req->inode;
1112 struct fuse_inode *fi = get_fuse_inode(inode);
1113
1114 mapping_set_error(inode->i_mapping, req->out.h.error);
1115 spin_lock(&fc->lock);
1116 fi->writectr--;
1117 fuse_writepage_finish(fc, req);
1118 spin_unlock(&fc->lock);
1119 fuse_writepage_free(fc, req);
1120}
1121
1122static int fuse_writepage_locked(struct page *page)
1123{
1124 struct address_space *mapping = page->mapping;
1125 struct inode *inode = mapping->host;
1126 struct fuse_conn *fc = get_fuse_conn(inode);
1127 struct fuse_inode *fi = get_fuse_inode(inode);
1128 struct fuse_req *req;
1129 struct fuse_file *ff;
1130 struct page *tmp_page;
1131
1132 set_page_writeback(page);
1133
1134 req = fuse_request_alloc_nofs();
1135 if (!req)
1136 goto err;
1137
1138 tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
1139 if (!tmp_page)
1140 goto err_free;
1141
1142 spin_lock(&fc->lock);
1143 BUG_ON(list_empty(&fi->write_files));
1144 ff = list_entry(fi->write_files.next, struct fuse_file, write_entry);
1145 req->ff = fuse_file_get(ff);
1146 spin_unlock(&fc->lock);
1147
1148 fuse_write_fill(req, NULL, ff, inode, page_offset(page), 0, 1);
1149
1150 copy_highpage(tmp_page, page);
1151 req->num_pages = 1;
1152 req->pages[0] = tmp_page;
1153 req->page_offset = 0;
1154 req->end = fuse_writepage_end;
1155 req->inode = inode;
1156
1157 inc_bdi_stat(mapping->backing_dev_info, BDI_WRITEBACK);
1158 inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
1159 end_page_writeback(page);
1160
1161 spin_lock(&fc->lock);
1162 list_add(&req->writepages_entry, &fi->writepages);
1163 list_add_tail(&req->list, &fi->queued_writes);
1164 fuse_flush_writepages(inode);
1165 spin_unlock(&fc->lock);
1166
1167 return 0;
1168
1169err_free:
1170 fuse_request_free(req);
1171err:
1172 end_page_writeback(page);
1173 return -ENOMEM;
1174}
1175
1176static int fuse_writepage(struct page *page, struct writeback_control *wbc)
1177{
1178 int err;
1179
1180 err = fuse_writepage_locked(page);
1181 unlock_page(page);
1182
1183 return err;
1184}
1185
1186static int fuse_launder_page(struct page *page)
1187{
1188 int err = 0;
1189 if (clear_page_dirty_for_io(page)) {
1190 struct inode *inode = page->mapping->host;
1191 err = fuse_writepage_locked(page);
1192 if (!err)
1193 fuse_wait_on_page_writeback(inode, page->index);
726 } 1194 }
727 return generic_file_mmap(file, vma); 1195 return err;
728} 1196}
729 1197
730static int fuse_set_page_dirty(struct page *page) 1198/*
1199 * Write back dirty pages now, because there may not be any suitable
1200 * open files later
1201 */
1202static void fuse_vma_close(struct vm_area_struct *vma)
731{ 1203{
732 printk("fuse_set_page_dirty: should not happen\n"); 1204 filemap_write_and_wait(vma->vm_file->f_mapping);
733 dump_stack(); 1205}
1206
1207/*
1208 * Wait for writeback against this page to complete before allowing it
1209 * to be marked dirty again, and hence written back again, possibly
1210 * before the previous writepage completed.
1211 *
1212 * Block here, instead of in ->writepage(), so that the userspace fs
1213 * can only block processes actually operating on the filesystem.
1214 *
1215 * Otherwise unprivileged userspace fs would be able to block
1216 * unrelated:
1217 *
1218 * - page migration
1219 * - sync(2)
1220 * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER
1221 */
1222static int fuse_page_mkwrite(struct vm_area_struct *vma, struct page *page)
1223{
1224 /*
1225 * Don't use page->mapping as it may become NULL from a
1226 * concurrent truncate.
1227 */
1228 struct inode *inode = vma->vm_file->f_mapping->host;
1229
1230 fuse_wait_on_page_writeback(inode, page->index);
1231 return 0;
1232}
1233
1234static struct vm_operations_struct fuse_file_vm_ops = {
1235 .close = fuse_vma_close,
1236 .fault = filemap_fault,
1237 .page_mkwrite = fuse_page_mkwrite,
1238};
1239
1240static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
1241{
1242 if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
1243 struct inode *inode = file->f_dentry->d_inode;
1244 struct fuse_conn *fc = get_fuse_conn(inode);
1245 struct fuse_inode *fi = get_fuse_inode(inode);
1246 struct fuse_file *ff = file->private_data;
1247 /*
1248 * file may be written through mmap, so chain it onto the
1249 * inode's write_files list
1250 */
1251 spin_lock(&fc->lock);
1252 if (list_empty(&ff->write_entry))
1253 list_add(&ff->write_entry, &fi->write_files);
1254 spin_unlock(&fc->lock);
1255 }
1256 file_accessed(file);
1257 vma->vm_ops = &fuse_file_vm_ops;
734 return 0; 1258 return 0;
735} 1259}
736 1260
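[Editor's note] For completeness, a user-space view of what the mmap changes enable (illustrative, not part of the patch; the path is hypothetical): a MAP_SHARED writable mapping on a FUSE file used to fail with -ENODEV at mmap() time, and now dirties pages through ->page_mkwrite and flushes them via fuse_writepage():

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	/* hypothetical, at least page-sized file on a FUSE mount */
	int fd = open("/mnt/fuse/mapped", O_RDWR);
	char *p;

	if (fd < 0) {
		perror("open");
		return 1;
	}
	p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return 1;
	}
	memcpy(p, "hello", 5);		/* faults; ->page_mkwrite waits on writeback */
	msync(p, 4096, MS_SYNC);	/* pushes the dirty page through ->writepage */
	munmap(p, 4096);
	close(fd);
	return 0;
}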
@@ -909,12 +1433,37 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
909 return err ? 0 : outarg.block; 1433 return err ? 0 : outarg.block;
910} 1434}
911 1435
1436static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
1437{
1438 loff_t retval;
1439 struct inode *inode = file->f_path.dentry->d_inode;
1440
1441 mutex_lock(&inode->i_mutex);
1442 switch (origin) {
1443 case SEEK_END:
1444 offset += i_size_read(inode);
1445 break;
1446 case SEEK_CUR:
1447 offset += file->f_pos;
1448 }
1449 retval = -EINVAL;
1450 if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) {
1451 if (offset != file->f_pos) {
1452 file->f_pos = offset;
1453 file->f_version = 0;
1454 }
1455 retval = offset;
1456 }
1457 mutex_unlock(&inode->i_mutex);
1458 return retval;
1459}
1460
912static const struct file_operations fuse_file_operations = { 1461static const struct file_operations fuse_file_operations = {
913 .llseek = generic_file_llseek, 1462 .llseek = fuse_file_llseek,
914 .read = do_sync_read, 1463 .read = do_sync_read,
915 .aio_read = fuse_file_aio_read, 1464 .aio_read = fuse_file_aio_read,
916 .write = do_sync_write, 1465 .write = do_sync_write,
917 .aio_write = generic_file_aio_write, 1466 .aio_write = fuse_file_aio_write,
918 .mmap = fuse_file_mmap, 1467 .mmap = fuse_file_mmap,
919 .open = fuse_open, 1468 .open = fuse_open,
920 .flush = fuse_flush, 1469 .flush = fuse_flush,
@@ -926,7 +1475,7 @@ static const struct file_operations fuse_file_operations = {
926}; 1475};
927 1476
928static const struct file_operations fuse_direct_io_file_operations = { 1477static const struct file_operations fuse_direct_io_file_operations = {
929 .llseek = generic_file_llseek, 1478 .llseek = fuse_file_llseek,
930 .read = fuse_direct_read, 1479 .read = fuse_direct_read,
931 .write = fuse_direct_write, 1480 .write = fuse_direct_write,
932 .open = fuse_open, 1481 .open = fuse_open,
@@ -940,10 +1489,12 @@ static const struct file_operations fuse_direct_io_file_operations = {
940 1489
941static const struct address_space_operations fuse_file_aops = { 1490static const struct address_space_operations fuse_file_aops = {
942 .readpage = fuse_readpage, 1491 .readpage = fuse_readpage,
1492 .writepage = fuse_writepage,
1493 .launder_page = fuse_launder_page,
943 .write_begin = fuse_write_begin, 1494 .write_begin = fuse_write_begin,
944 .write_end = fuse_write_end, 1495 .write_end = fuse_write_end,
945 .readpages = fuse_readpages, 1496 .readpages = fuse_readpages,
946 .set_page_dirty = fuse_set_page_dirty, 1497 .set_page_dirty = __set_page_dirty_nobuffers,
947 .bmap = fuse_bmap, 1498 .bmap = fuse_bmap,
948}; 1499};
949 1500
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 67aaf6ee38ea..bae948657c4f 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -15,6 +15,7 @@
15#include <linux/mm.h> 15#include <linux/mm.h>
16#include <linux/backing-dev.h> 16#include <linux/backing-dev.h>
17#include <linux/mutex.h> 17#include <linux/mutex.h>
18#include <linux/rwsem.h>
18 19
19/** Max number of pages that can be used in a single read request */ 20/** Max number of pages that can be used in a single read request */
20#define FUSE_MAX_PAGES_PER_REQ 32 21#define FUSE_MAX_PAGES_PER_REQ 32
@@ -25,6 +26,9 @@
25/** Congestion starts at 75% of maximum */ 26/** Congestion starts at 75% of maximum */
26#define FUSE_CONGESTION_THRESHOLD (FUSE_MAX_BACKGROUND * 75 / 100) 27#define FUSE_CONGESTION_THRESHOLD (FUSE_MAX_BACKGROUND * 75 / 100)
27 28
29/** Bias for fi->writectr, meaning new writepages must not be sent */
30#define FUSE_NOWRITE INT_MIN
31
28/** It could be as large as PATH_MAX, but would that have any uses? */ 32/** It could be as large as PATH_MAX, but would that have any uses? */
29#define FUSE_NAME_MAX 1024 33#define FUSE_NAME_MAX 1024
30 34
@@ -73,6 +77,19 @@ struct fuse_inode {
73 77
74 /** Files usable in writepage. Protected by fc->lock */ 78 /** Files usable in writepage. Protected by fc->lock */
75 struct list_head write_files; 79 struct list_head write_files;
80
81 /** Writepages pending on truncate or fsync */
82 struct list_head queued_writes;
83
84 /** Number of sent writes, a negative bias (FUSE_NOWRITE)
85 * means more writes are blocked */
86 int writectr;
87
88 /** Waitq for writepage completion */
89 wait_queue_head_t page_waitq;
90
91 /** List of writepage requests (pending or sent) */
92 struct list_head writepages;
76}; 93};
77 94
78/** FUSE specific file data */ 95/** FUSE specific file data */
@@ -222,7 +239,10 @@ struct fuse_req {
222 } release; 239 } release;
223 struct fuse_init_in init_in; 240 struct fuse_init_in init_in;
224 struct fuse_init_out init_out; 241 struct fuse_init_out init_out;
225 struct fuse_read_in read_in; 242 struct {
243 struct fuse_read_in in;
244 u64 attr_ver;
245 } read;
226 struct { 246 struct {
227 struct fuse_write_in in; 247 struct fuse_write_in in;
228 struct fuse_write_out out; 248 struct fuse_write_out out;
@@ -242,6 +262,12 @@ struct fuse_req {
242 /** File used in the request (or NULL) */ 262 /** File used in the request (or NULL) */
243 struct fuse_file *ff; 263 struct fuse_file *ff;
244 264
265 /** Inode used in the request or NULL */
266 struct inode *inode;
267
268 /** Link on fi->writepages */
269 struct list_head writepages_entry;
270
245 /** Request completion callback */ 271 /** Request completion callback */
246 void (*end)(struct fuse_conn *, struct fuse_req *); 272 void (*end)(struct fuse_conn *, struct fuse_req *);
247 273
@@ -378,6 +404,9 @@ struct fuse_conn {
378 /** Is bmap not implemented by fs? */ 404 /** Is bmap not implemented by fs? */
379 unsigned no_bmap : 1; 405 unsigned no_bmap : 1;
380 406
407 /** Do multi-page cached writes */
408 unsigned big_writes : 1;
409
381 /** The number of requests waiting for completion */ 410 /** The number of requests waiting for completion */
382 atomic_t num_waiting; 411 atomic_t num_waiting;
383 412
@@ -390,8 +419,8 @@ struct fuse_conn {
390 /** Entry on the fuse_conn_list */ 419 /** Entry on the fuse_conn_list */
391 struct list_head entry; 420 struct list_head entry;
392 421
393 /** Unique ID */ 422 /** Device ID from super block */
394 u64 id; 423 dev_t dev;
395 424
396 /** Dentries in the control filesystem */ 425 /** Dentries in the control filesystem */
397 struct dentry *ctl_dentry[FUSE_CTL_NUM_DENTRIES]; 426 struct dentry *ctl_dentry[FUSE_CTL_NUM_DENTRIES];
@@ -438,7 +467,7 @@ extern const struct file_operations fuse_dev_operations;
438/** 467/**
439 * Get a filled in inode 468 * Get a filled in inode
440 */ 469 */
441struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid, 470struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
442 int generation, struct fuse_attr *attr, 471 int generation, struct fuse_attr *attr,
443 u64 attr_valid, u64 attr_version); 472 u64 attr_valid, u64 attr_version);
444 473
@@ -446,7 +475,7 @@ struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
446 * Send FORGET command 475 * Send FORGET command
447 */ 476 */
448void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, 477void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
449 unsigned long nodeid, u64 nlookup); 478 u64 nodeid, u64 nlookup);
450 479
451/** 480/**
452 * Initialize READ or READDIR request 481 * Initialize READ or READDIR request
@@ -504,6 +533,11 @@ void fuse_init_symlink(struct inode *inode);
504void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, 533void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
505 u64 attr_valid, u64 attr_version); 534 u64 attr_valid, u64 attr_version);
506 535
536void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
537 u64 attr_valid);
538
539void fuse_truncate(struct address_space *mapping, loff_t offset);
540
507/** 541/**
508 * Initialize the client device 542 * Initialize the client device
509 */ 543 */
@@ -522,6 +556,8 @@ void fuse_ctl_cleanup(void);
522 */ 556 */
523struct fuse_req *fuse_request_alloc(void); 557struct fuse_req *fuse_request_alloc(void);
524 558
559struct fuse_req *fuse_request_alloc_nofs(void);
560
525/** 561/**
526 * Free a request 562 * Free a request
527 */ 563 */
@@ -558,6 +594,8 @@ void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req);
558 */ 594 */
559void request_send_background(struct fuse_conn *fc, struct fuse_req *req); 595void request_send_background(struct fuse_conn *fc, struct fuse_req *req);
560 596
597void request_send_background_locked(struct fuse_conn *fc, struct fuse_req *req);
598
561/* Abort all requests */ 599/* Abort all requests */
562void fuse_abort_conn(struct fuse_conn *fc); 600void fuse_abort_conn(struct fuse_conn *fc);
563 601
@@ -600,3 +638,10 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id);
600 638
601int fuse_update_attributes(struct inode *inode, struct kstat *stat, 639int fuse_update_attributes(struct inode *inode, struct kstat *stat,
602 struct file *file, bool *refreshed); 640 struct file *file, bool *refreshed);
641
642void fuse_flush_writepages(struct inode *inode);
643
644void fuse_set_nowrite(struct inode *inode);
645void fuse_release_nowrite(struct inode *inode);
646
647u64 fuse_get_attr_version(struct fuse_conn *fc);
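[Editor's note] fuse_set_nowrite() and fuse_release_nowrite(), declared just above, are implemented in dir.c in the same patch and are not shown in this excerpt. A sketch of how the FUSE_NOWRITE bias is meant to work — pieced together from the writectr comments and fuse_flush_writepages() above, an illustration rather than the patch's exact code — looks like:

/* Block new writepages and wait for the in-flight ones to retire (sketch) */
void fuse_set_nowrite(struct inode *inode)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);

	spin_lock(&fc->lock);
	BUG_ON(fi->writectr < 0);
	fi->writectr += FUSE_NOWRITE;	/* drive the counter negative */
	spin_unlock(&fc->lock);
	/* fuse_writepage_end() decrements writectr and wakes page_waitq */
	wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
}

/* Undo the bias and send whatever queued up while writes were blocked (sketch) */
void fuse_release_nowrite(struct inode *inode)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);

	spin_lock(&fc->lock);
	BUG_ON(fi->writectr != FUSE_NOWRITE);
	fi->writectr = 0;
	fuse_flush_writepages(inode);
	spin_unlock(&fc->lock);
}

While the bias is in place writectr stays negative, so fuse_flush_writepages() leaves queued requests alone; releasing it resets the counter and flushes them.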
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 4df34da2284a..fb77e0962132 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -59,7 +59,11 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
59 fi->nodeid = 0; 59 fi->nodeid = 0;
60 fi->nlookup = 0; 60 fi->nlookup = 0;
61 fi->attr_version = 0; 61 fi->attr_version = 0;
62 fi->writectr = 0;
62 INIT_LIST_HEAD(&fi->write_files); 63 INIT_LIST_HEAD(&fi->write_files);
64 INIT_LIST_HEAD(&fi->queued_writes);
65 INIT_LIST_HEAD(&fi->writepages);
66 init_waitqueue_head(&fi->page_waitq);
63 fi->forget_req = fuse_request_alloc(); 67 fi->forget_req = fuse_request_alloc();
64 if (!fi->forget_req) { 68 if (!fi->forget_req) {
65 kmem_cache_free(fuse_inode_cachep, inode); 69 kmem_cache_free(fuse_inode_cachep, inode);
@@ -73,13 +77,14 @@ static void fuse_destroy_inode(struct inode *inode)
73{ 77{
74 struct fuse_inode *fi = get_fuse_inode(inode); 78 struct fuse_inode *fi = get_fuse_inode(inode);
75 BUG_ON(!list_empty(&fi->write_files)); 79 BUG_ON(!list_empty(&fi->write_files));
80 BUG_ON(!list_empty(&fi->queued_writes));
76 if (fi->forget_req) 81 if (fi->forget_req)
77 fuse_request_free(fi->forget_req); 82 fuse_request_free(fi->forget_req);
78 kmem_cache_free(fuse_inode_cachep, inode); 83 kmem_cache_free(fuse_inode_cachep, inode);
79} 84}
80 85
81void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, 86void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
82 unsigned long nodeid, u64 nlookup) 87 u64 nodeid, u64 nlookup)
83{ 88{
84 struct fuse_forget_in *inarg = &req->misc.forget_in; 89 struct fuse_forget_in *inarg = &req->misc.forget_in;
85 inarg->nlookup = nlookup; 90 inarg->nlookup = nlookup;
@@ -109,7 +114,7 @@ static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)
109 return 0; 114 return 0;
110} 115}
111 116
112static void fuse_truncate(struct address_space *mapping, loff_t offset) 117void fuse_truncate(struct address_space *mapping, loff_t offset)
113{ 118{
114 /* See vmtruncate() */ 119 /* See vmtruncate() */
115 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); 120 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
@@ -117,19 +122,12 @@ static void fuse_truncate(struct address_space *mapping, loff_t offset)
117 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); 122 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
118} 123}
119 124
120 125void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
121void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, 126 u64 attr_valid)
122 u64 attr_valid, u64 attr_version)
123{ 127{
124 struct fuse_conn *fc = get_fuse_conn(inode); 128 struct fuse_conn *fc = get_fuse_conn(inode);
125 struct fuse_inode *fi = get_fuse_inode(inode); 129 struct fuse_inode *fi = get_fuse_inode(inode);
126 loff_t oldsize;
127 130
128 spin_lock(&fc->lock);
129 if (attr_version != 0 && fi->attr_version > attr_version) {
130 spin_unlock(&fc->lock);
131 return;
132 }
133 fi->attr_version = ++fc->attr_version; 131 fi->attr_version = ++fc->attr_version;
134 fi->i_time = attr_valid; 132 fi->i_time = attr_valid;
135 133
@@ -159,6 +157,22 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
159 fi->orig_i_mode = inode->i_mode; 157 fi->orig_i_mode = inode->i_mode;
160 if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) 158 if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
161 inode->i_mode &= ~S_ISVTX; 159 inode->i_mode &= ~S_ISVTX;
160}
161
162void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
163 u64 attr_valid, u64 attr_version)
164{
165 struct fuse_conn *fc = get_fuse_conn(inode);
166 struct fuse_inode *fi = get_fuse_inode(inode);
167 loff_t oldsize;
168
169 spin_lock(&fc->lock);
170 if (attr_version != 0 && fi->attr_version > attr_version) {
171 spin_unlock(&fc->lock);
172 return;
173 }
174
175 fuse_change_attributes_common(inode, attr, attr_valid);
162 176
163 oldsize = inode->i_size; 177 oldsize = inode->i_size;
164 i_size_write(inode, attr->size); 178 i_size_write(inode, attr->size);
@@ -193,7 +207,7 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
193 207
194static int fuse_inode_eq(struct inode *inode, void *_nodeidp) 208static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
195{ 209{
196 unsigned long nodeid = *(unsigned long *) _nodeidp; 210 u64 nodeid = *(u64 *) _nodeidp;
197 if (get_node_id(inode) == nodeid) 211 if (get_node_id(inode) == nodeid)
198 return 1; 212 return 1;
199 else 213 else
@@ -202,12 +216,12 @@ static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
202 216
203static int fuse_inode_set(struct inode *inode, void *_nodeidp) 217static int fuse_inode_set(struct inode *inode, void *_nodeidp)
204{ 218{
205 unsigned long nodeid = *(unsigned long *) _nodeidp; 219 u64 nodeid = *(u64 *) _nodeidp;
206 get_fuse_inode(inode)->nodeid = nodeid; 220 get_fuse_inode(inode)->nodeid = nodeid;
207 return 0; 221 return 0;
208} 222}
209 223
210struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid, 224struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
211 int generation, struct fuse_attr *attr, 225 int generation, struct fuse_attr *attr,
212 u64 attr_valid, u64 attr_version) 226 u64 attr_valid, u64 attr_version)
213{ 227{
@@ -447,7 +461,7 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
447 return 0; 461 return 0;
448} 462}
449 463
450static struct fuse_conn *new_conn(void) 464static struct fuse_conn *new_conn(struct super_block *sb)
451{ 465{
452 struct fuse_conn *fc; 466 struct fuse_conn *fc;
453 int err; 467 int err;
@@ -468,19 +482,41 @@ static struct fuse_conn *new_conn(void)
468 atomic_set(&fc->num_waiting, 0); 482 atomic_set(&fc->num_waiting, 0);
469 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; 483 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
470 fc->bdi.unplug_io_fn = default_unplug_io_fn; 484 fc->bdi.unplug_io_fn = default_unplug_io_fn;
485 /* fuse does its own writeback accounting */
486 fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
487 fc->dev = sb->s_dev;
471 err = bdi_init(&fc->bdi); 488 err = bdi_init(&fc->bdi);
472 if (err) { 489 if (err)
473 kfree(fc); 490 goto error_kfree;
474 fc = NULL; 491 err = bdi_register_dev(&fc->bdi, fc->dev);
475 goto out; 492 if (err)
476 } 493 goto error_bdi_destroy;
494 /*
495 * For a single fuse filesystem use max 1% of dirty +
496 * writeback threshold.
497 *
498 * This gives about 1M of write buffer for memory maps on a
499 * machine with 1G and 10% dirty_ratio, which should be more
500 * than enough.
501 *
502 * Privileged users can raise it by writing to
503 *
504 * /sys/class/bdi/<bdi>/max_ratio
505 */
506 bdi_set_max_ratio(&fc->bdi, 1);
477 fc->reqctr = 0; 507 fc->reqctr = 0;
478 fc->blocked = 1; 508 fc->blocked = 1;
479 fc->attr_version = 1; 509 fc->attr_version = 1;
480 get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); 510 get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
481 } 511 }
482out:
483 return fc; 512 return fc;
513
514error_bdi_destroy:
515 bdi_destroy(&fc->bdi);
516error_kfree:
517 mutex_destroy(&fc->inst_mutex);
518 kfree(fc);
519 return NULL;
484} 520}
485 521
486void fuse_conn_put(struct fuse_conn *fc) 522void fuse_conn_put(struct fuse_conn *fc)
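[Editor's note] The comment's arithmetic, with illustrative numbers: 1 GiB of RAM and a 10% dirty_ratio allow roughly 100 MiB of dirty plus writeback pages system-wide, so bdi_set_max_ratio(&fc->bdi, 1) caps a single fuse connection at about 1% of that, roughly 1 MiB, until an administrator raises /sys/class/bdi/<bdi>/max_ratio.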
@@ -540,6 +576,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
540 fc->no_lock = 1; 576 fc->no_lock = 1;
541 if (arg->flags & FUSE_ATOMIC_O_TRUNC) 577 if (arg->flags & FUSE_ATOMIC_O_TRUNC)
542 fc->atomic_o_trunc = 1; 578 fc->atomic_o_trunc = 1;
579 if (arg->flags & FUSE_BIG_WRITES)
580 fc->big_writes = 1;
543 } else { 581 } else {
544 ra_pages = fc->max_read / PAGE_CACHE_SIZE; 582 ra_pages = fc->max_read / PAGE_CACHE_SIZE;
545 fc->no_lock = 1; 583 fc->no_lock = 1;
@@ -548,6 +586,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
548 fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages); 586 fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages);
549 fc->minor = arg->minor; 587 fc->minor = arg->minor;
550 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; 588 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
589 fc->max_write = max_t(unsigned, 4096, fc->max_write);
551 fc->conn_init = 1; 590 fc->conn_init = 1;
552 } 591 }
553 fuse_put_request(fc, req); 592 fuse_put_request(fc, req);
@@ -562,7 +601,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
562 arg->major = FUSE_KERNEL_VERSION; 601 arg->major = FUSE_KERNEL_VERSION;
563 arg->minor = FUSE_KERNEL_MINOR_VERSION; 602 arg->minor = FUSE_KERNEL_MINOR_VERSION;
564 arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; 603 arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
565 arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC; 604 arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
605 FUSE_BIG_WRITES;
566 req->in.h.opcode = FUSE_INIT; 606 req->in.h.opcode = FUSE_INIT;
567 req->in.numargs = 1; 607 req->in.numargs = 1;
568 req->in.args[0].size = sizeof(*arg); 608 req->in.args[0].size = sizeof(*arg);
@@ -578,12 +618,6 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
578 request_send_background(fc, req); 618 request_send_background(fc, req);
579} 619}
580 620
581static u64 conn_id(void)
582{
583 static u64 ctr = 1;
584 return ctr++;
585}
586
587static int fuse_fill_super(struct super_block *sb, void *data, int silent) 621static int fuse_fill_super(struct super_block *sb, void *data, int silent)
588{ 622{
589 struct fuse_conn *fc; 623 struct fuse_conn *fc;
@@ -621,14 +655,14 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
621 if (file->f_op != &fuse_dev_operations) 655 if (file->f_op != &fuse_dev_operations)
622 return -EINVAL; 656 return -EINVAL;
623 657
624 fc = new_conn(); 658 fc = new_conn(sb);
625 if (!fc) 659 if (!fc)
626 return -ENOMEM; 660 return -ENOMEM;
627 661
628 fc->flags = d.flags; 662 fc->flags = d.flags;
629 fc->user_id = d.user_id; 663 fc->user_id = d.user_id;
630 fc->group_id = d.group_id; 664 fc->group_id = d.group_id;
631 fc->max_read = d.max_read; 665 fc->max_read = max_t(unsigned, 4096, d.max_read);
632 666
633 /* Used by get_root_inode() */ 667 /* Used by get_root_inode() */
634 sb->s_fs_info = fc; 668 sb->s_fs_info = fc;
@@ -659,7 +693,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
659 if (file->private_data) 693 if (file->private_data)
660 goto err_unlock; 694 goto err_unlock;
661 695
662 fc->id = conn_id();
663 err = fuse_ctl_add_conn(fc); 696 err = fuse_ctl_add_conn(fc);
664 if (err) 697 if (err)
665 goto err_unlock; 698 goto err_unlock;
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c
index 8479da47049c..a4ff271df9ee 100644
--- a/fs/gfs2/locking/dlm/sysfs.c
+++ b/fs/gfs2/locking/dlm/sysfs.c
@@ -212,7 +212,7 @@ int gdlm_sysfs_init(void)
212{ 212{
213 gdlm_kset = kset_create_and_add("lock_dlm", NULL, kernel_kobj); 213 gdlm_kset = kset_create_and_add("lock_dlm", NULL, kernel_kobj);
214 if (!gdlm_kset) { 214 if (!gdlm_kset) {
215 printk(KERN_WARNING "%s: can not create kset\n", __FUNCTION__); 215 printk(KERN_WARNING "%s: can not create kset\n", __func__);
216 return -ENOMEM; 216 return -ENOMEM;
217 } 217 }
218 return 0; 218 return 0;
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index 509c5d60bd80..7f48576289c9 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -41,7 +41,7 @@ int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
41 41
42#define gfs2_assert_withdraw(sdp, assertion) \ 42#define gfs2_assert_withdraw(sdp, assertion) \
43((likely(assertion)) ? 0 : gfs2_assert_withdraw_i((sdp), #assertion, \ 43((likely(assertion)) ? 0 : gfs2_assert_withdraw_i((sdp), #assertion, \
44 __FUNCTION__, __FILE__, __LINE__)) 44 __func__, __FILE__, __LINE__))
45 45
46 46
47int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, 47int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
@@ -49,28 +49,28 @@ int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
49 49
50#define gfs2_assert_warn(sdp, assertion) \ 50#define gfs2_assert_warn(sdp, assertion) \
51((likely(assertion)) ? 0 : gfs2_assert_warn_i((sdp), #assertion, \ 51((likely(assertion)) ? 0 : gfs2_assert_warn_i((sdp), #assertion, \
52 __FUNCTION__, __FILE__, __LINE__)) 52 __func__, __FILE__, __LINE__))
53 53
54 54
55int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, 55int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide,
56 const char *function, char *file, unsigned int line); 56 const char *function, char *file, unsigned int line);
57 57
58#define gfs2_consist(sdp) \ 58#define gfs2_consist(sdp) \
59gfs2_consist_i((sdp), 0, __FUNCTION__, __FILE__, __LINE__) 59gfs2_consist_i((sdp), 0, __func__, __FILE__, __LINE__)
60 60
61 61
62int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide, 62int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
63 const char *function, char *file, unsigned int line); 63 const char *function, char *file, unsigned int line);
64 64
65#define gfs2_consist_inode(ip) \ 65#define gfs2_consist_inode(ip) \
66gfs2_consist_inode_i((ip), 0, __FUNCTION__, __FILE__, __LINE__) 66gfs2_consist_inode_i((ip), 0, __func__, __FILE__, __LINE__)
67 67
68 68
69int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide, 69int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
70 const char *function, char *file, unsigned int line); 70 const char *function, char *file, unsigned int line);
71 71
72#define gfs2_consist_rgrpd(rgd) \ 72#define gfs2_consist_rgrpd(rgd) \
73gfs2_consist_rgrpd_i((rgd), 0, __FUNCTION__, __FILE__, __LINE__) 73gfs2_consist_rgrpd_i((rgd), 0, __func__, __FILE__, __LINE__)
74 74
75 75
76int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, 76int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
@@ -91,7 +91,7 @@ static inline int gfs2_meta_check_i(struct gfs2_sbd *sdp,
91} 91}
92 92
93#define gfs2_meta_check(sdp, bh) \ 93#define gfs2_meta_check(sdp, bh) \
94gfs2_meta_check_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__) 94gfs2_meta_check_i((sdp), (bh), __func__, __FILE__, __LINE__)
95 95
96 96
97int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, 97int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
@@ -118,7 +118,7 @@ static inline int gfs2_metatype_check_i(struct gfs2_sbd *sdp,
118} 118}
119 119
120#define gfs2_metatype_check(sdp, bh, type) \ 120#define gfs2_metatype_check(sdp, bh, type) \
121gfs2_metatype_check_i((sdp), (bh), (type), __FUNCTION__, __FILE__, __LINE__) 121gfs2_metatype_check_i((sdp), (bh), (type), __func__, __FILE__, __LINE__)
122 122
123static inline void gfs2_metatype_set(struct buffer_head *bh, u16 type, 123static inline void gfs2_metatype_set(struct buffer_head *bh, u16 type,
124 u16 format) 124 u16 format)
@@ -134,14 +134,14 @@ int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function,
134 char *file, unsigned int line); 134 char *file, unsigned int line);
135 135
136#define gfs2_io_error(sdp) \ 136#define gfs2_io_error(sdp) \
137gfs2_io_error_i((sdp), __FUNCTION__, __FILE__, __LINE__); 137gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__);
138 138
139 139
140int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, 140int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
141 const char *function, char *file, unsigned int line); 141 const char *function, char *file, unsigned int line);
142 142
143#define gfs2_io_error_bh(sdp, bh) \ 143#define gfs2_io_error_bh(sdp, bh) \
144gfs2_io_error_bh_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__); 144gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__);
145 145
146 146
147extern struct kmem_cache *gfs2_glock_cachep; 147extern struct kmem_cache *gfs2_glock_cachep;
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index 24cf6fc43021..f6621a785202 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -208,7 +208,9 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
208 struct hfs_bnode *node, *next_node; 208 struct hfs_bnode *node, *next_node;
209 struct page **pagep; 209 struct page **pagep;
210 u32 nidx, idx; 210 u32 nidx, idx;
211 u16 off, len; 211 unsigned off;
212 u16 off16;
213 u16 len;
212 u8 *data, byte, m; 214 u8 *data, byte, m;
213 int i; 215 int i;
214 216
@@ -235,7 +237,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
235 node = hfs_bnode_find(tree, nidx); 237 node = hfs_bnode_find(tree, nidx);
236 if (IS_ERR(node)) 238 if (IS_ERR(node))
237 return node; 239 return node;
238 len = hfs_brec_lenoff(node, 2, &off); 240 len = hfs_brec_lenoff(node, 2, &off16);
241 off = off16;
239 242
240 off += node->page_offset; 243 off += node->page_offset;
241 pagep = node->page + (off >> PAGE_CACHE_SHIFT); 244 pagep = node->page + (off >> PAGE_CACHE_SHIFT);
@@ -280,7 +283,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
280 return next_node; 283 return next_node;
281 node = next_node; 284 node = next_node;
282 285
283 len = hfs_brec_lenoff(node, 0, &off); 286 len = hfs_brec_lenoff(node, 0, &off16);
287 off = off16;
284 off += node->page_offset; 288 off += node->page_offset;
285 pagep = node->page + (off >> PAGE_CACHE_SHIFT); 289 pagep = node->page + (off >> PAGE_CACHE_SHIFT);
286 data = kmap(*pagep); 290 data = kmap(*pagep);
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index b4651e128d7f..36ca2e1a4fa3 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -215,7 +215,7 @@ int hfs_mdb_get(struct super_block *sb)
215 attrib &= cpu_to_be16(~HFS_SB_ATTRIB_UNMNT); 215 attrib &= cpu_to_be16(~HFS_SB_ATTRIB_UNMNT);
216 attrib |= cpu_to_be16(HFS_SB_ATTRIB_INCNSTNT); 216 attrib |= cpu_to_be16(HFS_SB_ATTRIB_INCNSTNT);
217 mdb->drAtrb = attrib; 217 mdb->drAtrb = attrib;
218 mdb->drWrCnt = cpu_to_be32(be32_to_cpu(mdb->drWrCnt) + 1); 218 be32_add_cpu(&mdb->drWrCnt, 1);
219 mdb->drLsMod = hfs_mtime(); 219 mdb->drLsMod = hfs_mtime();
220 220
221 mark_buffer_dirty(HFS_SB(sb)->mdb_bh); 221 mark_buffer_dirty(HFS_SB(sb)->mdb_bh);
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 32de44ed0021..8cf67974adf6 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -297,7 +297,8 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
297 return 0; 297 return 0;
298 } 298 }
299 p = match_strdup(&args[0]); 299 p = match_strdup(&args[0]);
300 hsb->nls_disk = load_nls(p); 300 if (p)
301 hsb->nls_disk = load_nls(p);
301 if (!hsb->nls_disk) { 302 if (!hsb->nls_disk) {
302 printk(KERN_ERR "hfs: unable to load codepage \"%s\"\n", p); 303 printk(KERN_ERR "hfs: unable to load codepage \"%s\"\n", p);
303 kfree(p); 304 kfree(p);
@@ -311,7 +312,8 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
311 return 0; 312 return 0;
312 } 313 }
313 p = match_strdup(&args[0]); 314 p = match_strdup(&args[0]);
314 hsb->nls_io = load_nls(p); 315 if (p)
316 hsb->nls_io = load_nls(p);
315 if (!hsb->nls_io) { 317 if (!hsb->nls_io) {
316 printk(KERN_ERR "hfs: unable to load iocharset \"%s\"\n", p); 318 printk(KERN_ERR "hfs: unable to load iocharset \"%s\"\n", p);
317 kfree(p); 319 kfree(p);
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c
index bb5433608a42..e49fcee1e293 100644
--- a/fs/hfsplus/btree.c
+++ b/fs/hfsplus/btree.c
@@ -184,7 +184,9 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
184 struct hfs_bnode *node, *next_node; 184 struct hfs_bnode *node, *next_node;
185 struct page **pagep; 185 struct page **pagep;
186 u32 nidx, idx; 186 u32 nidx, idx;
187 u16 off, len; 187 unsigned off;
188 u16 off16;
189 u16 len;
188 u8 *data, byte, m; 190 u8 *data, byte, m;
189 int i; 191 int i;
190 192
@@ -211,7 +213,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
211 node = hfs_bnode_find(tree, nidx); 213 node = hfs_bnode_find(tree, nidx);
212 if (IS_ERR(node)) 214 if (IS_ERR(node))
213 return node; 215 return node;
214 len = hfs_brec_lenoff(node, 2, &off); 216 len = hfs_brec_lenoff(node, 2, &off16);
217 off = off16;
215 218
216 off += node->page_offset; 219 off += node->page_offset;
217 pagep = node->page + (off >> PAGE_CACHE_SHIFT); 220 pagep = node->page + (off >> PAGE_CACHE_SHIFT);
@@ -256,7 +259,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
256 return next_node; 259 return next_node;
257 node = next_node; 260 node = next_node;
258 261
259 len = hfs_brec_lenoff(node, 0, &off); 262 len = hfs_brec_lenoff(node, 0, &off16);
263 off = off16;
260 off += node->page_offset; 264 off += node->page_offset;
261 pagep = node->page + (off >> PAGE_CACHE_SHIFT); 265 pagep = node->page + (off >> PAGE_CACHE_SHIFT);
262 data = kmap(*pagep); 266 data = kmap(*pagep);
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index d72d0a8b25aa..9e59537b43d5 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -311,6 +311,10 @@ int hfsplus_delete_cat(u32, struct inode *, struct qstr *);
311int hfsplus_rename_cat(u32, struct inode *, struct qstr *, 311int hfsplus_rename_cat(u32, struct inode *, struct qstr *,
312 struct inode *, struct qstr *); 312 struct inode *, struct qstr *);
313 313
314/* dir.c */
315extern const struct inode_operations hfsplus_dir_inode_operations;
316extern const struct file_operations hfsplus_dir_operations;
317
314/* extents.c */ 318/* extents.c */
315int hfsplus_ext_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *); 319int hfsplus_ext_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *);
316void hfsplus_ext_write_extent(struct inode *); 320void hfsplus_ext_write_extent(struct inode *);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 37744cf3706a..67e1c8b467c4 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -65,6 +65,8 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask)
65 BUG(); 65 BUG();
66 return 0; 66 return 0;
67 } 67 }
68 if (!tree)
69 return 0;
68 if (tree->node_size >= PAGE_CACHE_SIZE) { 70 if (tree->node_size >= PAGE_CACHE_SIZE) {
69 nidx = page->index >> (tree->node_size_shift - PAGE_CACHE_SHIFT); 71 nidx = page->index >> (tree->node_size_shift - PAGE_CACHE_SHIFT);
70 spin_lock(&tree->hash_lock); 72 spin_lock(&tree->hash_lock);
@@ -278,9 +280,6 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
278 return 0; 280 return 0;
279} 281}
280 282
281extern const struct inode_operations hfsplus_dir_inode_operations;
282extern struct file_operations hfsplus_dir_operations;
283
284static const struct inode_operations hfsplus_file_inode_operations = { 283static const struct inode_operations hfsplus_file_inode_operations = {
285 .lookup = hfsplus_file_lookup, 284 .lookup = hfsplus_file_lookup,
286 .truncate = hfsplus_file_truncate, 285 .truncate = hfsplus_file_truncate,
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c
index dc64fac00831..9997cbf8beb5 100644
--- a/fs/hfsplus/options.c
+++ b/fs/hfsplus/options.c
@@ -132,7 +132,8 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi)
132 return 0; 132 return 0;
133 } 133 }
134 p = match_strdup(&args[0]); 134 p = match_strdup(&args[0]);
135 sbi->nls = load_nls(p); 135 if (p)
136 sbi->nls = load_nls(p);
136 if (!sbi->nls) { 137 if (!sbi->nls) {
137 printk(KERN_ERR "hfs: unable to load nls mapping \"%s\"\n", p); 138 printk(KERN_ERR "hfs: unable to load nls mapping \"%s\"\n", p);
138 kfree(p); 139 kfree(p);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index b0f9ad362d1d..ce97a54518d8 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -357,7 +357,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
357 printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n"); 357 printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n");
358 sb->s_flags |= MS_RDONLY; 358 sb->s_flags |= MS_RDONLY;
359 } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) { 359 } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) {
360 printk(KERN_WARNING "hfs: write access to a jounaled filesystem is not supported, " 360 printk(KERN_WARNING "hfs: write access to a journaled filesystem is not supported, "
361 "use the force option at your own risk, mounting read-only.\n"); 361 "use the force option at your own risk, mounting read-only.\n");
362 sb->s_flags |= MS_RDONLY; 362 sb->s_flags |= MS_RDONLY;
363 } 363 }
@@ -423,7 +423,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
423 */ 423 */
424 vhdr->last_mount_vers = cpu_to_be32(HFSP_MOUNT_VERSION); 424 vhdr->last_mount_vers = cpu_to_be32(HFSP_MOUNT_VERSION);
425 vhdr->modify_date = hfsp_now2mt(); 425 vhdr->modify_date = hfsp_now2mt();
426 vhdr->write_count = cpu_to_be32(be32_to_cpu(vhdr->write_count) + 1); 426 be32_add_cpu(&vhdr->write_count, 1);
427 vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT); 427 vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT);
428 vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT); 428 vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT);
429 mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh); 429 mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh);
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index 72cab78f0509..175d08eacc86 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -47,7 +47,7 @@ static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd)
47 return 0; 47 return 0;
48 wd->ablk_start = be16_to_cpu(*(__be16 *)(bufptr + HFSP_WRAPOFF_ABLKSTART)); 48 wd->ablk_start = be16_to_cpu(*(__be16 *)(bufptr + HFSP_WRAPOFF_ABLKSTART));
49 49
50 extent = be32_to_cpu(get_unaligned((__be32 *)(bufptr + HFSP_WRAPOFF_EMBEDEXT))); 50 extent = get_unaligned_be32(bufptr + HFSP_WRAPOFF_EMBEDEXT);
51 wd->embed_start = (extent >> 16) & 0xFFFF; 51 wd->embed_start = (extent >> 16) & 0xFFFF;
52 wd->embed_count = extent & 0xFFFF; 52 wd->embed_count = extent & 0xFFFF;
53 53
diff --git a/fs/hppfs/Makefile b/fs/hppfs/Makefile
index 6890433f7595..8a1f50344368 100644
--- a/fs/hppfs/Makefile
+++ b/fs/hppfs/Makefile
@@ -1,9 +1,9 @@
1# 1#
2# Copyright (C) 2002, 2003 Jeff Dike (jdike@karaya.com) 2# Copyright (C) 2002 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3# Licensed under the GPL 3# Licensed under the GPL
4# 4#
5 5
6hppfs-objs := hppfs_kern.o 6hppfs-objs := hppfs.o
7 7
8obj-y = 8obj-y =
9obj-$(CONFIG_HPPFS) += hppfs.o 9obj-$(CONFIG_HPPFS) += $(hppfs-objs)
diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs.c
index 8601d8ef3b55..65077aa90f0a 100644
--- a/fs/hppfs/hppfs_kern.c
+++ b/fs/hppfs/hppfs.c
@@ -33,7 +33,7 @@ struct hppfs_private {
33}; 33};
34 34
35struct hppfs_inode_info { 35struct hppfs_inode_info {
36 struct dentry *proc_dentry; 36 struct dentry *proc_dentry;
37 struct inode vfs_inode; 37 struct inode vfs_inode;
38}; 38};
39 39
@@ -52,7 +52,7 @@ static int is_pid(struct dentry *dentry)
52 int i; 52 int i;
53 53
54 sb = dentry->d_sb; 54 sb = dentry->d_sb;
55 if ((sb->s_op != &hppfs_sbops) || (dentry->d_parent != sb->s_root)) 55 if (dentry->d_parent != sb->s_root)
56 return 0; 56 return 0;
57 57
58 for (i = 0; i < dentry->d_name.len; i++) { 58 for (i = 0; i < dentry->d_name.len; i++) {
@@ -136,7 +136,7 @@ static int file_removed(struct dentry *dentry, const char *file)
136} 136}
137 137
138static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry, 138static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry,
139 struct nameidata *nd) 139 struct nameidata *nd)
140{ 140{
141 struct dentry *proc_dentry, *new, *parent; 141 struct dentry *proc_dentry, *new, *parent;
142 struct inode *inode; 142 struct inode *inode;
@@ -254,6 +254,8 @@ static ssize_t hppfs_read(struct file *file, char __user *buf, size_t count,
254 int err; 254 int err;
255 255
256 if (hppfs->contents != NULL) { 256 if (hppfs->contents != NULL) {
257 int rem;
258
257 if (*ppos >= hppfs->len) 259 if (*ppos >= hppfs->len)
258 return 0; 260 return 0;
259 261
@@ -267,8 +269,10 @@ static ssize_t hppfs_read(struct file *file, char __user *buf, size_t count,
267 269
268 if (off + count > hppfs->len) 270 if (off + count > hppfs->len)
269 count = hppfs->len - off; 271 count = hppfs->len - off;
270 copy_to_user(buf, &data->contents[off], count); 272 rem = copy_to_user(buf, &data->contents[off], count);
271 *ppos += count; 273 *ppos += count - rem;
274 if (rem > 0)
275 return -EFAULT;
272 } else if (hppfs->host_fd != -1) { 276 } else if (hppfs->host_fd != -1) {
273 err = os_seek_file(hppfs->host_fd, *ppos); 277 err = os_seek_file(hppfs->host_fd, *ppos);
274 if (err) { 278 if (err) {
@@ -285,21 +289,15 @@ static ssize_t hppfs_read(struct file *file, char __user *buf, size_t count,
285 return count; 289 return count;
286} 290}
287 291
288static ssize_t hppfs_write(struct file *file, const char __user *buf, size_t len, 292static ssize_t hppfs_write(struct file *file, const char __user *buf,
289 loff_t *ppos) 293 size_t len, loff_t *ppos)
290{ 294{
291 struct hppfs_private *data = file->private_data; 295 struct hppfs_private *data = file->private_data;
292 struct file *proc_file = data->proc_file; 296 struct file *proc_file = data->proc_file;
293 ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); 297 ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
294 int err;
295 298
296 write = proc_file->f_path.dentry->d_inode->i_fop->write; 299 write = proc_file->f_path.dentry->d_inode->i_fop->write;
297 300 return (*write)(proc_file, buf, len, ppos);
298 proc_file->f_pos = file->f_pos;
299 err = (*write)(proc_file, buf, len, &proc_file->f_pos);
300 file->f_pos = proc_file->f_pos;
301
302 return err;
303} 301}
304 302
305static int open_host_sock(char *host_file, int *filter_out) 303static int open_host_sock(char *host_file, int *filter_out)
@@ -357,7 +355,7 @@ static struct hppfs_data *hppfs_get_data(int fd, int filter,
357 355
358 if (filter) { 356 if (filter) {
359 while ((n = read_proc(proc_file, data->contents, 357 while ((n = read_proc(proc_file, data->contents,
360 sizeof(data->contents), NULL, 0)) > 0) 358 sizeof(data->contents), NULL, 0)) > 0)
361 os_write_file(fd, data->contents, n); 359 os_write_file(fd, data->contents, n);
362 err = os_shutdown_socket(fd, 0, 1); 360 err = os_shutdown_socket(fd, 0, 1);
363 if (err) { 361 if (err) {
@@ -429,8 +427,8 @@ static int file_mode(int fmode)
429static int hppfs_open(struct inode *inode, struct file *file) 427static int hppfs_open(struct inode *inode, struct file *file)
430{ 428{
431 struct hppfs_private *data; 429 struct hppfs_private *data;
432 struct dentry *proc_dentry;
433 struct vfsmount *proc_mnt; 430 struct vfsmount *proc_mnt;
431 struct dentry *proc_dentry;
434 char *host_file; 432 char *host_file;
435 int err, fd, type, filter; 433 int err, fd, type, filter;
436 434
@@ -492,8 +490,8 @@ static int hppfs_open(struct inode *inode, struct file *file)
492static int hppfs_dir_open(struct inode *inode, struct file *file) 490static int hppfs_dir_open(struct inode *inode, struct file *file)
493{ 491{
494 struct hppfs_private *data; 492 struct hppfs_private *data;
495 struct dentry *proc_dentry;
496 struct vfsmount *proc_mnt; 493 struct vfsmount *proc_mnt;
494 struct dentry *proc_dentry;
497 int err; 495 int err;
498 496
499 err = -ENOMEM; 497 err = -ENOMEM;
@@ -620,6 +618,9 @@ static struct inode *hppfs_alloc_inode(struct super_block *sb)
620 618
621void hppfs_delete_inode(struct inode *ino) 619void hppfs_delete_inode(struct inode *ino)
622{ 620{
621 dput(HPPFS_I(ino)->proc_dentry);
622 mntput(ino->i_sb->s_fs_info);
623
623 clear_inode(ino); 624 clear_inode(ino);
624} 625}
625 626
@@ -628,69 +629,46 @@ static void hppfs_destroy_inode(struct inode *inode)
628 kfree(HPPFS_I(inode)); 629 kfree(HPPFS_I(inode));
629} 630}
630 631
631static void hppfs_put_super(struct super_block *sb)
632{
633 mntput(sb->s_fs_info);
634}
635
636static const struct super_operations hppfs_sbops = { 632static const struct super_operations hppfs_sbops = {
637 .alloc_inode = hppfs_alloc_inode, 633 .alloc_inode = hppfs_alloc_inode,
638 .destroy_inode = hppfs_destroy_inode, 634 .destroy_inode = hppfs_destroy_inode,
639 .delete_inode = hppfs_delete_inode, 635 .delete_inode = hppfs_delete_inode,
640 .statfs = hppfs_statfs, 636 .statfs = hppfs_statfs,
641 .put_super = hppfs_put_super,
642}; 637};
643 638
644static int hppfs_readlink(struct dentry *dentry, char __user *buffer, 639static int hppfs_readlink(struct dentry *dentry, char __user *buffer,
645 int buflen) 640 int buflen)
646{ 641{
647 struct file *proc_file;
648 struct dentry *proc_dentry; 642 struct dentry *proc_dentry;
649 struct vfsmount *proc_mnt;
650 int ret;
651 643
652 proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry; 644 proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
653 proc_mnt = dentry->d_sb->s_fs_info; 645 return proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer,
654 646 buflen);
655 proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt), O_RDONLY);
656 if (IS_ERR(proc_file))
657 return PTR_ERR(proc_file);
658
659 ret = proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer, buflen);
660
661 fput(proc_file);
662
663 return ret;
664} 647}
665 648
666static void* hppfs_follow_link(struct dentry *dentry, struct nameidata *nd) 649static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd)
667{ 650{
668 struct file *proc_file;
669 struct dentry *proc_dentry; 651 struct dentry *proc_dentry;
670 struct vfsmount *proc_mnt;
671 void *ret;
672 652
673 proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry; 653 proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
674 proc_mnt = dentry->d_sb->s_fs_info;
675
676 proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt), O_RDONLY);
677 if (IS_ERR(proc_file))
678 return proc_file;
679
680 ret = proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd);
681 654
682 fput(proc_file); 655 return proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd);
656}
683 657
684 return ret; 658int hppfs_permission(struct inode *inode, int mask, struct nameidata *nd)
659{
660 return generic_permission(inode, mask, NULL);
685} 661}
686 662
687static const struct inode_operations hppfs_dir_iops = { 663static const struct inode_operations hppfs_dir_iops = {
688 .lookup = hppfs_lookup, 664 .lookup = hppfs_lookup,
665 .permission = hppfs_permission,
689}; 666};
690 667
691static const struct inode_operations hppfs_link_iops = { 668static const struct inode_operations hppfs_link_iops = {
692 .readlink = hppfs_readlink, 669 .readlink = hppfs_readlink,
693 .follow_link = hppfs_follow_link, 670 .follow_link = hppfs_follow_link,
671 .permission = hppfs_permission,
694}; 672};
695 673
696static struct inode *get_inode(struct super_block *sb, struct dentry *dentry) 674static struct inode *get_inode(struct super_block *sb, struct dentry *dentry)
@@ -712,7 +690,7 @@ static struct inode *get_inode(struct super_block *sb, struct dentry *dentry)
712 inode->i_fop = &hppfs_file_fops; 690 inode->i_fop = &hppfs_file_fops;
713 } 691 }
714 692
715 HPPFS_I(inode)->proc_dentry = dentry; 693 HPPFS_I(inode)->proc_dentry = dget(dentry);
716 694
717 inode->i_uid = proc_ino->i_uid; 695 inode->i_uid = proc_ino->i_uid;
718 inode->i_gid = proc_ino->i_gid; 696 inode->i_gid = proc_ino->i_gid;
@@ -725,7 +703,7 @@ static struct inode *get_inode(struct super_block *sb, struct dentry *dentry)
725 inode->i_size = proc_ino->i_size; 703 inode->i_size = proc_ino->i_size;
726 inode->i_blocks = proc_ino->i_blocks; 704 inode->i_blocks = proc_ino->i_blocks;
727 705
728 return 0; 706 return inode;
729} 707}
730 708
731static int hppfs_fill_super(struct super_block *sb, void *d, int silent) 709static int hppfs_fill_super(struct super_block *sb, void *d, int silent)
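The hppfs hunks above replace the dentry_open()/fput() round trip in readlink and follow_link with a direct delegation to the underlying procfs dentry, which is now pinned with dget() when the wrapper inode is created and released with dput() in hppfs_delete_inode(). A minimal userspace sketch of that pinning pattern, with purely illustrative names:

#include <stdio.h>

/* Minimal sketch (illustrative names): take a reference when the pointer
 * is cached, drop it when the holder goes away, so intermediate users can
 * dereference the cached pointer without re-acquiring anything. */
struct obj {
        int refcount;
};

static struct obj *obj_get(struct obj *o) { o->refcount++; return o; }
static void obj_put(struct obj *o)        { o->refcount--; }

struct wrapper {
        struct obj *target;             /* pinned for the wrapper's lifetime */
};

static void wrapper_init(struct wrapper *w, struct obj *o)
{
        w->target = obj_get(o);         /* cf. dget() in get_inode() */
}

static void wrapper_destroy(struct wrapper *w)
{
        obj_put(w->target);             /* cf. dput() in hppfs_delete_inode() */
}

int main(void)
{
        struct obj proc_dentry = { .refcount = 1 };
        struct wrapper inode;

        wrapper_init(&inode, &proc_dentry);
        wrapper_destroy(&inode);
        printf("refcount back to %d\n", proc_dentry.refcount);  /* 1 */
        return 0;
}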
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 9783723e8ffe..aeabf80f81a5 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -45,7 +45,7 @@ static const struct inode_operations hugetlbfs_inode_operations;
45 45
46static struct backing_dev_info hugetlbfs_backing_dev_info = { 46static struct backing_dev_info hugetlbfs_backing_dev_info = {
47 .ra_pages = 0, /* No readahead */ 47 .ra_pages = 0, /* No readahead */
48 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, 48 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
49}; 49};
50 50
51int sysctl_hugetlb_shm_group; 51int sysctl_hugetlb_shm_group;
diff --git a/fs/inode.c b/fs/inode.c
index 27ee1af50d02..c36d9480335c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -495,8 +495,7 @@ static struct inode * find_inode(struct super_block * sb, struct hlist_head *hea
495 struct inode * inode = NULL; 495 struct inode * inode = NULL;
496 496
497repeat: 497repeat:
498 hlist_for_each (node, head) { 498 hlist_for_each_entry(inode, node, head, i_hash) {
499 inode = hlist_entry(node, struct inode, i_hash);
500 if (inode->i_sb != sb) 499 if (inode->i_sb != sb)
501 continue; 500 continue;
502 if (!test(inode, data)) 501 if (!test(inode, data))
@@ -520,8 +519,7 @@ static struct inode * find_inode_fast(struct super_block * sb, struct hlist_head
520 struct inode * inode = NULL; 519 struct inode * inode = NULL;
521 520
522repeat: 521repeat:
523 hlist_for_each (node, head) { 522 hlist_for_each_entry(inode, node, head, i_hash) {
524 inode = hlist_entry(node, struct inode, i_hash);
525 if (inode->i_ino != ino) 523 if (inode->i_ino != ino)
526 continue; 524 continue;
527 if (inode->i_sb != sb) 525 if (inode->i_sb != sb)
@@ -1151,13 +1149,8 @@ static inline void iput_final(struct inode *inode)
1151void iput(struct inode *inode) 1149void iput(struct inode *inode)
1152{ 1150{
1153 if (inode) { 1151 if (inode) {
1154 const struct super_operations *op = inode->i_sb->s_op;
1155
1156 BUG_ON(inode->i_state == I_CLEAR); 1152 BUG_ON(inode->i_state == I_CLEAR);
1157 1153
1158 if (op && op->put_inode)
1159 op->put_inode(inode);
1160
1161 if (atomic_dec_and_lock(&inode->i_count, &inode_lock)) 1154 if (atomic_dec_and_lock(&inode->i_count, &inode_lock))
1162 iput_final(inode); 1155 iput_final(inode);
1163 } 1156 }
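The find_inode()/find_inode_fast() hunks swap the open-coded hlist_for_each() plus hlist_entry() pair for hlist_for_each_entry(), which folds the container_of conversion into the loop macro; the iput() hunk drops the call to the ->put_inode super_operations hook. A stripped-down userspace sketch of the two loop forms, assuming a simplified hlist and the 4-argument macro signature used here:

#include <stddef.h>
#include <stdio.h>

struct hlist_node { struct hlist_node *next; };
struct hlist_head { struct hlist_node *first; };

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

/* open-coded walk: fetch the node, then convert to the entry by hand */
#define hlist_for_each(pos, head) \
        for (pos = (head)->first; pos; pos = pos->next)

/* entry-based walk: the conversion happens inside the macro */
#define hlist_for_each_entry(tpos, pos, head, member)                     \
        for (pos = (head)->first;                                         \
             pos && (tpos = container_of(pos, typeof(*tpos), member), 1); \
             pos = pos->next)

struct item {
        int ino;
        struct hlist_node hash;
};

int main(void)
{
        struct item a = { .ino = 1 }, b = { .ino = 2 };
        struct hlist_head head = { .first = &a.hash };
        struct hlist_node *node;
        struct item *it;

        a.hash.next = &b.hash;
        b.hash.next = NULL;

        hlist_for_each_entry(it, node, &head, hash)
                printf("ino %d\n", it->ino);
        return 0;
}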
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index 7b94a1e3c015..6676c06bb7c1 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -598,7 +598,7 @@ asmlinkage long sys_inotify_init(void)
598 } 598 }
599 599
600 ih = inotify_init(&inotify_user_ops); 600 ih = inotify_init(&inotify_user_ops);
601 if (unlikely(IS_ERR(ih))) { 601 if (IS_ERR(ih)) {
602 ret = PTR_ERR(ih); 602 ret = PTR_ERR(ih);
603 goto out_free_dev; 603 goto out_free_dev;
604 } 604 }
diff --git a/fs/ioctl.c b/fs/ioctl.c
index f32fbde2175e..7db32b3382d3 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -28,8 +28,8 @@
28 * 28 *
29 * Returns 0 on success, -errno on error. 29 * Returns 0 on success, -errno on error.
30 */ 30 */
31long vfs_ioctl(struct file *filp, unsigned int cmd, 31static long vfs_ioctl(struct file *filp, unsigned int cmd,
32 unsigned long arg) 32 unsigned long arg)
33{ 33{
34 int error = -ENOTTY; 34 int error = -ENOTTY;
35 35
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index 1ba407c64df1..2f0dc5a14633 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -145,6 +145,14 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp,
145 } 145 }
146 de = tmpde; 146 de = tmpde;
147 } 147 }
148 /* Basic sanity check, whether name doesn't exceed dir entry */
149 if (de_len < de->name_len[0] +
150 sizeof(struct iso_directory_record)) {
151 printk(KERN_NOTICE "iso9660: Corrupted directory entry"
152 " in block %lu of inode %lu\n", block,
153 inode->i_ino);
154 return -EIO;
155 }
148 156
149 if (first_de) { 157 if (first_de) {
150 isofs_normalize_block_and_offset(de, 158 isofs_normalize_block_and_offset(de,
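The new block in do_isofs_readdir() rejects records whose claimed name length does not fit inside the record length read from disc, returning -EIO instead of walking past the entry. A compact userspace sketch of the same arithmetic, using a trimmed-down record layout (the real struct iso_directory_record has more fields, so the sizeof differs):

#include <stdio.h>

/* Trimmed-down record: only what the check needs; sizes are illustrative. */
struct iso_directory_record {
        unsigned char length[1];
        unsigned char name_len[1];
        char name[];
};

/* Mirror of the added sanity check: the name must fit inside the record. */
static int record_is_sane(const struct iso_directory_record *de, int de_len)
{
        return de_len >= de->name_len[0] + (int)sizeof(struct iso_directory_record);
}

int main(void)
{
        unsigned char good[] = { 34, 4, 'b', 'o', 'o', 't' };
        unsigned char bad[]  = {  4, 200 };     /* claims a 200-byte name */

        printf("good: %d\n", record_is_sane((void *)good, good[0]));
        printf("bad:  %d\n", record_is_sane((void *)bad, bad[0]));
        return 0;
}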
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index d1bdf8adb351..ccbf72faf27a 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -78,29 +78,29 @@ static inline int isonum_712(char *p)
78} 78}
79static inline unsigned int isonum_721(char *p) 79static inline unsigned int isonum_721(char *p)
80{ 80{
81 return le16_to_cpu(get_unaligned((__le16 *)p)); 81 return get_unaligned_le16(p);
82} 82}
83static inline unsigned int isonum_722(char *p) 83static inline unsigned int isonum_722(char *p)
84{ 84{
85 return be16_to_cpu(get_unaligned((__le16 *)p)); 85 return get_unaligned_be16(p);
86} 86}
87static inline unsigned int isonum_723(char *p) 87static inline unsigned int isonum_723(char *p)
88{ 88{
89 /* Ignore bigendian datum due to broken mastering programs */ 89 /* Ignore bigendian datum due to broken mastering programs */
90 return le16_to_cpu(get_unaligned((__le16 *)p)); 90 return get_unaligned_le16(p);
91} 91}
92static inline unsigned int isonum_731(char *p) 92static inline unsigned int isonum_731(char *p)
93{ 93{
94 return le32_to_cpu(get_unaligned((__le32 *)p)); 94 return get_unaligned_le32(p);
95} 95}
96static inline unsigned int isonum_732(char *p) 96static inline unsigned int isonum_732(char *p)
97{ 97{
98 return be32_to_cpu(get_unaligned((__le32 *)p)); 98 return get_unaligned_be32(p);
99} 99}
100static inline unsigned int isonum_733(char *p) 100static inline unsigned int isonum_733(char *p)
101{ 101{
102 /* Ignore bigendian datum due to broken mastering programs */ 102 /* Ignore bigendian datum due to broken mastering programs */
103 return le32_to_cpu(get_unaligned((__le32 *)p)); 103 return get_unaligned_le32(p);
104} 104}
105extern int iso_date(char *, int); 105extern int iso_date(char *, int);
106 106
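The isonum_7xx helpers above are rewritten in terms of get_unaligned_le16/be16/le32/be32, which combine the unaligned load and the byte-order conversion in a single call. A userspace approximation of the 16-bit pair, assuming plain byte shifts rather than the kernel's arch-optimised accessors:

#include <stdint.h>
#include <stdio.h>

/* Userspace stand-ins for the kernel helpers: read a possibly unaligned
 * little-endian / big-endian 16-bit value from a byte pointer. */
static inline unsigned int get_unaligned_le16(const void *p)
{
        const uint8_t *b = p;
        return b[0] | (b[1] << 8);
}

static inline unsigned int get_unaligned_be16(const void *p)
{
        const uint8_t *b = p;
        return (b[0] << 8) | b[1];
}

int main(void)
{
        /* 0x1234 stored little-endian at an odd (unaligned) offset */
        uint8_t buf[3] = { 0x00, 0x34, 0x12 };

        printf("le16 = 0x%04x\n", get_unaligned_le16(buf + 1)); /* 0x1234 */
        printf("be16 = 0x%04x\n", get_unaligned_be16(buf + 1)); /* 0x3412 */
        return 0;
}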
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 344b247bc29a..8299889a835e 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -111,6 +111,13 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry,
111 111
112 dlen = de->name_len[0]; 112 dlen = de->name_len[0];
113 dpnt = de->name; 113 dpnt = de->name;
114 /* Basic sanity check, whether name doesn't exceed dir entry */
115 if (de_len < dlen + sizeof(struct iso_directory_record)) {
116 printk(KERN_NOTICE "iso9660: Corrupted directory entry"
117 " in block %lu of inode %lu\n", block,
118 dir->i_ino);
119 return 0;
120 }
114 121
115 if (sbi->s_rock && 122 if (sbi->s_rock &&
116 ((i = get_rock_ridge_filename(de, tmpname, dir)))) { 123 ((i = get_rock_ridge_filename(de, tmpname, dir)))) {
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index cd931ef1f000..5a8ca61498ca 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -470,7 +470,9 @@ void journal_commit_transaction(journal_t *journal)
470 * transaction! Now comes the tricky part: we need to write out 470 * transaction! Now comes the tricky part: we need to write out
471 * metadata. Loop over the transaction's entire buffer list: 471 * metadata. Loop over the transaction's entire buffer list:
472 */ 472 */
473 spin_lock(&journal->j_state_lock);
473 commit_transaction->t_state = T_COMMIT; 474 commit_transaction->t_state = T_COMMIT;
475 spin_unlock(&journal->j_state_lock);
474 476
475 J_ASSERT(commit_transaction->t_nr_buffers <= 477 J_ASSERT(commit_transaction->t_nr_buffers <=
476 commit_transaction->t_outstanding_credits); 478 commit_transaction->t_outstanding_credits);
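Both the jbd and jbd2 commit paths now take j_state_lock around the T_COMMIT transition, so code that samples t_state under that lock sees the change atomically with respect to the journal's other state fields. A pthread-based sketch of the pattern, with illustrative names:

#include <pthread.h>
#include <stdio.h>

enum t_state { T_RUNNING, T_LOCKED, T_FLUSH, T_COMMIT, T_FINISHED };

struct transaction {
        enum t_state t_state;
};

struct journal {
        pthread_spinlock_t j_state_lock;
        struct transaction *j_committing_transaction;
};

/* State transitions happen only under j_state_lock ... */
static void set_commit_state(struct journal *journal, struct transaction *t)
{
        pthread_spin_lock(&journal->j_state_lock);
        t->t_state = T_COMMIT;
        pthread_spin_unlock(&journal->j_state_lock);
}

/* ... so readers holding the same lock never see a half-updated view. */
static enum t_state read_state(struct journal *journal, struct transaction *t)
{
        enum t_state s;

        pthread_spin_lock(&journal->j_state_lock);
        s = t->t_state;
        pthread_spin_unlock(&journal->j_state_lock);
        return s;
}

int main(void)
{
        struct transaction t = { .t_state = T_FLUSH };
        struct journal j = { .j_committing_transaction = &t };

        pthread_spin_init(&j.j_state_lock, PTHREAD_PROCESS_PRIVATE);
        set_commit_state(&j, &t);
        printf("state = %d\n", read_state(&j, &t));     /* T_COMMIT == 3 */
        pthread_spin_destroy(&j.j_state_lock);
        return 0;
}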
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index a8173081f831..4d99685fdce4 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -520,22 +520,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
520 jbd_debug (3, "JBD: commit phase 2\n"); 520 jbd_debug (3, "JBD: commit phase 2\n");
521 521
522 /* 522 /*
523 * First, drop modified flag: all accesses to the buffers
524 * will be tracked for a new trasaction only -bzzz
525 */
526 spin_lock(&journal->j_list_lock);
527 if (commit_transaction->t_buffers) {
528 new_jh = jh = commit_transaction->t_buffers->b_tnext;
529 do {
530 J_ASSERT_JH(new_jh, new_jh->b_modified == 1 ||
531 new_jh->b_modified == 0);
532 new_jh->b_modified = 0;
533 new_jh = new_jh->b_tnext;
534 } while (new_jh != jh);
535 }
536 spin_unlock(&journal->j_list_lock);
537
538 /*
539 * Now start flushing things to disk, in the order they appear 523 * Now start flushing things to disk, in the order they appear
540 * on the transaction lists. Data blocks go first. 524 * on the transaction lists. Data blocks go first.
541 */ 525 */
@@ -576,7 +560,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
576 * transaction! Now comes the tricky part: we need to write out 560 * transaction! Now comes the tricky part: we need to write out
577 * metadata. Loop over the transaction's entire buffer list: 561 * metadata. Loop over the transaction's entire buffer list:
578 */ 562 */
563 spin_lock(&journal->j_state_lock);
579 commit_transaction->t_state = T_COMMIT; 564 commit_transaction->t_state = T_COMMIT;
565 spin_unlock(&journal->j_state_lock);
580 566
581 stats.u.run.rs_logging = jiffies; 567 stats.u.run.rs_logging = jiffies;
582 stats.u.run.rs_flushing = jbd2_time_diff(stats.u.run.rs_flushing, 568 stats.u.run.rs_flushing = jbd2_time_diff(stats.u.run.rs_flushing,
@@ -584,6 +570,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
584 stats.u.run.rs_blocks = commit_transaction->t_outstanding_credits; 570 stats.u.run.rs_blocks = commit_transaction->t_outstanding_credits;
585 stats.u.run.rs_blocks_logged = 0; 571 stats.u.run.rs_blocks_logged = 0;
586 572
573 J_ASSERT(commit_transaction->t_nr_buffers <=
574 commit_transaction->t_outstanding_credits);
575
587 descriptor = NULL; 576 descriptor = NULL;
588 bufs = 0; 577 bufs = 0;
589 while (commit_transaction->t_buffers) { 578 while (commit_transaction->t_buffers) {
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 954cff001df6..2e24567c4a79 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -534,7 +534,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
534 if (!tid_geq(journal->j_commit_request, tid)) { 534 if (!tid_geq(journal->j_commit_request, tid)) {
535 printk(KERN_EMERG 535 printk(KERN_EMERG
536 "%s: error: j_commit_request=%d, tid=%d\n", 536 "%s: error: j_commit_request=%d, tid=%d\n",
537 __FUNCTION__, journal->j_commit_request, tid); 537 __func__, journal->j_commit_request, tid);
538 } 538 }
539 spin_unlock(&journal->j_state_lock); 539 spin_unlock(&journal->j_state_lock);
540#endif 540#endif
@@ -599,7 +599,7 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
599 599
600 printk(KERN_ALERT "%s: journal block not found " 600 printk(KERN_ALERT "%s: journal block not found "
601 "at offset %lu on %s\n", 601 "at offset %lu on %s\n",
602 __FUNCTION__, 602 __func__,
603 blocknr, 603 blocknr,
604 bdevname(journal->j_dev, b)); 604 bdevname(journal->j_dev, b));
605 err = -EIO; 605 err = -EIO;
@@ -901,22 +901,13 @@ static void jbd2_stats_proc_init(journal_t *journal)
901{ 901{
902 char name[BDEVNAME_SIZE]; 902 char name[BDEVNAME_SIZE];
903 903
904 snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name)); 904 bdevname(journal->j_dev, name);
905 journal->j_proc_entry = proc_mkdir(name, proc_jbd2_stats); 905 journal->j_proc_entry = proc_mkdir(name, proc_jbd2_stats);
906 if (journal->j_proc_entry) { 906 if (journal->j_proc_entry) {
907 struct proc_dir_entry *p; 907 proc_create_data("history", S_IRUGO, journal->j_proc_entry,
908 p = create_proc_entry("history", S_IRUGO, 908 &jbd2_seq_history_fops, journal);
909 journal->j_proc_entry); 909 proc_create_data("info", S_IRUGO, journal->j_proc_entry,
910 if (p) { 910 &jbd2_seq_info_fops, journal);
911 p->proc_fops = &jbd2_seq_history_fops;
912 p->data = journal;
913 p = create_proc_entry("info", S_IRUGO,
914 journal->j_proc_entry);
915 if (p) {
916 p->proc_fops = &jbd2_seq_info_fops;
917 p->data = journal;
918 }
919 }
920 } 911 }
921} 912}
922 913
@@ -924,7 +915,7 @@ static void jbd2_stats_proc_exit(journal_t *journal)
924{ 915{
925 char name[BDEVNAME_SIZE]; 916 char name[BDEVNAME_SIZE];
926 917
927 snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name)); 918 bdevname(journal->j_dev, name);
928 remove_proc_entry("info", journal->j_proc_entry); 919 remove_proc_entry("info", journal->j_proc_entry);
929 remove_proc_entry("history", journal->j_proc_entry); 920 remove_proc_entry("history", journal->j_proc_entry);
930 remove_proc_entry(name, proc_jbd2_stats); 921 remove_proc_entry(name, proc_jbd2_stats);
@@ -1006,13 +997,14 @@ fail:
1006 */ 997 */
1007 998
1008/** 999/**
1009 * journal_t * jbd2_journal_init_dev() - creates an initialises a journal structure 1000 * journal_t * jbd2_journal_init_dev() - creates and initialises a journal structure
1010 * @bdev: Block device on which to create the journal 1001 * @bdev: Block device on which to create the journal
1011 * @fs_dev: Device which hold journalled filesystem for this journal. 1002 * @fs_dev: Device which hold journalled filesystem for this journal.
1012 * @start: Block nr Start of journal. 1003 * @start: Block nr Start of journal.
1013 * @len: Length of the journal in blocks. 1004 * @len: Length of the journal in blocks.
1014 * @blocksize: blocksize of journalling device 1005 * @blocksize: blocksize of journalling device
1015 * @returns: a newly created journal_t * 1006 *
1007 * Returns: a newly created journal_t *
1016 * 1008 *
1017 * jbd2_journal_init_dev creates a journal which maps a fixed contiguous 1009 * jbd2_journal_init_dev creates a journal which maps a fixed contiguous
1018 * range of blocks on an arbitrary block device. 1010 * range of blocks on an arbitrary block device.
@@ -1036,7 +1028,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
1036 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); 1028 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
1037 if (!journal->j_wbuf) { 1029 if (!journal->j_wbuf) {
1038 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", 1030 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
1039 __FUNCTION__); 1031 __func__);
1040 kfree(journal); 1032 kfree(journal);
1041 journal = NULL; 1033 journal = NULL;
1042 goto out; 1034 goto out;
@@ -1092,7 +1084,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
1092 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); 1084 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
1093 if (!journal->j_wbuf) { 1085 if (!journal->j_wbuf) {
1094 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", 1086 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
1095 __FUNCTION__); 1087 __func__);
1096 kfree(journal); 1088 kfree(journal);
1097 return NULL; 1089 return NULL;
1098 } 1090 }
@@ -1101,7 +1093,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
1101 /* If that failed, give up */ 1093 /* If that failed, give up */
1102 if (err) { 1094 if (err) {
1103 printk(KERN_ERR "%s: Cannnot locate journal superblock\n", 1095 printk(KERN_ERR "%s: Cannnot locate journal superblock\n",
1104 __FUNCTION__); 1096 __func__);
1105 kfree(journal); 1097 kfree(journal);
1106 return NULL; 1098 return NULL;
1107 } 1099 }
@@ -1187,7 +1179,7 @@ int jbd2_journal_create(journal_t *journal)
1187 */ 1179 */
1188 printk(KERN_EMERG 1180 printk(KERN_EMERG
1189 "%s: creation of journal on external device!\n", 1181 "%s: creation of journal on external device!\n",
1190 __FUNCTION__); 1182 __func__);
1191 BUG(); 1183 BUG();
1192 } 1184 }
1193 1185
@@ -1985,9 +1977,10 @@ static int journal_init_jbd2_journal_head_cache(void)
1985 1977
1986static void jbd2_journal_destroy_jbd2_journal_head_cache(void) 1978static void jbd2_journal_destroy_jbd2_journal_head_cache(void)
1987{ 1979{
1988 J_ASSERT(jbd2_journal_head_cache != NULL); 1980 if (jbd2_journal_head_cache) {
1989 kmem_cache_destroy(jbd2_journal_head_cache); 1981 kmem_cache_destroy(jbd2_journal_head_cache);
1990 jbd2_journal_head_cache = NULL; 1982 jbd2_journal_head_cache = NULL;
1983 }
1991} 1984}
1992 1985
1993/* 1986/*
@@ -2006,7 +1999,7 @@ static struct journal_head *journal_alloc_journal_head(void)
2006 jbd_debug(1, "out of memory for journal_head\n"); 1999 jbd_debug(1, "out of memory for journal_head\n");
2007 if (time_after(jiffies, last_warning + 5*HZ)) { 2000 if (time_after(jiffies, last_warning + 5*HZ)) {
2008 printk(KERN_NOTICE "ENOMEM in %s, retrying.\n", 2001 printk(KERN_NOTICE "ENOMEM in %s, retrying.\n",
2009 __FUNCTION__); 2002 __func__);
2010 last_warning = jiffies; 2003 last_warning = jiffies;
2011 } 2004 }
2012 while (!ret) { 2005 while (!ret) {
@@ -2143,13 +2136,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
2143 if (jh->b_frozen_data) { 2136 if (jh->b_frozen_data) {
2144 printk(KERN_WARNING "%s: freeing " 2137 printk(KERN_WARNING "%s: freeing "
2145 "b_frozen_data\n", 2138 "b_frozen_data\n",
2146 __FUNCTION__); 2139 __func__);
2147 jbd2_free(jh->b_frozen_data, bh->b_size); 2140 jbd2_free(jh->b_frozen_data, bh->b_size);
2148 } 2141 }
2149 if (jh->b_committed_data) { 2142 if (jh->b_committed_data) {
2150 printk(KERN_WARNING "%s: freeing " 2143 printk(KERN_WARNING "%s: freeing "
2151 "b_committed_data\n", 2144 "b_committed_data\n",
2152 __FUNCTION__); 2145 __func__);
2153 jbd2_free(jh->b_committed_data, bh->b_size); 2146 jbd2_free(jh->b_committed_data, bh->b_size);
2154 } 2147 }
2155 bh->b_private = NULL; 2148 bh->b_private = NULL;
@@ -2314,10 +2307,12 @@ static int __init journal_init(void)
2314 BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024); 2307 BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024);
2315 2308
2316 ret = journal_init_caches(); 2309 ret = journal_init_caches();
2317 if (ret != 0) 2310 if (ret == 0) {
2311 jbd2_create_debugfs_entry();
2312 jbd2_create_jbd_stats_proc_entry();
2313 } else {
2318 jbd2_journal_destroy_caches(); 2314 jbd2_journal_destroy_caches();
2319 jbd2_create_debugfs_entry(); 2315 }
2320 jbd2_create_jbd_stats_proc_entry();
2321 return ret; 2316 return ret;
2322} 2317}
2323 2318
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 2e1453a5e998..257ff2625765 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -139,7 +139,7 @@ repeat:
139oom: 139oom:
140 if (!journal_oom_retry) 140 if (!journal_oom_retry)
141 return -ENOMEM; 141 return -ENOMEM;
142 jbd_debug(1, "ENOMEM in %s, retrying\n", __FUNCTION__); 142 jbd_debug(1, "ENOMEM in %s, retrying\n", __func__);
143 yield(); 143 yield();
144 goto repeat; 144 goto repeat;
145} 145}
@@ -167,138 +167,121 @@ static struct jbd2_revoke_record_s *find_revoke_record(journal_t *journal,
167 return NULL; 167 return NULL;
168} 168}
169 169
170void jbd2_journal_destroy_revoke_caches(void)
171{
172 if (jbd2_revoke_record_cache) {
173 kmem_cache_destroy(jbd2_revoke_record_cache);
174 jbd2_revoke_record_cache = NULL;
175 }
176 if (jbd2_revoke_table_cache) {
177 kmem_cache_destroy(jbd2_revoke_table_cache);
178 jbd2_revoke_table_cache = NULL;
179 }
180}
181
170int __init jbd2_journal_init_revoke_caches(void) 182int __init jbd2_journal_init_revoke_caches(void)
171{ 183{
184 J_ASSERT(!jbd2_revoke_record_cache);
185 J_ASSERT(!jbd2_revoke_table_cache);
186
172 jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record", 187 jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record",
173 sizeof(struct jbd2_revoke_record_s), 188 sizeof(struct jbd2_revoke_record_s),
174 0, 189 0,
175 SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, 190 SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
176 NULL); 191 NULL);
177 if (!jbd2_revoke_record_cache) 192 if (!jbd2_revoke_record_cache)
178 return -ENOMEM; 193 goto record_cache_failure;
179 194
180 jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table", 195 jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table",
181 sizeof(struct jbd2_revoke_table_s), 196 sizeof(struct jbd2_revoke_table_s),
182 0, SLAB_TEMPORARY, NULL); 197 0, SLAB_TEMPORARY, NULL);
183 if (!jbd2_revoke_table_cache) { 198 if (!jbd2_revoke_table_cache)
184 kmem_cache_destroy(jbd2_revoke_record_cache); 199 goto table_cache_failure;
185 jbd2_revoke_record_cache = NULL;
186 return -ENOMEM;
187 }
188 return 0; 200 return 0;
201table_cache_failure:
202 jbd2_journal_destroy_revoke_caches();
203record_cache_failure:
204 return -ENOMEM;
189} 205}
190 206
191void jbd2_journal_destroy_revoke_caches(void) 207static struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size)
192{ 208{
193 kmem_cache_destroy(jbd2_revoke_record_cache); 209 int shift = 0;
194 jbd2_revoke_record_cache = NULL; 210 int tmp = hash_size;
195 kmem_cache_destroy(jbd2_revoke_table_cache); 211 struct jbd2_revoke_table_s *table;
196 jbd2_revoke_table_cache = NULL;
197}
198
199/* Initialise the revoke table for a given journal to a given size. */
200
201int jbd2_journal_init_revoke(journal_t *journal, int hash_size)
202{
203 int shift, tmp;
204 212
205 J_ASSERT (journal->j_revoke_table[0] == NULL); 213 table = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL);
214 if (!table)
215 goto out;
206 216
207 shift = 0;
208 tmp = hash_size;
209 while((tmp >>= 1UL) != 0UL) 217 while((tmp >>= 1UL) != 0UL)
210 shift++; 218 shift++;
211 219
212 journal->j_revoke_table[0] = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); 220 table->hash_size = hash_size;
213 if (!journal->j_revoke_table[0]) 221 table->hash_shift = shift;
214 return -ENOMEM; 222 table->hash_table =
215 journal->j_revoke = journal->j_revoke_table[0];
216
217 /* Check that the hash_size is a power of two */
218 J_ASSERT(is_power_of_2(hash_size));
219
220 journal->j_revoke->hash_size = hash_size;
221
222 journal->j_revoke->hash_shift = shift;
223
224 journal->j_revoke->hash_table =
225 kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); 223 kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
226 if (!journal->j_revoke->hash_table) { 224 if (!table->hash_table) {
227 kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]); 225 kmem_cache_free(jbd2_revoke_table_cache, table);
228 journal->j_revoke = NULL; 226 table = NULL;
229 return -ENOMEM; 227 goto out;
230 } 228 }
231 229
232 for (tmp = 0; tmp < hash_size; tmp++) 230 for (tmp = 0; tmp < hash_size; tmp++)
233 INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]); 231 INIT_LIST_HEAD(&table->hash_table[tmp]);
234 232
235 journal->j_revoke_table[1] = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); 233out:
236 if (!journal->j_revoke_table[1]) { 234 return table;
237 kfree(journal->j_revoke_table[0]->hash_table); 235}
238 kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]); 236
239 return -ENOMEM; 237static void jbd2_journal_destroy_revoke_table(struct jbd2_revoke_table_s *table)
238{
239 int i;
240 struct list_head *hash_list;
241
242 for (i = 0; i < table->hash_size; i++) {
243 hash_list = &table->hash_table[i];
244 J_ASSERT(list_empty(hash_list));
240 } 245 }
241 246
242 journal->j_revoke = journal->j_revoke_table[1]; 247 kfree(table->hash_table);
248 kmem_cache_free(jbd2_revoke_table_cache, table);
249}
243 250
244 /* Check that the hash_size is a power of two */ 251/* Initialise the revoke table for a given journal to a given size. */
252int jbd2_journal_init_revoke(journal_t *journal, int hash_size)
253{
254 J_ASSERT(journal->j_revoke_table[0] == NULL);
245 J_ASSERT(is_power_of_2(hash_size)); 255 J_ASSERT(is_power_of_2(hash_size));
246 256
247 journal->j_revoke->hash_size = hash_size; 257 journal->j_revoke_table[0] = jbd2_journal_init_revoke_table(hash_size);
248 258 if (!journal->j_revoke_table[0])
249 journal->j_revoke->hash_shift = shift; 259 goto fail0;
250 260
251 journal->j_revoke->hash_table = 261 journal->j_revoke_table[1] = jbd2_journal_init_revoke_table(hash_size);
252 kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); 262 if (!journal->j_revoke_table[1])
253 if (!journal->j_revoke->hash_table) { 263 goto fail1;
254 kfree(journal->j_revoke_table[0]->hash_table);
255 kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]);
256 kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[1]);
257 journal->j_revoke = NULL;
258 return -ENOMEM;
259 }
260 264
261 for (tmp = 0; tmp < hash_size; tmp++) 265 journal->j_revoke = journal->j_revoke_table[1];
262 INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
263 266
264 spin_lock_init(&journal->j_revoke_lock); 267 spin_lock_init(&journal->j_revoke_lock);
265 268
266 return 0; 269 return 0;
267}
268 270
269/* Destoy a journal's revoke table. The table must already be empty! */ 271fail1:
272 jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]);
273fail0:
274 return -ENOMEM;
275}
270 276
277/* Destroy a journal's revoke table. The table must already be empty! */
271void jbd2_journal_destroy_revoke(journal_t *journal) 278void jbd2_journal_destroy_revoke(journal_t *journal)
272{ 279{
273 struct jbd2_revoke_table_s *table;
274 struct list_head *hash_list;
275 int i;
276
277 table = journal->j_revoke_table[0];
278 if (!table)
279 return;
280
281 for (i=0; i<table->hash_size; i++) {
282 hash_list = &table->hash_table[i];
283 J_ASSERT (list_empty(hash_list));
284 }
285
286 kfree(table->hash_table);
287 kmem_cache_free(jbd2_revoke_table_cache, table);
288 journal->j_revoke = NULL;
289
290 table = journal->j_revoke_table[1];
291 if (!table)
292 return;
293
294 for (i=0; i<table->hash_size; i++) {
295 hash_list = &table->hash_table[i];
296 J_ASSERT (list_empty(hash_list));
297 }
298
299 kfree(table->hash_table);
300 kmem_cache_free(jbd2_revoke_table_cache, table);
301 journal->j_revoke = NULL; 280 journal->j_revoke = NULL;
281 if (journal->j_revoke_table[0])
282 jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]);
283 if (journal->j_revoke_table[1])
284 jbd2_journal_destroy_revoke_table(journal->j_revoke_table[1]);
302} 285}
303 286
304 287
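The revoke.c rewrite pulls the duplicated setup into jbd2_journal_init_revoke_table() and the teardown into jbd2_journal_destroy_revoke_table(), so both j_revoke_table slots share one code path. A userspace sketch of that helper pair, assuming the same power-of-two hash size and shift computation (the per-bucket list heads are reduced to plain pointers):

#include <stdio.h>
#include <stdlib.h>

/* Illustrative reduction of jbd2_revoke_table_s. */
struct revoke_table {
        int hash_size;
        int hash_shift;
        void **hash_table;
};

static struct revoke_table *revoke_table_init(int hash_size)
{
        struct revoke_table *table;
        int shift = 0;
        int tmp = hash_size;

        if (hash_size <= 0 || (hash_size & (hash_size - 1)))
                return NULL;            /* must be a power of two */

        table = malloc(sizeof(*table));
        if (!table)
                return NULL;

        while ((tmp >>= 1) != 0)        /* shift = log2(hash_size) */
                shift++;

        table->hash_size = hash_size;
        table->hash_shift = shift;
        table->hash_table = calloc(hash_size, sizeof(void *));
        if (!table->hash_table) {
                free(table);
                return NULL;
        }
        return table;
}

static void revoke_table_destroy(struct revoke_table *table)
{
        free(table->hash_table);
        free(table);
}

int main(void)
{
        struct revoke_table *t = revoke_table_init(256);

        if (t) {
                printf("hash_size=%d hash_shift=%d\n", t->hash_size, t->hash_shift);
                revoke_table_destroy(t);
        }
        return 0;
}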
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index b9b0b6f899b9..d6e006e67804 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -618,6 +618,12 @@ repeat:
618 goto done; 618 goto done;
619 619
620 /* 620 /*
621 * this is the first time this transaction is touching this buffer,
622 * reset the modified flag
623 */
624 jh->b_modified = 0;
625
626 /*
621 * If there is already a copy-out version of this buffer, then we don't 627 * If there is already a copy-out version of this buffer, then we don't
622 * need to make another one 628 * need to make another one
623 */ 629 */
@@ -690,7 +696,7 @@ repeat:
690 if (!frozen_buffer) { 696 if (!frozen_buffer) {
691 printk(KERN_EMERG 697 printk(KERN_EMERG
692 "%s: OOM for frozen_buffer\n", 698 "%s: OOM for frozen_buffer\n",
693 __FUNCTION__); 699 __func__);
694 JBUFFER_TRACE(jh, "oom!"); 700 JBUFFER_TRACE(jh, "oom!");
695 error = -ENOMEM; 701 error = -ENOMEM;
696 jbd_lock_bh_state(bh); 702 jbd_lock_bh_state(bh);
@@ -829,9 +835,16 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
829 835
830 if (jh->b_transaction == NULL) { 836 if (jh->b_transaction == NULL) {
831 jh->b_transaction = transaction; 837 jh->b_transaction = transaction;
838
839 /* first access by this transaction */
840 jh->b_modified = 0;
841
832 JBUFFER_TRACE(jh, "file as BJ_Reserved"); 842 JBUFFER_TRACE(jh, "file as BJ_Reserved");
833 __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); 843 __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
834 } else if (jh->b_transaction == journal->j_committing_transaction) { 844 } else if (jh->b_transaction == journal->j_committing_transaction) {
845 /* first access by this transaction */
846 jh->b_modified = 0;
847
835 JBUFFER_TRACE(jh, "set next transaction"); 848 JBUFFER_TRACE(jh, "set next transaction");
836 jh->b_next_transaction = transaction; 849 jh->b_next_transaction = transaction;
837 } 850 }
@@ -901,7 +914,7 @@ repeat:
901 committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS); 914 committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
902 if (!committed_data) { 915 if (!committed_data) {
903 printk(KERN_EMERG "%s: No memory for committed data\n", 916 printk(KERN_EMERG "%s: No memory for committed data\n",
904 __FUNCTION__); 917 __func__);
905 err = -ENOMEM; 918 err = -ENOMEM;
906 goto out; 919 goto out;
907 } 920 }
@@ -1230,6 +1243,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1230 struct journal_head *jh; 1243 struct journal_head *jh;
1231 int drop_reserve = 0; 1244 int drop_reserve = 0;
1232 int err = 0; 1245 int err = 0;
1246 int was_modified = 0;
1233 1247
1234 BUFFER_TRACE(bh, "entry"); 1248 BUFFER_TRACE(bh, "entry");
1235 1249
@@ -1248,6 +1262,9 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1248 goto not_jbd; 1262 goto not_jbd;
1249 } 1263 }
1250 1264
1265 /* keep track of whether or not this transaction modified us */
1266 was_modified = jh->b_modified;
1267
1251 /* 1268 /*
1252 * The buffer's going from the transaction, we must drop 1269 * The buffer's going from the transaction, we must drop
1253 * all references -bzzz 1270 * all references -bzzz
@@ -1265,7 +1282,12 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1265 1282
1266 JBUFFER_TRACE(jh, "belongs to current transaction: unfile"); 1283 JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
1267 1284
1268 drop_reserve = 1; 1285 /*
1286 * we only want to drop a reference if this transaction
1287 * modified the buffer
1288 */
1289 if (was_modified)
1290 drop_reserve = 1;
1269 1291
1270 /* 1292 /*
1271 * We are no longer going to journal this buffer. 1293 * We are no longer going to journal this buffer.
@@ -1305,7 +1327,13 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1305 if (jh->b_next_transaction) { 1327 if (jh->b_next_transaction) {
1306 J_ASSERT(jh->b_next_transaction == transaction); 1328 J_ASSERT(jh->b_next_transaction == transaction);
1307 jh->b_next_transaction = NULL; 1329 jh->b_next_transaction = NULL;
1308 drop_reserve = 1; 1330
1331 /*
1332 * only drop a reference if this transaction modified
1333 * the buffer
1334 */
1335 if (was_modified)
1336 drop_reserve = 1;
1309 } 1337 }
1310 } 1338 }
1311 1339
@@ -1434,7 +1462,8 @@ int jbd2_journal_stop(handle_t *handle)
1434 return err; 1462 return err;
1435} 1463}
1436 1464
1437/**int jbd2_journal_force_commit() - force any uncommitted transactions 1465/**
1466 * int jbd2_journal_force_commit() - force any uncommitted transactions
1438 * @journal: journal to force 1467 * @journal: journal to force
1439 * 1468 *
1440 * For synchronous operations: force any uncommitted transactions 1469 * For synchronous operations: force any uncommitted transactions
@@ -2077,7 +2106,7 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
2077 jh->b_transaction = jh->b_next_transaction; 2106 jh->b_transaction = jh->b_next_transaction;
2078 jh->b_next_transaction = NULL; 2107 jh->b_next_transaction = NULL;
2079 __jbd2_journal_file_buffer(jh, jh->b_transaction, 2108 __jbd2_journal_file_buffer(jh, jh->b_transaction,
2080 was_dirty ? BJ_Metadata : BJ_Reserved); 2109 jh->b_modified ? BJ_Metadata : BJ_Reserved);
2081 J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING); 2110 J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
2082 2111
2083 if (was_dirty) 2112 if (was_dirty)
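The b_modified changes make the forget path return a buffer credit only when this transaction actually dirtied the buffer: the flag is cleared the first time a transaction touches the buffer, so was_modified tells forget whether a credit was ever charged. A toy model of that accounting, assuming the credit is charged when the buffer is first dirtied (jbd2_journal_dirty_metadata() in the real code); structures and names are illustrative:

#include <stdio.h>

struct buffer {
        int b_modified;         /* dirtied by the current transaction? */
};

struct handle {
        int h_buffer_credits;
};

/* First touch by a transaction clears the flag (cf. do_get_write_access). */
static void get_write_access(struct buffer *b)
{
        b->b_modified = 0;
}

/* Dirtying the buffer charges one credit, once per transaction. */
static void dirty_metadata(struct handle *h, struct buffer *b)
{
        if (!b->b_modified) {
                b->b_modified = 1;
                h->h_buffer_credits--;
        }
}

/* Forget only refunds the credit if this transaction charged one. */
static void forget(struct handle *h, struct buffer *b)
{
        int was_modified = b->b_modified;

        if (was_modified)
                h->h_buffer_credits++;
}

int main(void)
{
        struct handle h = { .h_buffer_credits = 4 };
        struct buffer b;

        get_write_access(&b);
        forget(&h, &b);                 /* never dirtied: no refund */
        printf("credits %d\n", h.h_buffer_credits);     /* 4 */

        get_write_access(&b);
        dirty_metadata(&h, &b);
        forget(&h, &b);                 /* dirtied: credit comes back */
        printf("credits %d\n", h.h_buffer_credits);     /* 4 */
        return 0;
}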
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index d58f845ccb85..c5e1450d79f9 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -46,7 +46,7 @@ next_inode(int *i, struct jffs2_inode_cache *ic, struct jffs2_sb_info *c)
46 46
47 47
48static void jffs2_build_inode_pass1(struct jffs2_sb_info *c, 48static void jffs2_build_inode_pass1(struct jffs2_sb_info *c,
49 struct jffs2_inode_cache *ic) 49 struct jffs2_inode_cache *ic)
50{ 50{
51 struct jffs2_full_dirent *fd; 51 struct jffs2_full_dirent *fd;
52 52
@@ -68,11 +68,17 @@ static void jffs2_build_inode_pass1(struct jffs2_sb_info *c,
68 continue; 68 continue;
69 } 69 }
70 70
71 if (child_ic->nlink++ && fd->type == DT_DIR) { 71 if (fd->type == DT_DIR) {
72 JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u appears to be a hard link\n", 72 if (child_ic->pino_nlink) {
73 fd->name, fd->ino, ic->ino); 73 JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u appears to be a hard link\n",
74 /* TODO: What do we do about it? */ 74 fd->name, fd->ino, ic->ino);
75 } 75 /* TODO: What do we do about it? */
76 } else {
77 child_ic->pino_nlink = ic->ino;
78 }
79 } else
80 child_ic->pino_nlink++;
81
76 dbg_fsbuild("increased nlink for child \"%s\" (ino #%u)\n", fd->name, fd->ino); 82 dbg_fsbuild("increased nlink for child \"%s\" (ino #%u)\n", fd->name, fd->ino);
77 /* Can't free scan_dents so far. We might need them in pass 2 */ 83 /* Can't free scan_dents so far. We might need them in pass 2 */
78 } 84 }
@@ -125,7 +131,7 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c)
125 dbg_fsbuild("pass 2 starting\n"); 131 dbg_fsbuild("pass 2 starting\n");
126 132
127 for_each_inode(i, c, ic) { 133 for_each_inode(i, c, ic) {
128 if (ic->nlink) 134 if (ic->pino_nlink)
129 continue; 135 continue;
130 136
131 jffs2_build_remove_unlinked_inode(c, ic, &dead_fds); 137 jffs2_build_remove_unlinked_inode(c, ic, &dead_fds);
@@ -232,16 +238,19 @@ static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *c,
232 /* Reduce nlink of the child. If it's now zero, stick it on the 238 /* Reduce nlink of the child. If it's now zero, stick it on the
233 dead_fds list to be cleaned up later. Else just free the fd */ 239 dead_fds list to be cleaned up later. Else just free the fd */
234 240
235 child_ic->nlink--; 241 if (fd->type == DT_DIR)
242 child_ic->pino_nlink = 0;
243 else
244 child_ic->pino_nlink--;
236 245
237 if (!child_ic->nlink) { 246 if (!child_ic->pino_nlink) {
238 dbg_fsbuild("inode #%u (\"%s\") has now got zero nlink, adding to dead_fds list.\n", 247 dbg_fsbuild("inode #%u (\"%s\") now has no links; adding to dead_fds list.\n",
239 fd->ino, fd->name); 248 fd->ino, fd->name);
240 fd->next = *dead_fds; 249 fd->next = *dead_fds;
241 *dead_fds = fd; 250 *dead_fds = fd;
242 } else { 251 } else {
243 dbg_fsbuild("inode #%u (\"%s\") has now got nlink %d. Ignoring.\n", 252 dbg_fsbuild("inode #%u (\"%s\") has now got nlink %d. Ignoring.\n",
244 fd->ino, fd->name, child_ic->nlink); 253 fd->ino, fd->name, child_ic->pino_nlink);
245 jffs2_free_full_dirent(fd); 254 jffs2_free_full_dirent(fd);
246 } 255 }
247 } 256 }
diff --git a/fs/jffs2/debug.h b/fs/jffs2/debug.h
index 9645275023e6..a113ecc3bafe 100644
--- a/fs/jffs2/debug.h
+++ b/fs/jffs2/debug.h
@@ -82,28 +82,28 @@
82 do { \ 82 do { \
83 printk(JFFS2_ERR_MSG_PREFIX \ 83 printk(JFFS2_ERR_MSG_PREFIX \
84 " (%d) %s: " fmt, task_pid_nr(current), \ 84 " (%d) %s: " fmt, task_pid_nr(current), \
85 __FUNCTION__ , ##__VA_ARGS__); \ 85 __func__ , ##__VA_ARGS__); \
86 } while(0) 86 } while(0)
87 87
88#define JFFS2_WARNING(fmt, ...) \ 88#define JFFS2_WARNING(fmt, ...) \
89 do { \ 89 do { \
90 printk(JFFS2_WARN_MSG_PREFIX \ 90 printk(JFFS2_WARN_MSG_PREFIX \
91 " (%d) %s: " fmt, task_pid_nr(current), \ 91 " (%d) %s: " fmt, task_pid_nr(current), \
92 __FUNCTION__ , ##__VA_ARGS__); \ 92 __func__ , ##__VA_ARGS__); \
93 } while(0) 93 } while(0)
94 94
95#define JFFS2_NOTICE(fmt, ...) \ 95#define JFFS2_NOTICE(fmt, ...) \
96 do { \ 96 do { \
97 printk(JFFS2_NOTICE_MSG_PREFIX \ 97 printk(JFFS2_NOTICE_MSG_PREFIX \
98 " (%d) %s: " fmt, task_pid_nr(current), \ 98 " (%d) %s: " fmt, task_pid_nr(current), \
99 __FUNCTION__ , ##__VA_ARGS__); \ 99 __func__ , ##__VA_ARGS__); \
100 } while(0) 100 } while(0)
101 101
102#define JFFS2_DEBUG(fmt, ...) \ 102#define JFFS2_DEBUG(fmt, ...) \
103 do { \ 103 do { \
104 printk(JFFS2_DBG_MSG_PREFIX \ 104 printk(JFFS2_DBG_MSG_PREFIX \
105 " (%d) %s: " fmt, task_pid_nr(current), \ 105 " (%d) %s: " fmt, task_pid_nr(current), \
106 __FUNCTION__ , ##__VA_ARGS__); \ 106 __func__ , ##__VA_ARGS__); \
107 } while(0) 107 } while(0)
108 108
109/* 109/*
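These macros switch from the GCC-specific __FUNCTION__ to the standard C99 __func__ predefined identifier; the expansion is otherwise unchanged. A standalone example in the same macro style:

#include <stdio.h>

/* __func__ expands to the enclosing function's name, just as the old
 * __FUNCTION__ spelling did. */
#define JFFS2_DEBUG(fmt, ...) \
        do { \
                printf("[JFFS2 DBG] %s: " fmt, __func__, ##__VA_ARGS__); \
        } while (0)

int main(void)
{
        JFFS2_DEBUG("mounted, erase size %u\n", 65536u);
        return 0;
}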
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index c63e7a96af0d..c0c141f6fde1 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -208,6 +208,13 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
208 f = JFFS2_INODE_INFO(inode); 208 f = JFFS2_INODE_INFO(inode);
209 dir_f = JFFS2_INODE_INFO(dir_i); 209 dir_f = JFFS2_INODE_INFO(dir_i);
210 210
211 /* jffs2_do_create() will want to lock it, _after_ reserving
212 space and taking c-alloc_sem. If we keep it locked here,
213 lockdep gets unhappy (although it's a false positive;
214 nothing else will be looking at this inode yet so there's
215 no chance of AB-BA deadlock involving its f->sem). */
216 mutex_unlock(&f->sem);
217
211 ret = jffs2_do_create(c, dir_f, f, ri, 218 ret = jffs2_do_create(c, dir_f, f, ri,
212 dentry->d_name.name, dentry->d_name.len); 219 dentry->d_name.name, dentry->d_name.len);
213 if (ret) 220 if (ret)
@@ -219,7 +226,8 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
219 d_instantiate(dentry, inode); 226 d_instantiate(dentry, inode);
220 227
221 D1(printk(KERN_DEBUG "jffs2_create: Created ino #%lu with mode %o, nlink %d(%d). nrpages %ld\n", 228 D1(printk(KERN_DEBUG "jffs2_create: Created ino #%lu with mode %o, nlink %d(%d). nrpages %ld\n",
222 inode->i_ino, inode->i_mode, inode->i_nlink, f->inocache->nlink, inode->i_mapping->nrpages)); 229 inode->i_ino, inode->i_mode, inode->i_nlink,
230 f->inocache->pino_nlink, inode->i_mapping->nrpages));
223 return 0; 231 return 0;
224 232
225 fail: 233 fail:
@@ -243,7 +251,7 @@ static int jffs2_unlink(struct inode *dir_i, struct dentry *dentry)
243 ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name, 251 ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name,
244 dentry->d_name.len, dead_f, now); 252 dentry->d_name.len, dead_f, now);
245 if (dead_f->inocache) 253 if (dead_f->inocache)
246 dentry->d_inode->i_nlink = dead_f->inocache->nlink; 254 dentry->d_inode->i_nlink = dead_f->inocache->pino_nlink;
247 if (!ret) 255 if (!ret)
248 dir_i->i_mtime = dir_i->i_ctime = ITIME(now); 256 dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
249 return ret; 257 return ret;
@@ -276,7 +284,7 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de
276 284
277 if (!ret) { 285 if (!ret) {
278 mutex_lock(&f->sem); 286 mutex_lock(&f->sem);
279 old_dentry->d_inode->i_nlink = ++f->inocache->nlink; 287 old_dentry->d_inode->i_nlink = ++f->inocache->pino_nlink;
280 mutex_unlock(&f->sem); 288 mutex_unlock(&f->sem);
281 d_instantiate(dentry, old_dentry->d_inode); 289 d_instantiate(dentry, old_dentry->d_inode);
282 dir_i->i_mtime = dir_i->i_ctime = ITIME(now); 290 dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
@@ -493,11 +501,14 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
493 501
494 inode->i_op = &jffs2_dir_inode_operations; 502 inode->i_op = &jffs2_dir_inode_operations;
495 inode->i_fop = &jffs2_dir_operations; 503 inode->i_fop = &jffs2_dir_operations;
496 /* Directories get nlink 2 at start */
497 inode->i_nlink = 2;
498 504
499 f = JFFS2_INODE_INFO(inode); 505 f = JFFS2_INODE_INFO(inode);
500 506
507 /* Directories get nlink 2 at start */
508 inode->i_nlink = 2;
509 /* but ic->pino_nlink is the parent ino# */
510 f->inocache->pino_nlink = dir_i->i_ino;
511
501 ri->data_crc = cpu_to_je32(0); 512 ri->data_crc = cpu_to_je32(0);
502 ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8)); 513 ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8));
503 514
@@ -594,17 +605,25 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
594 605
595static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry) 606static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry)
596{ 607{
608 struct jffs2_sb_info *c = JFFS2_SB_INFO(dir_i->i_sb);
609 struct jffs2_inode_info *dir_f = JFFS2_INODE_INFO(dir_i);
597 struct jffs2_inode_info *f = JFFS2_INODE_INFO(dentry->d_inode); 610 struct jffs2_inode_info *f = JFFS2_INODE_INFO(dentry->d_inode);
598 struct jffs2_full_dirent *fd; 611 struct jffs2_full_dirent *fd;
599 int ret; 612 int ret;
613 uint32_t now = get_seconds();
600 614
601 for (fd = f->dents ; fd; fd = fd->next) { 615 for (fd = f->dents ; fd; fd = fd->next) {
602 if (fd->ino) 616 if (fd->ino)
603 return -ENOTEMPTY; 617 return -ENOTEMPTY;
604 } 618 }
605 ret = jffs2_unlink(dir_i, dentry); 619
606 if (!ret) 620 ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name,
621 dentry->d_name.len, f, now);
622 if (!ret) {
623 dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
624 clear_nlink(dentry->d_inode);
607 drop_nlink(dir_i); 625 drop_nlink(dir_i);
626 }
608 return ret; 627 return ret;
609} 628}
610 629
@@ -817,7 +836,10 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
817 inode which didn't exist. */ 836 inode which didn't exist. */
818 if (victim_f->inocache) { 837 if (victim_f->inocache) {
819 mutex_lock(&victim_f->sem); 838 mutex_lock(&victim_f->sem);
820 victim_f->inocache->nlink--; 839 if (S_ISDIR(new_dentry->d_inode->i_mode))
840 victim_f->inocache->pino_nlink = 0;
841 else
842 victim_f->inocache->pino_nlink--;
821 mutex_unlock(&victim_f->sem); 843 mutex_unlock(&victim_f->sem);
822 } 844 }
823 } 845 }
@@ -838,8 +860,8 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
838 struct jffs2_inode_info *f = JFFS2_INODE_INFO(old_dentry->d_inode); 860 struct jffs2_inode_info *f = JFFS2_INODE_INFO(old_dentry->d_inode);
839 mutex_lock(&f->sem); 861 mutex_lock(&f->sem);
840 inc_nlink(old_dentry->d_inode); 862 inc_nlink(old_dentry->d_inode);
841 if (f->inocache) 863 if (f->inocache && !S_ISDIR(old_dentry->d_inode->i_mode))
842 f->inocache->nlink++; 864 f->inocache->pino_nlink++;
843 mutex_unlock(&f->sem); 865 mutex_unlock(&f->sem);
844 866
845 printk(KERN_NOTICE "jffs2_rename(): Link succeeded, unlink failed (err %d). You now have a hard link\n", ret); 867 printk(KERN_NOTICE "jffs2_rename(): Link succeeded, unlink failed (err %d). You now have a hard link\n", ret);
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index 25a640e566d3..dddb2a6c9e2c 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -294,7 +294,7 @@ static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c,
294 break; 294 break;
295#endif 295#endif
296 default: 296 default:
297 if (ic->nodes == (void *)ic && ic->nlink == 0) 297 if (ic->nodes == (void *)ic && ic->pino_nlink == 0)
298 jffs2_del_ino_cache(c, ic); 298 jffs2_del_ino_cache(c, ic);
299 } 299 }
300} 300}
@@ -332,7 +332,8 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
332 if (c->mtd->point) { 332 if (c->mtd->point) {
333 unsigned long *wordebuf; 333 unsigned long *wordebuf;
334 334
335 ret = c->mtd->point(c->mtd, jeb->offset, c->sector_size, &retlen, (unsigned char **)&ebuf); 335 ret = c->mtd->point(c->mtd, jeb->offset, c->sector_size,
336 &retlen, &ebuf, NULL);
336 if (ret) { 337 if (ret) {
337 D1(printk(KERN_DEBUG "MTD point failed %d\n", ret)); 338 D1(printk(KERN_DEBUG "MTD point failed %d\n", ret));
338 goto do_flash_read; 339 goto do_flash_read;
@@ -340,7 +341,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
340 if (retlen < c->sector_size) { 341 if (retlen < c->sector_size) {
341 /* Don't muck about if it won't let us point to the whole erase sector */ 342 /* Don't muck about if it won't let us point to the whole erase sector */
342 D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", retlen)); 343 D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", retlen));
343 c->mtd->unpoint(c->mtd, ebuf, jeb->offset, retlen); 344 c->mtd->unpoint(c->mtd, jeb->offset, retlen);
344 goto do_flash_read; 345 goto do_flash_read;
345 } 346 }
346 wordebuf = ebuf-sizeof(*wordebuf); 347 wordebuf = ebuf-sizeof(*wordebuf);
@@ -349,7 +350,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
349 if (*++wordebuf != ~0) 350 if (*++wordebuf != ~0)
350 break; 351 break;
351 } while(--retlen); 352 } while(--retlen);
352 c->mtd->unpoint(c->mtd, ebuf, jeb->offset, c->sector_size); 353 c->mtd->unpoint(c->mtd, jeb->offset, c->sector_size);
353 if (retlen) { 354 if (retlen) {
354 printk(KERN_WARNING "Newly-erased block contained word 0x%lx at offset 0x%08tx\n", 355 printk(KERN_WARNING "Newly-erased block contained word 0x%lx at offset 0x%08tx\n",
355 *wordebuf, jeb->offset + c->sector_size-retlen*sizeof(*wordebuf)); 356 *wordebuf, jeb->offset + c->sector_size-retlen*sizeof(*wordebuf));
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 3eb1c84b0a33..086c43830221 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -273,7 +273,7 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
273 inode->i_mtime = ITIME(je32_to_cpu(latest_node.mtime)); 273 inode->i_mtime = ITIME(je32_to_cpu(latest_node.mtime));
274 inode->i_ctime = ITIME(je32_to_cpu(latest_node.ctime)); 274 inode->i_ctime = ITIME(je32_to_cpu(latest_node.ctime));
275 275
276 inode->i_nlink = f->inocache->nlink; 276 inode->i_nlink = f->inocache->pino_nlink;
277 277
278 inode->i_blocks = (inode->i_size + 511) >> 9; 278 inode->i_blocks = (inode->i_size + 511) >> 9;
279 279
@@ -286,13 +286,12 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
286 case S_IFDIR: 286 case S_IFDIR:
287 { 287 {
288 struct jffs2_full_dirent *fd; 288 struct jffs2_full_dirent *fd;
289 inode->i_nlink = 2; /* parent and '.' */
289 290
290 for (fd=f->dents; fd; fd = fd->next) { 291 for (fd=f->dents; fd; fd = fd->next) {
291 if (fd->type == DT_DIR && fd->ino) 292 if (fd->type == DT_DIR && fd->ino)
292 inc_nlink(inode); 293 inc_nlink(inode);
293 } 294 }
294 /* and '..' */
295 inc_nlink(inode);
296 /* Root dir gets i_nlink 3 for some reason */ 295 /* Root dir gets i_nlink 3 for some reason */
297 if (inode->i_ino == 1) 296 if (inode->i_ino == 1)
298 inc_nlink(inode); 297 inc_nlink(inode);
@@ -586,11 +585,12 @@ void jffs2_gc_release_inode(struct jffs2_sb_info *c,
586} 585}
587 586
588struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c, 587struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c,
589 int inum, int nlink) 588 int inum, int unlinked)
590{ 589{
591 struct inode *inode; 590 struct inode *inode;
592 struct jffs2_inode_cache *ic; 591 struct jffs2_inode_cache *ic;
593 if (!nlink) { 592
593 if (unlinked) {
594 /* The inode has zero nlink but its nodes weren't yet marked 594 /* The inode has zero nlink but its nodes weren't yet marked
595 obsolete. This has to be because we're still waiting for 595 obsolete. This has to be because we're still waiting for
596 the final (close() and) iput() to happen. 596 the final (close() and) iput() to happen.
@@ -638,8 +638,8 @@ struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c,
638 return ERR_CAST(inode); 638 return ERR_CAST(inode);
639 } 639 }
640 if (is_bad_inode(inode)) { 640 if (is_bad_inode(inode)) {
641 printk(KERN_NOTICE "Eep. read_inode() failed for ino #%u. nlink %d\n", 641 printk(KERN_NOTICE "Eep. read_inode() failed for ino #%u. unlinked %d\n",
642 inum, nlink); 642 inum, unlinked);
643 /* NB. This will happen again. We need to do something appropriate here. */ 643 /* NB. This will happen again. We need to do something appropriate here. */
644 iput(inode); 644 iput(inode);
645 return ERR_PTR(-EIO); 645 return ERR_PTR(-EIO);
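The jffs2_iget() hunk sets a directory's link count to 2 up front ('.' plus the entry in its parent) and then adds one per child directory for their '..' entries, instead of bumping the count at the end. A small sketch of that counting rule, independent of JFFS2's structures:

#include <stdio.h>

struct dirent_info { int is_dir; };

/* nlink of a directory: 2 (".", parent entry) + one per subdirectory. */
static int dir_nlink(const struct dirent_info *children, int n)
{
        int nlink = 2;
        int i;

        for (i = 0; i < n; i++)
                if (children[i].is_dir)
                        nlink++;
        return nlink;
}

int main(void)
{
        struct dirent_info c[] = { { 1 }, { 0 }, { 1 } };

        printf("nlink = %d\n", dir_nlink(c, 3));        /* 4 */
        return 0;
}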
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index bad005664e30..090c556ffed2 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -161,8 +161,8 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
161 continue; 161 continue;
162 } 162 }
163 163
164 if (!ic->nlink) { 164 if (!ic->pino_nlink) {
165 D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink zero\n", 165 D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink/pino zero\n",
166 ic->ino)); 166 ic->ino));
167 spin_unlock(&c->inocache_lock); 167 spin_unlock(&c->inocache_lock);
168 jffs2_xattr_delete_inode(c, ic); 168 jffs2_xattr_delete_inode(c, ic);
@@ -398,10 +398,10 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
398 it's vaguely possible. */ 398 it's vaguely possible. */
399 399
400 inum = ic->ino; 400 inum = ic->ino;
401 nlink = ic->nlink; 401 nlink = ic->pino_nlink;
402 spin_unlock(&c->inocache_lock); 402 spin_unlock(&c->inocache_lock);
403 403
404 f = jffs2_gc_fetch_inode(c, inum, nlink); 404 f = jffs2_gc_fetch_inode(c, inum, !nlink);
405 if (IS_ERR(f)) { 405 if (IS_ERR(f)) {
406 ret = PTR_ERR(f); 406 ret = PTR_ERR(f);
407 goto release_sem; 407 goto release_sem;
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index 8219df6eb6d8..1750445556c3 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -177,7 +177,10 @@ struct jffs2_inode_cache {
177#ifdef CONFIG_JFFS2_FS_XATTR 177#ifdef CONFIG_JFFS2_FS_XATTR
178 struct jffs2_xattr_ref *xref; 178 struct jffs2_xattr_ref *xref;
179#endif 179#endif
180 int nlink; 180 uint32_t pino_nlink; /* Directories store parent inode
181 here; other inodes store nlink.
182 Zero always means that it's
183 completely unlinked. */
181}; 184};
182 185
183/* Inode states for 'state' above. We need the 'GC' state to prevent 186/* Inode states for 'state' above. We need the 'GC' state to prevent
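The nodelist.h change carries the key idea of the JFFS2 series: the old int nlink becomes pino_nlink, which holds the parent inode number for directories and the link count for everything else, with zero always meaning completely unlinked. A sketch of how such a dual-purpose field might be read, where the is_dir flag stands in for mode information kept elsewhere in the real code:

#include <stdio.h>
#include <stdint.h>

struct inode_cache {
        uint32_t ino;
        uint32_t pino_nlink;    /* parent ino for dirs, nlink otherwise */
        int is_dir;
};

static int cache_is_unlinked(const struct inode_cache *ic)
{
        return ic->pino_nlink == 0;     /* zero always means unlinked */
}

static uint32_t cache_parent_ino(const struct inode_cache *ic)
{
        return ic->is_dir ? ic->pino_nlink : 0; /* only meaningful for dirs */
}

int main(void)
{
        struct inode_cache dir  = { .ino = 42, .pino_nlink = 1, .is_dir = 1 };
        struct inode_cache file = { .ino = 43, .pino_nlink = 2, .is_dir = 0 };

        printf("dir parent ino %u, unlinked=%d\n",
               cache_parent_ino(&dir), cache_is_unlinked(&dir));
        printf("file nlink %u, unlinked=%d\n",
               file.pino_nlink, cache_is_unlinked(&file));
        return 0;
}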
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index 9df8f3ef20df..a9bf9603c1ba 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -709,7 +709,7 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
709 break; 709 break;
710#endif 710#endif
711 default: 711 default:
712 if (ic->nodes == (void *)ic && ic->nlink == 0) 712 if (ic->nodes == (void *)ic && ic->pino_nlink == 0)
713 jffs2_del_ino_cache(c, ic); 713 jffs2_del_ino_cache(c, ic);
714 break; 714 break;
715 } 715 }
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 1b10d2594092..2cc866cf134f 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -187,7 +187,7 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent);
187void jffs2_gc_release_inode(struct jffs2_sb_info *c, 187void jffs2_gc_release_inode(struct jffs2_sb_info *c,
188 struct jffs2_inode_info *f); 188 struct jffs2_inode_info *f);
189struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c, 189struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c,
190 int inum, int nlink); 190 int inum, int unlinked);
191 191
192unsigned char *jffs2_gc_fetch_page(struct jffs2_sb_info *c, 192unsigned char *jffs2_gc_fetch_page(struct jffs2_sb_info *c,
193 struct jffs2_inode_info *f, 193 struct jffs2_inode_info *f,
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 4cb4d76de07f..6ca08ad887c0 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -63,10 +63,11 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info
63 /* TODO: instead, incapsulate point() stuff to jffs2_flash_read(), 63 /* TODO: instead, incapsulate point() stuff to jffs2_flash_read(),
64 * adding and jffs2_flash_read_end() interface. */ 64 * adding and jffs2_flash_read_end() interface. */
65 if (c->mtd->point) { 65 if (c->mtd->point) {
66 err = c->mtd->point(c->mtd, ofs, len, &retlen, &buffer); 66 err = c->mtd->point(c->mtd, ofs, len, &retlen,
67 (void **)&buffer, NULL);
67 if (!err && retlen < len) { 68 if (!err && retlen < len) {
68 JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize); 69 JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize);
69 c->mtd->unpoint(c->mtd, buffer, ofs, retlen); 70 c->mtd->unpoint(c->mtd, ofs, retlen);
70 } else if (err) 71 } else if (err)
71 JFFS2_WARNING("MTD point failed: error code %d.\n", err); 72 JFFS2_WARNING("MTD point failed: error code %d.\n", err);
72 else 73 else
@@ -100,7 +101,7 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info
100 kfree(buffer); 101 kfree(buffer);
101#ifndef __ECOS 102#ifndef __ECOS
102 else 103 else
103 c->mtd->unpoint(c->mtd, buffer, ofs, len); 104 c->mtd->unpoint(c->mtd, ofs, len);
104#endif 105#endif
105 106
106 if (crc != tn->data_crc) { 107 if (crc != tn->data_crc) {
@@ -136,7 +137,7 @@ free_out:
136 kfree(buffer); 137 kfree(buffer);
137#ifndef __ECOS 138#ifndef __ECOS
138 else 139 else
139 c->mtd->unpoint(c->mtd, buffer, ofs, len); 140 c->mtd->unpoint(c->mtd, ofs, len);
140#endif 141#endif
141 return err; 142 return err;
142} 143}
@@ -1123,7 +1124,8 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
1123 size_t retlen; 1124 size_t retlen;
1124 int ret; 1125 int ret;
1125 1126
1126 dbg_readinode("ino #%u nlink is %d\n", f->inocache->ino, f->inocache->nlink); 1127 dbg_readinode("ino #%u pino/nlink is %d\n", f->inocache->ino,
1128 f->inocache->pino_nlink);
1127 1129
1128 memset(&rii, 0, sizeof(rii)); 1130 memset(&rii, 0, sizeof(rii));
1129 1131
@@ -1358,7 +1360,7 @@ int jffs2_do_read_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
1358 } 1360 }
1359 dbg_readinode("creating inocache for root inode\n"); 1361 dbg_readinode("creating inocache for root inode\n");
1360 memset(f->inocache, 0, sizeof(struct jffs2_inode_cache)); 1362 memset(f->inocache, 0, sizeof(struct jffs2_inode_cache));
1361 f->inocache->ino = f->inocache->nlink = 1; 1363 f->inocache->ino = f->inocache->pino_nlink = 1;
1362 f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache; 1364 f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache;
1363 f->inocache->state = INO_STATE_READING; 1365 f->inocache->state = INO_STATE_READING;
1364 jffs2_add_ino_cache(c, f->inocache); 1366 jffs2_add_ino_cache(c, f->inocache);
@@ -1401,7 +1403,7 @@ void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f)
1401 jffs2_clear_acl(f); 1403 jffs2_clear_acl(f);
1402 jffs2_xattr_delete_inode(c, f->inocache); 1404 jffs2_xattr_delete_inode(c, f->inocache);
1403 mutex_lock(&f->sem); 1405 mutex_lock(&f->sem);
1404 deleted = f->inocache && !f->inocache->nlink; 1406 deleted = f->inocache && !f->inocache->pino_nlink;
1405 1407
1406 if (f->inocache && f->inocache->state != INO_STATE_CHECKING) 1408 if (f->inocache && f->inocache->state != INO_STATE_CHECKING)
1407 jffs2_set_inocache_state(c, f->inocache, INO_STATE_CLEARING); 1409 jffs2_set_inocache_state(c, f->inocache, INO_STATE_CLEARING);
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 272872d27fd5..1d437de1e9a8 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -97,11 +97,12 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
97 size_t pointlen; 97 size_t pointlen;
98 98
99 if (c->mtd->point) { 99 if (c->mtd->point) {
100 ret = c->mtd->point (c->mtd, 0, c->mtd->size, &pointlen, &flashbuf); 100 ret = c->mtd->point(c->mtd, 0, c->mtd->size, &pointlen,
101 (void **)&flashbuf, NULL);
101 if (!ret && pointlen < c->mtd->size) { 102 if (!ret && pointlen < c->mtd->size) {
102 /* Don't muck about if it won't let us point to the whole flash */ 103 /* Don't muck about if it won't let us point to the whole flash */
103 D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", pointlen)); 104 D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", pointlen));
104 c->mtd->unpoint(c->mtd, flashbuf, 0, pointlen); 105 c->mtd->unpoint(c->mtd, 0, pointlen);
105 flashbuf = NULL; 106 flashbuf = NULL;
106 } 107 }
107 if (ret) 108 if (ret)
@@ -267,7 +268,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
267 kfree(flashbuf); 268 kfree(flashbuf);
268#ifndef __ECOS 269#ifndef __ECOS
269 else 270 else
270 c->mtd->unpoint(c->mtd, flashbuf, 0, c->mtd->size); 271 c->mtd->unpoint(c->mtd, 0, c->mtd->size);
271#endif 272#endif
272 if (s) 273 if (s)
273 kfree(s); 274 kfree(s);
@@ -940,7 +941,7 @@ struct jffs2_inode_cache *jffs2_scan_make_ino_cache(struct jffs2_sb_info *c, uin
940 ic->nodes = (void *)ic; 941 ic->nodes = (void *)ic;
941 jffs2_add_ino_cache(c, ic); 942 jffs2_add_ino_cache(c, ic);
942 if (ino == 1) 943 if (ino == 1)
943 ic->nlink = 1; 944 ic->pino_nlink = 1;
944 return ic; 945 return ic;
945} 946}
946 947
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index f3353df178e7..7da69eae49e4 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -31,11 +31,12 @@ static struct kmem_cache *jffs2_inode_cachep;
31 31
32static struct inode *jffs2_alloc_inode(struct super_block *sb) 32static struct inode *jffs2_alloc_inode(struct super_block *sb)
33{ 33{
34 struct jffs2_inode_info *ei; 34 struct jffs2_inode_info *f;
35 ei = (struct jffs2_inode_info *)kmem_cache_alloc(jffs2_inode_cachep, GFP_KERNEL); 35
36 if (!ei) 36 f = kmem_cache_alloc(jffs2_inode_cachep, GFP_KERNEL);
37 if (!f)
37 return NULL; 38 return NULL;
38 return &ei->vfs_inode; 39 return &f->vfs_inode;
39} 40}
40 41
41static void jffs2_destroy_inode(struct inode *inode) 42static void jffs2_destroy_inode(struct inode *inode)
@@ -45,10 +46,10 @@ static void jffs2_destroy_inode(struct inode *inode)
45 46
46static void jffs2_i_init_once(struct kmem_cache *cachep, void *foo) 47static void jffs2_i_init_once(struct kmem_cache *cachep, void *foo)
47{ 48{
48 struct jffs2_inode_info *ei = (struct jffs2_inode_info *) foo; 49 struct jffs2_inode_info *f = foo;
49 50
50 mutex_init(&ei->sem); 51 mutex_init(&f->sem);
51 inode_init_once(&ei->vfs_inode); 52 inode_init_once(&f->vfs_inode);
52} 53}
53 54
54static int jffs2_sync_fs(struct super_block *sb, int wait) 55static int jffs2_sync_fs(struct super_block *sb, int wait)
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index 8de52b607678..0e78b00035e4 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -494,7 +494,7 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
494 /* If it's an in-core inode, then we have to adjust any 494 /* If it's an in-core inode, then we have to adjust any
495 full_dirent or full_dnode structure to point to the 495 full_dirent or full_dnode structure to point to the
496 new version instead of the old */ 496 new version instead of the old */
497 f = jffs2_gc_fetch_inode(c, ic->ino, ic->nlink); 497 f = jffs2_gc_fetch_inode(c, ic->ino, !ic->pino_nlink);
498 if (IS_ERR(f)) { 498 if (IS_ERR(f)) {
499 /* Should never happen; it _must_ be present */ 499 /* Should never happen; it _must_ be present */
500 JFFS2_ERROR("Failed to iget() ino #%u, err %ld\n", 500 JFFS2_ERROR("Failed to iget() ino #%u, err %ld\n",
diff --git a/fs/jffs2/write.c b/fs/jffs2/write.c
index 665fce9797d3..ca29440e9435 100644
--- a/fs/jffs2/write.c
+++ b/fs/jffs2/write.c
@@ -19,7 +19,8 @@
19#include "compr.h" 19#include "compr.h"
20 20
21 21
22int jffs2_do_new_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, uint32_t mode, struct jffs2_raw_inode *ri) 22int jffs2_do_new_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
23 uint32_t mode, struct jffs2_raw_inode *ri)
23{ 24{
24 struct jffs2_inode_cache *ic; 25 struct jffs2_inode_cache *ic;
25 26
@@ -31,7 +32,7 @@ int jffs2_do_new_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, uint
31 memset(ic, 0, sizeof(*ic)); 32 memset(ic, 0, sizeof(*ic));
32 33
33 f->inocache = ic; 34 f->inocache = ic;
34 f->inocache->nlink = 1; 35 f->inocache->pino_nlink = 1; /* Will be overwritten shortly for directories */
35 f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache; 36 f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache;
36 f->inocache->state = INO_STATE_PRESENT; 37 f->inocache->state = INO_STATE_PRESENT;
37 38
@@ -438,10 +439,10 @@ int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, str
438 ret = jffs2_reserve_space(c, sizeof(*ri), &alloclen, ALLOC_NORMAL, 439 ret = jffs2_reserve_space(c, sizeof(*ri), &alloclen, ALLOC_NORMAL,
439 JFFS2_SUMMARY_INODE_SIZE); 440 JFFS2_SUMMARY_INODE_SIZE);
440 D1(printk(KERN_DEBUG "jffs2_do_create(): reserved 0x%x bytes\n", alloclen)); 441 D1(printk(KERN_DEBUG "jffs2_do_create(): reserved 0x%x bytes\n", alloclen));
441 if (ret) { 442 if (ret)
442 mutex_unlock(&f->sem);
443 return ret; 443 return ret;
444 } 444
445 mutex_lock(&f->sem);
445 446
446 ri->data_crc = cpu_to_je32(0); 447 ri->data_crc = cpu_to_je32(0);
447 ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8)); 448 ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8));
@@ -635,9 +636,9 @@ int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f,
635 jffs2_mark_node_obsolete(c, fd->raw); 636 jffs2_mark_node_obsolete(c, fd->raw);
636 jffs2_free_full_dirent(fd); 637 jffs2_free_full_dirent(fd);
637 } 638 }
638 } 639 dead_f->inocache->pino_nlink = 0;
639 640 } else
640 dead_f->inocache->nlink--; 641 dead_f->inocache->pino_nlink--;
641 /* NB: Caller must set inode nlink if appropriate */ 642 /* NB: Caller must set inode nlink if appropriate */
642 mutex_unlock(&dead_f->sem); 643 mutex_unlock(&dead_f->sem);
643 } 644 }
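
Editor's note: the JFFS2 hunks above rename the inode cache's nlink field to pino_nlink, and the unlink path now zeroes it for a dead directory but decrements it otherwise. This appears to make one 32-bit field do double duty: for directories it records the parent inode number (a directory has exactly one parent), for everything else it is the ordinary link count, and "deleted" means the field is zero in both cases. The sketch below is a minimal userspace illustration of that dual-use idea, not JFFS2 code; the struct and function names are made up.

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

/* Hypothetical, simplified stand-in for struct jffs2_inode_cache:
 * one 32-bit field is read as "parent ino" for directories and as
 * "link count" for non-directories. */
struct ino_cache {
    uint32_t ino;
    uint32_t pino_nlink;    /* parent ino (dirs) or nlink (non-dirs) */
    bool     is_dir;
};

/* An inode is considered deleted when the field drops to zero:
 * a file with no remaining links, or a directory with no parent. */
static bool is_deleted(const struct ino_cache *ic)
{
    return ic->pino_nlink == 0;
}

int main(void)
{
    struct ino_cache file = { .ino = 42, .pino_nlink = 2, .is_dir = false };
    struct ino_cache dir  = { .ino = 43, .pino_nlink = 1, .is_dir = true };

    file.pino_nlink--;      /* unlink one of two hard links */
    dir.pino_nlink = 0;     /* rmdir: the directory loses its parent */

    printf("file deleted: %d, dir deleted: %d\n",
           is_deleted(&file), is_deleted(&dir));
    return 0;
}
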
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index e48665984cb3..082e844ab2db 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -82,7 +82,7 @@ static int is_xattr_datum_unchecked(struct jffs2_sb_info *c, struct jffs2_xattr_
82static void unload_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd) 82static void unload_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
83{ 83{
84 /* must be called under down_write(xattr_sem) */ 84 /* must be called under down_write(xattr_sem) */
85 D1(dbg_xattr("%s: xid=%u, version=%u\n", __FUNCTION__, xd->xid, xd->version)); 85 D1(dbg_xattr("%s: xid=%u, version=%u\n", __func__, xd->xid, xd->version));
86 if (xd->xname) { 86 if (xd->xname) {
87 c->xdatum_mem_usage -= (xd->name_len + 1 + xd->value_len); 87 c->xdatum_mem_usage -= (xd->name_len + 1 + xd->value_len);
88 kfree(xd->xname); 88 kfree(xd->xname);
@@ -592,7 +592,7 @@ void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache
592 When an inode with XATTR is removed, those XATTRs must be removed. */ 592 When an inode with XATTR is removed, those XATTRs must be removed. */
593 struct jffs2_xattr_ref *ref, *_ref; 593 struct jffs2_xattr_ref *ref, *_ref;
594 594
595 if (!ic || ic->nlink > 0) 595 if (!ic || ic->pino_nlink > 0)
596 return; 596 return;
597 597
598 down_write(&c->xattr_sem); 598 down_write(&c->xattr_sem);
@@ -829,7 +829,7 @@ void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c)
829 ref->xd and ref->ic are not valid yet. */ 829 ref->xd and ref->ic are not valid yet. */
830 xd = jffs2_find_xattr_datum(c, ref->xid); 830 xd = jffs2_find_xattr_datum(c, ref->xid);
831 ic = jffs2_get_ino_cache(c, ref->ino); 831 ic = jffs2_get_ino_cache(c, ref->ino);
832 if (!xd || !ic || !ic->nlink) { 832 if (!xd || !ic || !ic->pino_nlink) {
833 dbg_xattr("xref(ino=%u, xid=%u, xseqno=%u) is orphan.\n", 833 dbg_xattr("xref(ino=%u, xid=%u, xseqno=%u) is orphan.\n",
834 ref->ino, ref->xid, ref->xseqno); 834 ref->ino, ref->xid, ref->xseqno);
835 ref->xseqno |= XREF_DELETE_MARKER; 835 ref->xseqno |= XREF_DELETE_MARKER;
@@ -1252,7 +1252,7 @@ int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_
1252 rc = jffs2_reserve_space_gc(c, totlen, &length, JFFS2_SUMMARY_XREF_SIZE); 1252 rc = jffs2_reserve_space_gc(c, totlen, &length, JFFS2_SUMMARY_XREF_SIZE);
1253 if (rc) { 1253 if (rc) {
1254 JFFS2_WARNING("%s: jffs2_reserve_space_gc() = %d, request = %u\n", 1254 JFFS2_WARNING("%s: jffs2_reserve_space_gc() = %d, request = %u\n",
1255 __FUNCTION__, rc, totlen); 1255 __func__, rc, totlen);
1256 rc = rc ? rc : -EBADFD; 1256 rc = rc ? rc : -EBADFD;
1257 goto out; 1257 goto out;
1258 } 1258 }
diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c
index 887f5759e536..bf6ab19b86ee 100644
--- a/fs/jfs/jfs_debug.c
+++ b/fs/jfs/jfs_debug.c
@@ -89,7 +89,7 @@ void jfs_proc_init(void)
89{ 89{
90 int i; 90 int i;
91 91
92 if (!(base = proc_mkdir("jfs", proc_root_fs))) 92 if (!(base = proc_mkdir("fs/jfs", NULL)))
93 return; 93 return;
94 base->owner = THIS_MODULE; 94 base->owner = THIS_MODULE;
95 95
@@ -109,7 +109,7 @@ void jfs_proc_clean(void)
109 if (base) { 109 if (base) {
110 for (i = 0; i < NPROCENT; i++) 110 for (i = 0; i < NPROCENT; i++)
111 remove_proc_entry(Entries[i].name, base); 111 remove_proc_entry(Entries[i].name, base);
112 remove_proc_entry("jfs", proc_root_fs); 112 remove_proc_entry("fs/jfs", NULL);
113 } 113 }
114} 114}
115 115
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 40b16f23e49a..5df517b81f3f 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -573,7 +573,7 @@ again:
573 /* Ensure the resulting lock will get added to granted list */ 573 /* Ensure the resulting lock will get added to granted list */
574 fl->fl_flags |= FL_SLEEP; 574 fl->fl_flags |= FL_SLEEP;
575 if (do_vfs_lock(fl) < 0) 575 if (do_vfs_lock(fl) < 0)
576 printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__); 576 printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __func__);
577 up_read(&host->h_rwsem); 577 up_read(&host->h_rwsem);
578 fl->fl_flags = fl_flags; 578 fl->fl_flags = fl_flags;
579 status = 0; 579 status = 0;
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 4d81553d2948..81aca859bfde 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -752,7 +752,7 @@ nlmsvc_grant_blocked(struct nlm_block *block)
752 return; 752 return;
753 default: 753 default:
754 printk(KERN_WARNING "lockd: unexpected error %d in %s!\n", 754 printk(KERN_WARNING "lockd: unexpected error %d in %s!\n",
755 -error, __FUNCTION__); 755 -error, __func__);
756 nlmsvc_insert_block(block, 10 * HZ); 756 nlmsvc_insert_block(block, 10 * HZ);
757 nlmsvc_release_block(block); 757 nlmsvc_release_block(block);
758 return; 758 return;
diff --git a/fs/locks.c b/fs/locks.c
index 44d9a6a7ec50..11dbf08651b7 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -116,6 +116,7 @@
116 116
117#include <linux/capability.h> 117#include <linux/capability.h>
118#include <linux/file.h> 118#include <linux/file.h>
119#include <linux/fdtable.h>
119#include <linux/fs.h> 120#include <linux/fs.h>
120#include <linux/init.h> 121#include <linux/init.h>
121#include <linux/module.h> 122#include <linux/module.h>
@@ -772,7 +773,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
772 * give it the opportunity to lock the file. 773 * give it the opportunity to lock the file.
773 */ 774 */
774 if (found) 775 if (found)
775 cond_resched(); 776 cond_resched_bkl();
776 777
777find_conflict: 778find_conflict:
778 for_each_lock(inode, before) { 779 for_each_lock(inode, before) {
@@ -1752,6 +1753,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
1752 struct file_lock *file_lock = locks_alloc_lock(); 1753 struct file_lock *file_lock = locks_alloc_lock();
1753 struct flock flock; 1754 struct flock flock;
1754 struct inode *inode; 1755 struct inode *inode;
1756 struct file *f;
1755 int error; 1757 int error;
1756 1758
1757 if (file_lock == NULL) 1759 if (file_lock == NULL)
@@ -1824,7 +1826,15 @@ again:
1824 * Attempt to detect a close/fcntl race and recover by 1826 * Attempt to detect a close/fcntl race and recover by
1825 * releasing the lock that was just acquired. 1827 * releasing the lock that was just acquired.
1826 */ 1828 */
1827 if (!error && fcheck(fd) != filp && flock.l_type != F_UNLCK) { 1829 /*
1830 * we need that spin_lock here - it prevents reordering between
1831 * update of inode->i_flock and check for it done in close().
1832 * rcu_read_lock() wouldn't do.
1833 */
1834 spin_lock(&current->files->file_lock);
1835 f = fcheck(fd);
1836 spin_unlock(&current->files->file_lock);
1837 if (!error && f != filp && flock.l_type != F_UNLCK) {
1828 flock.l_type = F_UNLCK; 1838 flock.l_type = F_UNLCK;
1829 goto again; 1839 goto again;
1830 } 1840 }
@@ -1880,6 +1890,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
1880 struct file_lock *file_lock = locks_alloc_lock(); 1890 struct file_lock *file_lock = locks_alloc_lock();
1881 struct flock64 flock; 1891 struct flock64 flock;
1882 struct inode *inode; 1892 struct inode *inode;
1893 struct file *f;
1883 int error; 1894 int error;
1884 1895
1885 if (file_lock == NULL) 1896 if (file_lock == NULL)
@@ -1952,7 +1963,10 @@ again:
1952 * Attempt to detect a close/fcntl race and recover by 1963 * Attempt to detect a close/fcntl race and recover by
1953 * releasing the lock that was just acquired. 1964 * releasing the lock that was just acquired.
1954 */ 1965 */
1955 if (!error && fcheck(fd) != filp && flock.l_type != F_UNLCK) { 1966 spin_lock(&current->files->file_lock);
1967 f = fcheck(fd);
1968 spin_unlock(&current->files->file_lock);
1969 if (!error && f != filp && flock.l_type != F_UNLCK) {
1956 flock.l_type = F_UNLCK; 1970 flock.l_type = F_UNLCK;
1957 goto again; 1971 goto again;
1958 } 1972 }
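
Editor's note: the fcntl_setlk()/fcntl_setlk64() hunks above re-check, under current->files->file_lock, that the descriptor still refers to the same struct file after the lock has been taken, because POSIX record locks are dropped as soon as the owning process closes any descriptor for the file. The following is a hedged userspace illustration of the lock semantics involved; it uses only the standard fcntl(2) interface, and the path name is arbitrary.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Take a whole-file write lock with F_SETLK and release it again.
 * POSIX record locks are per-process and vanish when the process closes
 * *any* descriptor for the file - the close/fcntl race the hunk above
 * detects and recovers from. */
int main(void)
{
    int fd = open("/tmp/lockdemo", O_RDWR | O_CREAT, 0600);
    struct flock fl;

    if (fd < 0) {
        perror("open");
        return 1;
    }

    memset(&fl, 0, sizeof(fl));
    fl.l_type = F_WRLCK;    /* exclusive lock */
    fl.l_whence = SEEK_SET;
    fl.l_start = 0;
    fl.l_len = 0;           /* 0 means "to end of file" */

    if (fcntl(fd, F_SETLK, &fl) < 0) {
        perror("F_SETLK");
        close(fd);
        return 1;
    }

    fl.l_type = F_UNLCK;    /* explicit unlock... */
    fcntl(fd, F_SETLK, &fl);
    close(fd);              /* ...though close() would drop it too */
    return 0;
}
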
diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c
index 2d4358c59f68..05ff4f1d7026 100644
--- a/fs/msdos/namei.c
+++ b/fs/msdos/namei.c
@@ -609,7 +609,7 @@ error_inode:
609 if (corrupt < 0) { 609 if (corrupt < 0) {
610 fat_fs_panic(new_dir->i_sb, 610 fat_fs_panic(new_dir->i_sb,
611 "%s: Filesystem corrupted (i_pos %lld)", 611 "%s: Filesystem corrupted (i_pos %lld)",
612 __FUNCTION__, sinfo.i_pos); 612 __func__, sinfo.i_pos);
613 } 613 }
614 goto out; 614 goto out;
615} 615}
diff --git a/fs/namei.c b/fs/namei.c
index e179f71bfcb0..32fd9655485b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -30,6 +30,7 @@
30#include <linux/capability.h> 30#include <linux/capability.h>
31#include <linux/file.h> 31#include <linux/file.h>
32#include <linux/fcntl.h> 32#include <linux/fcntl.h>
33#include <linux/device_cgroup.h>
33#include <asm/namei.h> 34#include <asm/namei.h>
34#include <asm/uaccess.h> 35#include <asm/uaccess.h>
35 36
@@ -281,6 +282,10 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
281 if (retval) 282 if (retval)
282 return retval; 283 return retval;
283 284
285 retval = devcgroup_inode_permission(inode, mask);
286 if (retval)
287 return retval;
288
284 return security_inode_permission(inode, mask, nd); 289 return security_inode_permission(inode, mask, nd);
285} 290}
286 291
@@ -2028,6 +2033,10 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
2028 if (!dir->i_op || !dir->i_op->mknod) 2033 if (!dir->i_op || !dir->i_op->mknod)
2029 return -EPERM; 2034 return -EPERM;
2030 2035
2036 error = devcgroup_inode_mknod(mode, dev);
2037 if (error)
2038 return error;
2039
2031 error = security_inode_mknod(dir, dentry, mode, dev); 2040 error = security_inode_mknod(dir, dentry, mode, dev);
2032 if (error) 2041 if (error)
2033 return error; 2042 return error;
diff --git a/fs/namespace.c b/fs/namespace.c
index fe376805cf5f..4fc302c2a0e0 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1176,17 +1176,6 @@ static int mount_is_safe(struct nameidata *nd)
1176#endif 1176#endif
1177} 1177}
1178 1178
1179static int lives_below_in_same_fs(struct dentry *d, struct dentry *dentry)
1180{
1181 while (1) {
1182 if (d == dentry)
1183 return 1;
1184 if (d == NULL || d == d->d_parent)
1185 return 0;
1186 d = d->d_parent;
1187 }
1188}
1189
1190struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry, 1179struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
1191 int flag) 1180 int flag)
1192{ 1181{
@@ -1203,7 +1192,7 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
1203 1192
1204 p = mnt; 1193 p = mnt;
1205 list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) { 1194 list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
1206 if (!lives_below_in_same_fs(r->mnt_mountpoint, dentry)) 1195 if (!is_subdir(r->mnt_mountpoint, dentry))
1207 continue; 1196 continue;
1208 1197
1209 for (s = r; s; s = next_mnt(s, r)) { 1198 for (s = r; s; s = next_mnt(s, r)) {
@@ -2340,10 +2329,10 @@ void __init mnt_init(void)
2340 err = sysfs_init(); 2329 err = sysfs_init();
2341 if (err) 2330 if (err)
2342 printk(KERN_WARNING "%s: sysfs_init error: %d\n", 2331 printk(KERN_WARNING "%s: sysfs_init error: %d\n",
2343 __FUNCTION__, err); 2332 __func__, err);
2344 fs_kobj = kobject_create_and_add("fs", NULL); 2333 fs_kobj = kobject_create_and_add("fs", NULL);
2345 if (!fs_kobj) 2334 if (!fs_kobj)
2346 printk(KERN_WARNING "%s: kobj create error\n", __FUNCTION__); 2335 printk(KERN_WARNING "%s: kobj create error\n", __func__);
2347 init_rootfs(); 2336 init_rootfs();
2348 init_mount_tree(); 2337 init_mount_tree();
2349} 2338}
diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c
index df6d60bdfcd3..97645f112114 100644
--- a/fs/ncpfs/ncplib_kernel.c
+++ b/fs/ncpfs/ncplib_kernel.c
@@ -102,48 +102,47 @@ static inline void ncp_init_request_s(struct ncp_server *server, int subfunction
102} 102}
103 103
104static inline char * 104static inline char *
105 ncp_reply_data(struct ncp_server *server, int offset) 105ncp_reply_data(struct ncp_server *server, int offset)
106{ 106{
107 return &(server->packet[sizeof(struct ncp_reply_header) + offset]); 107 return &(server->packet[sizeof(struct ncp_reply_header) + offset]);
108} 108}
109 109
110static inline __u8 BVAL(void* data) 110static inline u8 BVAL(void *data)
111{ 111{
112 return get_unaligned((__u8*)data); 112 return *(u8 *)data;
113} 113}
114 114
115static __u8 115static u8 ncp_reply_byte(struct ncp_server *server, int offset)
116 ncp_reply_byte(struct ncp_server *server, int offset)
117{ 116{
118 return get_unaligned((__u8 *) ncp_reply_data(server, offset)); 117 return *(u8 *)ncp_reply_data(server, offset);
119} 118}
120 119
121static inline __u16 WVAL_LH(void* data) 120static inline u16 WVAL_LH(void *data)
122{ 121{
123 return le16_to_cpu(get_unaligned((__le16*)data)); 122 return get_unaligned_le16(data);
124} 123}
125 124
126static __u16 125static u16
127 ncp_reply_le16(struct ncp_server *server, int offset) 126ncp_reply_le16(struct ncp_server *server, int offset)
128{ 127{
129 return le16_to_cpu(get_unaligned((__le16 *) ncp_reply_data(server, offset))); 128 return get_unaligned_le16(ncp_reply_data(server, offset));
130} 129}
131 130
132static __u16 131static u16
133 ncp_reply_be16(struct ncp_server *server, int offset) 132ncp_reply_be16(struct ncp_server *server, int offset)
134{ 133{
135 return be16_to_cpu(get_unaligned((__be16 *) ncp_reply_data(server, offset))); 134 return get_unaligned_be16(ncp_reply_data(server, offset));
136} 135}
137 136
138static inline __u32 DVAL_LH(void* data) 137static inline u32 DVAL_LH(void *data)
139{ 138{
140 return le32_to_cpu(get_unaligned((__le32*)data)); 139 return get_unaligned_le32(data);
141} 140}
142 141
143static __le32 142static __le32
144 ncp_reply_dword(struct ncp_server *server, int offset) 143ncp_reply_dword(struct ncp_server *server, int offset)
145{ 144{
146 return get_unaligned((__le32 *) ncp_reply_data(server, offset)); 145 return get_unaligned((__le32 *)ncp_reply_data(server, offset));
147} 146}
148 147
149static inline __u32 ncp_reply_dword_lh(struct ncp_server* server, int offset) { 148static inline __u32 ncp_reply_dword_lh(struct ncp_server* server, int offset) {
@@ -1006,8 +1005,8 @@ ncp_read_bounce(struct ncp_server *server, const char *file_id,
1006 result = ncp_request2(server, 72, bounce, bufsize); 1005 result = ncp_request2(server, 72, bounce, bufsize);
1007 ncp_unlock_server(server); 1006 ncp_unlock_server(server);
1008 if (!result) { 1007 if (!result) {
1009 int len = be16_to_cpu(get_unaligned((__be16*)((char*)bounce + 1008 int len = get_unaligned_be16((char *)bounce +
1010 sizeof(struct ncp_reply_header)))); 1009 sizeof(struct ncp_reply_header));
1011 result = -EIO; 1010 result = -EIO;
1012 if (len <= to_read) { 1011 if (len <= to_read) {
1013 char* source; 1012 char* source;
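
Editor's note: the ncplib_kernel.c hunks replace open-coded le16_to_cpu(get_unaligned(...)) and be16_to_cpu(get_unaligned(...)) sequences with the combined get_unaligned_le16()/get_unaligned_be16() helpers, which read a possibly misaligned value and fix the byte order in one step. Below is a hedged userspace sketch of the same idea using memcpy; the helper names are invented and this is not the kernel implementation.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Portable unaligned little-endian reads, roughly what the kernel's
 * get_unaligned_le16()/get_unaligned_le32() helpers provide.  memcpy()
 * avoids any unaligned-access trap; the shifts fix the byte order
 * regardless of host endianness. */
static uint16_t rd_le16(const void *p)
{
    uint8_t b[2];

    memcpy(b, p, 2);
    return (uint16_t)(b[0] | (b[1] << 8));
}

static uint32_t rd_le32(const void *p)
{
    uint8_t b[4];

    memcpy(b, p, 4);
    return (uint32_t)b[0] | ((uint32_t)b[1] << 8) |
           ((uint32_t)b[2] << 16) | ((uint32_t)b[3] << 24);
}

int main(void)
{
    /* A misaligned reply buffer: one pad byte, then a little-endian
     * 16-bit value 0x1234 and a 32-bit value 0xdeadbeef. */
    const uint8_t reply[] = { 0x00, 0x34, 0x12, 0xef, 0xbe, 0xad, 0xde };

    printf("le16 = 0x%04x, le32 = 0x%08x\n",
           rd_le16(reply + 1), rd_le32(reply + 3));
    return 0;
}
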
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index f2f3b284e6dd..89ac5bb0401c 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1321,6 +1321,7 @@ static const struct file_operations nfs_server_list_fops = {
1321 .read = seq_read, 1321 .read = seq_read,
1322 .llseek = seq_lseek, 1322 .llseek = seq_lseek,
1323 .release = seq_release, 1323 .release = seq_release,
1324 .owner = THIS_MODULE,
1324}; 1325};
1325 1326
1326static int nfs_volume_list_open(struct inode *inode, struct file *file); 1327static int nfs_volume_list_open(struct inode *inode, struct file *file);
@@ -1341,6 +1342,7 @@ static const struct file_operations nfs_volume_list_fops = {
1341 .read = seq_read, 1342 .read = seq_read,
1342 .llseek = seq_lseek, 1343 .llseek = seq_lseek,
1343 .release = seq_release, 1344 .release = seq_release,
1345 .owner = THIS_MODULE,
1344}; 1346};
1345 1347
1346/* 1348/*
@@ -1500,33 +1502,29 @@ int __init nfs_fs_proc_init(void)
1500{ 1502{
1501 struct proc_dir_entry *p; 1503 struct proc_dir_entry *p;
1502 1504
1503 proc_fs_nfs = proc_mkdir("nfsfs", proc_root_fs); 1505 proc_fs_nfs = proc_mkdir("fs/nfsfs", NULL);
1504 if (!proc_fs_nfs) 1506 if (!proc_fs_nfs)
1505 goto error_0; 1507 goto error_0;
1506 1508
1507 proc_fs_nfs->owner = THIS_MODULE; 1509 proc_fs_nfs->owner = THIS_MODULE;
1508 1510
1509 /* a file of servers with which we're dealing */ 1511 /* a file of servers with which we're dealing */
1510 p = create_proc_entry("servers", S_IFREG|S_IRUGO, proc_fs_nfs); 1512 p = proc_create("servers", S_IFREG|S_IRUGO,
1513 proc_fs_nfs, &nfs_server_list_fops);
1511 if (!p) 1514 if (!p)
1512 goto error_1; 1515 goto error_1;
1513 1516
1514 p->proc_fops = &nfs_server_list_fops;
1515 p->owner = THIS_MODULE;
1516
1517 /* a file of volumes that we have mounted */ 1517 /* a file of volumes that we have mounted */
1518 p = create_proc_entry("volumes", S_IFREG|S_IRUGO, proc_fs_nfs); 1518 p = proc_create("volumes", S_IFREG|S_IRUGO,
1519 proc_fs_nfs, &nfs_volume_list_fops);
1519 if (!p) 1520 if (!p)
1520 goto error_2; 1521 goto error_2;
1521
1522 p->proc_fops = &nfs_volume_list_fops;
1523 p->owner = THIS_MODULE;
1524 return 0; 1522 return 0;
1525 1523
1526error_2: 1524error_2:
1527 remove_proc_entry("servers", proc_fs_nfs); 1525 remove_proc_entry("servers", proc_fs_nfs);
1528error_1: 1526error_1:
1529 remove_proc_entry("nfsfs", proc_root_fs); 1527 remove_proc_entry("fs/nfsfs", NULL);
1530error_0: 1528error_0:
1531 return -ENOMEM; 1529 return -ENOMEM;
1532} 1530}
@@ -1538,7 +1536,7 @@ void nfs_fs_proc_exit(void)
1538{ 1536{
1539 remove_proc_entry("volumes", proc_fs_nfs); 1537 remove_proc_entry("volumes", proc_fs_nfs);
1540 remove_proc_entry("servers", proc_fs_nfs); 1538 remove_proc_entry("servers", proc_fs_nfs);
1541 remove_proc_entry("nfsfs", proc_root_fs); 1539 remove_proc_entry("fs/nfsfs", NULL);
1542} 1540}
1543 1541
1544#endif /* CONFIG_PROC_FS */ 1542#endif /* CONFIG_PROC_FS */
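
Editor's note: nfs_fs_proc_init() above (like the fs/nfsd/nfsctl.c and fs/jfs/jfs_debug.c hunks) drops the old create_proc_entry()-then-assign-proc_fops pattern in favour of proc_create(), which registers the entry with its file_operations already attached, and passes paths like "fs/nfsfs" with a NULL parent now that name translation always runs. The following is a minimal module-style sketch of that pattern as it looked in this kernel generation; the entry name and functions are hypothetical, and the API shown is the 2.6.25/2.6.26-era proc_create() that still takes a struct file_operations.

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

/* Minimal proc_create() + seq_file sketch; "demo" is a made-up entry. */
static int demo_show(struct seq_file *m, void *v)
{
    seq_printf(m, "hello from /proc/demo\n");
    return 0;
}

static int demo_open(struct inode *inode, struct file *file)
{
    return single_open(file, demo_show, NULL);
}

static const struct file_operations demo_fops = {
    .owner   = THIS_MODULE,
    .open    = demo_open,
    .read    = seq_read,
    .llseek  = seq_lseek,
    .release = single_release,
};

static int __init demo_init(void)
{
    /* One call registers the entry with its fops in place, instead of
     * create_proc_entry() followed by p->proc_fops = ...; p->owner = ... */
    if (!proc_create("demo", S_IFREG | S_IRUGO, NULL, &demo_fops))
        return -ENOMEM;
    return 0;
}

static void __exit demo_exit(void)
{
    remove_proc_entry("demo", NULL);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
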
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index fa220dc74609..7226a506f3ca 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1575,6 +1575,11 @@ static int nfs_compare_super(struct super_block *sb, void *data)
1575 return nfs_compare_mount_options(sb, server, mntflags); 1575 return nfs_compare_mount_options(sb, server, mntflags);
1576} 1576}
1577 1577
1578static int nfs_bdi_register(struct nfs_server *server)
1579{
1580 return bdi_register_dev(&server->backing_dev_info, server->s_dev);
1581}
1582
1578static int nfs_get_sb(struct file_system_type *fs_type, 1583static int nfs_get_sb(struct file_system_type *fs_type,
1579 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 1584 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
1580{ 1585{
@@ -1617,6 +1622,10 @@ static int nfs_get_sb(struct file_system_type *fs_type,
1617 if (s->s_fs_info != server) { 1622 if (s->s_fs_info != server) {
1618 nfs_free_server(server); 1623 nfs_free_server(server);
1619 server = NULL; 1624 server = NULL;
1625 } else {
1626 error = nfs_bdi_register(server);
1627 if (error)
1628 goto error_splat_super;
1620 } 1629 }
1621 1630
1622 if (!s->s_root) { 1631 if (!s->s_root) {
@@ -1664,6 +1673,7 @@ static void nfs_kill_super(struct super_block *s)
1664{ 1673{
1665 struct nfs_server *server = NFS_SB(s); 1674 struct nfs_server *server = NFS_SB(s);
1666 1675
1676 bdi_unregister(&server->backing_dev_info);
1667 kill_anon_super(s); 1677 kill_anon_super(s);
1668 nfs_free_server(server); 1678 nfs_free_server(server);
1669} 1679}
@@ -1708,6 +1718,10 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
1708 if (s->s_fs_info != server) { 1718 if (s->s_fs_info != server) {
1709 nfs_free_server(server); 1719 nfs_free_server(server);
1710 server = NULL; 1720 server = NULL;
1721 } else {
1722 error = nfs_bdi_register(server);
1723 if (error)
1724 goto error_splat_super;
1711 } 1725 }
1712 1726
1713 if (!s->s_root) { 1727 if (!s->s_root) {
@@ -1984,6 +1998,10 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
1984 if (s->s_fs_info != server) { 1998 if (s->s_fs_info != server) {
1985 nfs_free_server(server); 1999 nfs_free_server(server);
1986 server = NULL; 2000 server = NULL;
2001 } else {
2002 error = nfs_bdi_register(server);
2003 if (error)
2004 goto error_splat_super;
1987 } 2005 }
1988 2006
1989 if (!s->s_root) { 2007 if (!s->s_root) {
@@ -2070,6 +2088,10 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
2070 if (s->s_fs_info != server) { 2088 if (s->s_fs_info != server) {
2071 nfs_free_server(server); 2089 nfs_free_server(server);
2072 server = NULL; 2090 server = NULL;
2091 } else {
2092 error = nfs_bdi_register(server);
2093 if (error)
2094 goto error_splat_super;
2073 } 2095 }
2074 2096
2075 if (!s->s_root) { 2097 if (!s->s_root) {
@@ -2149,6 +2171,10 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
2149 if (s->s_fs_info != server) { 2171 if (s->s_fs_info != server) {
2150 nfs_free_server(server); 2172 nfs_free_server(server);
2151 server = NULL; 2173 server = NULL;
2174 } else {
2175 error = nfs_bdi_register(server);
2176 if (error)
2177 goto error_splat_super;
2152 } 2178 }
2153 2179
2154 if (!s->s_root) { 2180 if (!s->s_root) {
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 562abf3380d0..0b3ffa9840c2 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -104,7 +104,7 @@ xdr_writemem(__be32 *p, const void *ptr, int nbytes)
104} while (0) 104} while (0)
105#define RESERVE_SPACE(nbytes) do { \ 105#define RESERVE_SPACE(nbytes) do { \
106 p = xdr_reserve_space(xdr, nbytes); \ 106 p = xdr_reserve_space(xdr, nbytes); \
107 if (!p) dprintk("NFSD: RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __FUNCTION__); \ 107 if (!p) dprintk("NFSD: RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __func__); \
108 BUG_ON(!p); \ 108 BUG_ON(!p); \
109} while (0) 109} while (0)
110 110
@@ -134,7 +134,7 @@ xdr_error: \
134 p = xdr_inline_decode(xdr, nbytes); \ 134 p = xdr_inline_decode(xdr, nbytes); \
135 if (!p) { \ 135 if (!p) { \
136 dprintk("NFSD: %s: reply buffer overflowed in line %d.\n", \ 136 dprintk("NFSD: %s: reply buffer overflowed in line %d.\n", \
137 __FUNCTION__, __LINE__); \ 137 __func__, __LINE__); \
138 return -EIO; \ 138 return -EIO; \
139 } \ 139 } \
140} while (0) 140} while (0)
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 42f3820ee8f5..5ac00c4fee91 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -169,6 +169,7 @@ static const struct file_operations exports_operations = {
169 .read = seq_read, 169 .read = seq_read,
170 .llseek = seq_lseek, 170 .llseek = seq_lseek,
171 .release = seq_release, 171 .release = seq_release,
172 .owner = THIS_MODULE,
172}; 173};
173 174
174/*----------------------------------------------------------------------------*/ 175/*----------------------------------------------------------------------------*/
@@ -801,10 +802,9 @@ static int create_proc_exports_entry(void)
801 entry = proc_mkdir("fs/nfs", NULL); 802 entry = proc_mkdir("fs/nfs", NULL);
802 if (!entry) 803 if (!entry)
803 return -ENOMEM; 804 return -ENOMEM;
804 entry = create_proc_entry("fs/nfs/exports", 0, NULL); 805 entry = proc_create("exports", 0, entry, &exports_operations);
805 if (!entry) 806 if (!entry)
806 return -ENOMEM; 807 return -ENOMEM;
807 entry->proc_fops = &exports_operations;
808 return 0; 808 return 0;
809} 809}
810#else /* CONFIG_PROC_FS */ 810#else /* CONFIG_PROC_FS */
diff --git a/fs/ntfs/debug.h b/fs/ntfs/debug.h
index 8ac37c33d127..5e6724c1afd1 100644
--- a/fs/ntfs/debug.h
+++ b/fs/ntfs/debug.h
@@ -45,7 +45,7 @@ static void ntfs_debug(const char *f, ...);
45extern void __ntfs_debug (const char *file, int line, const char *function, 45extern void __ntfs_debug (const char *file, int line, const char *function,
46 const char *format, ...) __attribute__ ((format (printf, 4, 5))); 46 const char *format, ...) __attribute__ ((format (printf, 4, 5)));
47#define ntfs_debug(f, a...) \ 47#define ntfs_debug(f, a...) \
48 __ntfs_debug(__FILE__, __LINE__, __FUNCTION__, f, ##a) 48 __ntfs_debug(__FILE__, __LINE__, __func__, f, ##a)
49 49
50extern void ntfs_debug_dump_runlist(const runlist_element *rl); 50extern void ntfs_debug_dump_runlist(const runlist_element *rl);
51 51
@@ -58,10 +58,10 @@ extern void ntfs_debug_dump_runlist(const runlist_element *rl);
58 58
59extern void __ntfs_warning(const char *function, const struct super_block *sb, 59extern void __ntfs_warning(const char *function, const struct super_block *sb,
60 const char *fmt, ...) __attribute__ ((format (printf, 3, 4))); 60 const char *fmt, ...) __attribute__ ((format (printf, 3, 4)));
61#define ntfs_warning(sb, f, a...) __ntfs_warning(__FUNCTION__, sb, f, ##a) 61#define ntfs_warning(sb, f, a...) __ntfs_warning(__func__, sb, f, ##a)
62 62
63extern void __ntfs_error(const char *function, const struct super_block *sb, 63extern void __ntfs_error(const char *function, const struct super_block *sb,
64 const char *fmt, ...) __attribute__ ((format (printf, 3, 4))); 64 const char *fmt, ...) __attribute__ ((format (printf, 3, 4)));
65#define ntfs_error(sb, f, a...) __ntfs_error(__FUNCTION__, sb, f, ##a) 65#define ntfs_error(sb, f, a...) __ntfs_error(__func__, sb, f, ##a)
66 66
67#endif /* _LINUX_NTFS_DEBUG_H */ 67#endif /* _LINUX_NTFS_DEBUG_H */
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 2ad5c8b104b9..790defb847e7 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -1191,7 +1191,7 @@ static int ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(ntfs_volume *vol,
1191 if (size) { 1191 if (size) {
1192 page = ntfs_map_page(mftbmp_mapping, 1192 page = ntfs_map_page(mftbmp_mapping,
1193 ofs >> PAGE_CACHE_SHIFT); 1193 ofs >> PAGE_CACHE_SHIFT);
1194 if (unlikely(IS_ERR(page))) { 1194 if (IS_ERR(page)) {
1195 ntfs_error(vol->sb, "Failed to read mft " 1195 ntfs_error(vol->sb, "Failed to read mft "
1196 "bitmap, aborting."); 1196 "bitmap, aborting.");
1197 return PTR_ERR(page); 1197 return PTR_ERR(page);
@@ -2118,7 +2118,7 @@ static int ntfs_mft_record_format(const ntfs_volume *vol, const s64 mft_no)
2118 } 2118 }
2119 /* Read, map, and pin the page containing the mft record. */ 2119 /* Read, map, and pin the page containing the mft record. */
2120 page = ntfs_map_page(mft_vi->i_mapping, index); 2120 page = ntfs_map_page(mft_vi->i_mapping, index);
2121 if (unlikely(IS_ERR(page))) { 2121 if (IS_ERR(page)) {
2122 ntfs_error(vol->sb, "Failed to map page containing mft record " 2122 ntfs_error(vol->sb, "Failed to map page containing mft record "
2123 "to format 0x%llx.", (long long)mft_no); 2123 "to format 0x%llx.", (long long)mft_no);
2124 return PTR_ERR(page); 2124 return PTR_ERR(page);
@@ -2519,7 +2519,7 @@ mft_rec_already_initialized:
2519 ofs = (bit << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK; 2519 ofs = (bit << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK;
2520 /* Read, map, and pin the page containing the mft record. */ 2520 /* Read, map, and pin the page containing the mft record. */
2521 page = ntfs_map_page(vol->mft_ino->i_mapping, index); 2521 page = ntfs_map_page(vol->mft_ino->i_mapping, index);
2522 if (unlikely(IS_ERR(page))) { 2522 if (IS_ERR(page)) {
2523 ntfs_error(vol->sb, "Failed to map page containing allocated " 2523 ntfs_error(vol->sb, "Failed to map page containing allocated "
2524 "mft record 0x%llx.", (long long)bit); 2524 "mft record 0x%llx.", (long long)bit);
2525 err = PTR_ERR(page); 2525 err = PTR_ERR(page);
diff --git a/fs/ocfs2/cluster/sys.c b/fs/ocfs2/cluster/sys.c
index 98429fd68499..bc702dab5d1f 100644
--- a/fs/ocfs2/cluster/sys.c
+++ b/fs/ocfs2/cluster/sys.c
@@ -65,7 +65,7 @@ int o2cb_sys_init(void)
65{ 65{
66 int ret; 66 int ret;
67 67
68 o2cb_kset = kset_create_and_add("o2cb", NULL, NULL); 68 o2cb_kset = kset_create_and_add("o2cb", NULL, fs_kobj);
69 if (!o2cb_kset) 69 if (!o2cb_kset)
70 return -ENOMEM; 70 return -ENOMEM;
71 71
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 5f6d858770a2..1b81dcba175d 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -44,7 +44,8 @@
44#define MLOG_MASK_PREFIX ML_DLM 44#define MLOG_MASK_PREFIX ML_DLM
45#include "cluster/masklog.h" 45#include "cluster/masklog.h"
46 46
47int stringify_lockname(const char *lockname, int locklen, char *buf, int len); 47static int stringify_lockname(const char *lockname, int locklen, char *buf,
48 int len);
48 49
49void dlm_print_one_lock_resource(struct dlm_lock_resource *res) 50void dlm_print_one_lock_resource(struct dlm_lock_resource *res)
50{ 51{
@@ -251,7 +252,8 @@ EXPORT_SYMBOL_GPL(dlm_errname);
251 * 252 *
252 * For more on lockname formats, please refer to dlmglue.c and ocfs2_lockid.h. 253 * For more on lockname formats, please refer to dlmglue.c and ocfs2_lockid.h.
253 */ 254 */
254int stringify_lockname(const char *lockname, int locklen, char *buf, int len) 255static int stringify_lockname(const char *lockname, int locklen, char *buf,
256 int len)
255{ 257{
256 int out = 0; 258 int out = 0;
257 __be64 inode_blkno_be; 259 __be64 inode_blkno_be;
@@ -368,7 +370,7 @@ static void dlm_debug_free(struct kref *kref)
368 kfree(dc); 370 kfree(dc);
369} 371}
370 372
371void dlm_debug_put(struct dlm_debug_ctxt *dc) 373static void dlm_debug_put(struct dlm_debug_ctxt *dc)
372{ 374{
373 if (dc) 375 if (dc)
374 kref_put(&dc->debug_refcnt, dlm_debug_free); 376 kref_put(&dc->debug_refcnt, dlm_debug_free);
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index 61a000f8524c..e48aba698b77 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -327,7 +327,7 @@ clear_fields:
327 327
328static struct backing_dev_info dlmfs_backing_dev_info = { 328static struct backing_dev_info dlmfs_backing_dev_info = {
329 .ra_pages = 0, /* No readahead */ 329 .ra_pages = 0, /* No readahead */
330 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, 330 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
331}; 331};
332 332
333static struct inode *dlmfs_get_root_inode(struct super_block *sb) 333static struct inode *dlmfs_get_root_inode(struct super_block *sb)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 9154c82d3258..57e0d30cde98 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1048,6 +1048,10 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1048 mlog_entry("(0x%p, '%.*s')\n", dentry, 1048 mlog_entry("(0x%p, '%.*s')\n", dentry,
1049 dentry->d_name.len, dentry->d_name.name); 1049 dentry->d_name.len, dentry->d_name.name);
1050 1050
1051 /* ensuring we don't even attempt to truncate a symlink */
1052 if (S_ISLNK(inode->i_mode))
1053 attr->ia_valid &= ~ATTR_SIZE;
1054
1051 if (attr->ia_valid & ATTR_MODE) 1055 if (attr->ia_valid & ATTR_MODE)
1052 mlog(0, "mode change: %d\n", attr->ia_mode); 1056 mlog(0, "mode change: %d\n", attr->ia_mode);
1053 if (attr->ia_valid & ATTR_UID) 1057 if (attr->ia_valid & ATTR_UID)
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index ce0dc147602a..be774bdc8b36 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -260,7 +260,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
260 bh = osb->local_alloc_bh; 260 bh = osb->local_alloc_bh;
261 alloc = (struct ocfs2_dinode *) bh->b_data; 261 alloc = (struct ocfs2_dinode *) bh->b_data;
262 262
263 alloc_copy = kmalloc(bh->b_size, GFP_KERNEL); 263 alloc_copy = kmalloc(bh->b_size, GFP_NOFS);
264 if (!alloc_copy) { 264 if (!alloc_copy) {
265 status = -ENOMEM; 265 status = -ENOMEM;
266 goto out_commit; 266 goto out_commit;
@@ -931,7 +931,7 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
931 * local alloc shutdown won't try to double free main bitmap 931 * local alloc shutdown won't try to double free main bitmap
932 * bits. Make a copy so the sync function knows which bits to 932 * bits. Make a copy so the sync function knows which bits to
933 * free. */ 933 * free. */
934 alloc_copy = kmalloc(osb->local_alloc_bh->b_size, GFP_KERNEL); 934 alloc_copy = kmalloc(osb->local_alloc_bh->b_size, GFP_NOFS);
935 if (!alloc_copy) { 935 if (!alloc_copy) {
936 status = -ENOMEM; 936 status = -ENOMEM;
937 mlog_errno(status); 937 mlog_errno(status);
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index ac1d74c63bf5..bbd1667aa7d3 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -385,7 +385,7 @@ static int o2cb_cluster_this_node(unsigned int *node)
385 return 0; 385 return 0;
386} 386}
387 387
388struct ocfs2_stack_operations o2cb_stack_ops = { 388static struct ocfs2_stack_operations o2cb_stack_ops = {
389 .connect = o2cb_cluster_connect, 389 .connect = o2cb_cluster_connect,
390 .disconnect = o2cb_cluster_disconnect, 390 .disconnect = o2cb_cluster_disconnect,
391 .hangup = o2cb_cluster_hangup, 391 .hangup = o2cb_cluster_hangup,
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 7428663f9cbb..b503772cd0ec 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -635,7 +635,7 @@ static const struct file_operations ocfs2_control_fops = {
635 .owner = THIS_MODULE, 635 .owner = THIS_MODULE,
636}; 636};
637 637
638struct miscdevice ocfs2_control_device = { 638static struct miscdevice ocfs2_control_device = {
639 .minor = MISC_DYNAMIC_MINOR, 639 .minor = MISC_DYNAMIC_MINOR,
640 .name = "ocfs2_control", 640 .name = "ocfs2_control",
641 .fops = &ocfs2_control_fops, 641 .fops = &ocfs2_control_fops,
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index 7134007ba22f..ba9dbb51d25b 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -167,9 +167,11 @@ const struct inode_operations ocfs2_symlink_inode_operations = {
167 .readlink = page_readlink, 167 .readlink = page_readlink,
168 .follow_link = ocfs2_follow_link, 168 .follow_link = ocfs2_follow_link,
169 .getattr = ocfs2_getattr, 169 .getattr = ocfs2_getattr,
170 .setattr = ocfs2_setattr,
170}; 171};
171const struct inode_operations ocfs2_fast_symlink_inode_operations = { 172const struct inode_operations ocfs2_fast_symlink_inode_operations = {
172 .readlink = ocfs2_readlink, 173 .readlink = ocfs2_readlink,
173 .follow_link = ocfs2_follow_link, 174 .follow_link = ocfs2_follow_link,
174 .getattr = ocfs2_getattr, 175 .getattr = ocfs2_getattr,
176 .setattr = ocfs2_setattr,
175}; 177};
diff --git a/fs/open.c b/fs/open.c
index 7af1f05d5978..a1450086e92f 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -7,6 +7,7 @@
7#include <linux/string.h> 7#include <linux/string.h>
8#include <linux/mm.h> 8#include <linux/mm.h>
9#include <linux/file.h> 9#include <linux/file.h>
10#include <linux/fdtable.h>
10#include <linux/quotaops.h> 11#include <linux/quotaops.h>
11#include <linux/fsnotify.h> 12#include <linux/fsnotify.h>
12#include <linux/module.h> 13#include <linux/module.h>
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index e7dd1d4e3473..0fdda2e8a4cc 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -41,12 +41,12 @@
41#ifndef CONFIG_LDM_DEBUG 41#ifndef CONFIG_LDM_DEBUG
42#define ldm_debug(...) do {} while (0) 42#define ldm_debug(...) do {} while (0)
43#else 43#else
44#define ldm_debug(f, a...) _ldm_printk (KERN_DEBUG, __FUNCTION__, f, ##a) 44#define ldm_debug(f, a...) _ldm_printk (KERN_DEBUG, __func__, f, ##a)
45#endif 45#endif
46 46
47#define ldm_crit(f, a...) _ldm_printk (KERN_CRIT, __FUNCTION__, f, ##a) 47#define ldm_crit(f, a...) _ldm_printk (KERN_CRIT, __func__, f, ##a)
48#define ldm_error(f, a...) _ldm_printk (KERN_ERR, __FUNCTION__, f, ##a) 48#define ldm_error(f, a...) _ldm_printk (KERN_ERR, __func__, f, ##a)
49#define ldm_info(f, a...) _ldm_printk (KERN_INFO, __FUNCTION__, f, ##a) 49#define ldm_info(f, a...) _ldm_printk (KERN_INFO, __func__, f, ##a)
50 50
51__attribute__ ((format (printf, 3, 4))) 51__attribute__ ((format (printf, 3, 4)))
52static void _ldm_printk (const char *level, const char *function, 52static void _ldm_printk (const char *level, const char *function,
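
Editor's note: many hunks in this merge (ldm.c here, plus lockd, msdos, namespace.c, nfsd and the NTFS debug macros) simply replace the GCC-specific __FUNCTION__ with the standard C99 __func__ predefined identifier; both expand to the name of the enclosing function. A small standalone illustration, not taken from the kernel, is below.

#include <stdio.h>

/* __func__ is the C99 predefined identifier for the enclosing function's
 * name; __FUNCTION__ was the older GCC spelling these patches retire.
 * A debug macro just forwards it to the formatter. */
#define demo_debug(fmt, ...) \
    fprintf(stderr, "%s: " fmt "\n", __func__, ##__VA_ARGS__)

static void probe_device(int id)
{
    demo_debug("probing device %d", id);
}

int main(void)
{
    probe_device(3);    /* prints "probe_device: probing device 3" */
    return 0;
}
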
diff --git a/fs/pipe.c b/fs/pipe.c
index f73492b6817e..ec228bc9f882 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -17,6 +17,7 @@
17#include <linux/highmem.h> 17#include <linux/highmem.h>
18#include <linux/pagemap.h> 18#include <linux/pagemap.h>
19#include <linux/audit.h> 19#include <linux/audit.h>
20#include <linux/syscalls.h>
20 21
21#include <asm/uaccess.h> 22#include <asm/uaccess.h>
22#include <asm/ioctls.h> 23#include <asm/ioctls.h>
@@ -1076,6 +1077,26 @@ int do_pipe(int *fd)
1076} 1077}
1077 1078
1078/* 1079/*
1080 * sys_pipe() is the normal C calling standard for creating
1081 * a pipe. It's not the way Unix traditionally does this, though.
1082 */
1083asmlinkage long __weak sys_pipe(int __user *fildes)
1084{
1085 int fd[2];
1086 int error;
1087
1088 error = do_pipe(fd);
1089 if (!error) {
1090 if (copy_to_user(fildes, fd, sizeof(fd))) {
1091 sys_close(fd[0]);
1092 sys_close(fd[1]);
1093 error = -EFAULT;
1094 }
1095 }
1096 return error;
1097}
1098
1099/*
1079 * pipefs should _never_ be mounted by userland - too much of security hassle, 1100 * pipefs should _never_ be mounted by userland - too much of security hassle,
1080 * no real gain from having the whole whorehouse mounted. So we don't need 1101 * no real gain from having the whole whorehouse mounted. So we don't need
1081 * any operations on the root directory. However, we need a non-trivial 1102 * any operations on the root directory. However, we need a non-trivial
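
Editor's note: the new __weak sys_pipe() in fs/pipe.c gives architectures that do not define their own entry point a generic one: do_pipe() fills two descriptors and copy_to_user() hands them back, closing both if the copy fails. From userspace this is simply pipe(2); a trivial, self-contained example follows.

#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* What the generic sys_pipe() above services from the kernel side:
 * pipe() returns two descriptors, fd[0] for reading, fd[1] for writing. */
int main(void)
{
    int fd[2];
    char buf[16];
    ssize_t n;

    if (pipe(fd) < 0) {
        perror("pipe");
        return 1;
    }

    write(fd[1], "ping", 4);
    n = read(fd[0], buf, sizeof(buf));
    printf("read %zd bytes: %.*s\n", n, (int)n, buf);

    close(fd[0]);
    close(fd[1]);
    return 0;
}
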
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 07d6c4853fe8..9e3b8c33c24b 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -73,6 +73,7 @@
73#include <linux/signal.h> 73#include <linux/signal.h>
74#include <linux/highmem.h> 74#include <linux/highmem.h>
75#include <linux/file.h> 75#include <linux/file.h>
76#include <linux/fdtable.h>
76#include <linux/times.h> 77#include <linux/times.h>
77#include <linux/cpuset.h> 78#include <linux/cpuset.h>
78#include <linux/rcupdate.h> 79#include <linux/rcupdate.h>
@@ -297,6 +298,7 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p)
297 render_cap_t(m, "CapInh:\t", &p->cap_inheritable); 298 render_cap_t(m, "CapInh:\t", &p->cap_inheritable);
298 render_cap_t(m, "CapPrm:\t", &p->cap_permitted); 299 render_cap_t(m, "CapPrm:\t", &p->cap_permitted);
299 render_cap_t(m, "CapEff:\t", &p->cap_effective); 300 render_cap_t(m, "CapEff:\t", &p->cap_effective);
301 render_cap_t(m, "CapBnd:\t", &p->cap_bset);
300} 302}
301 303
302static inline void task_context_switch_counts(struct seq_file *m, 304static inline void task_context_switch_counts(struct seq_file *m,
@@ -425,12 +427,13 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
425 cutime = cstime = utime = stime = cputime_zero; 427 cutime = cstime = utime = stime = cputime_zero;
426 cgtime = gtime = cputime_zero; 428 cgtime = gtime = cputime_zero;
427 429
428 rcu_read_lock();
429 if (lock_task_sighand(task, &flags)) { 430 if (lock_task_sighand(task, &flags)) {
430 struct signal_struct *sig = task->signal; 431 struct signal_struct *sig = task->signal;
431 432
432 if (sig->tty) { 433 if (sig->tty) {
433 tty_pgrp = pid_nr_ns(sig->tty->pgrp, ns); 434 struct pid *pgrp = tty_get_pgrp(sig->tty);
435 tty_pgrp = pid_nr_ns(pgrp, ns);
436 put_pid(pgrp);
434 tty_nr = new_encode_dev(tty_devnum(sig->tty)); 437 tty_nr = new_encode_dev(tty_devnum(sig->tty));
435 } 438 }
436 439
@@ -469,7 +472,6 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
469 472
470 unlock_task_sighand(task, &flags); 473 unlock_task_sighand(task, &flags);
471 } 474 }
472 rcu_read_unlock();
473 475
474 if (!whole || num_threads < 2) 476 if (!whole || num_threads < 2)
475 wchan = get_wchan(task); 477 wchan = get_wchan(task);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index c5e412a00b17..808cbdc193d3 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -56,6 +56,7 @@
56#include <linux/init.h> 56#include <linux/init.h>
57#include <linux/capability.h> 57#include <linux/capability.h>
58#include <linux/file.h> 58#include <linux/file.h>
59#include <linux/fdtable.h>
59#include <linux/string.h> 60#include <linux/string.h>
60#include <linux/seq_file.h> 61#include <linux/seq_file.h>
61#include <linux/namei.h> 62#include <linux/namei.h>
@@ -195,12 +196,32 @@ static int proc_root_link(struct inode *inode, struct path *path)
195 return result; 196 return result;
196} 197}
197 198
198#define MAY_PTRACE(task) \ 199/*
199 (task == current || \ 200 * Return zero if current may access user memory in @task, -error if not.
200 (task->parent == current && \ 201 */
201 (task->ptrace & PT_PTRACED) && \ 202static int check_mem_permission(struct task_struct *task)
202 (task_is_stopped_or_traced(task)) && \ 203{
203 security_ptrace(current,task) == 0)) 204 /*
205 * A task can always look at itself, in case it chooses
206 * to use system calls instead of load instructions.
207 */
208 if (task == current)
209 return 0;
210
211 /*
212 * If current is actively ptrace'ing, and would also be
213 * permitted to freshly attach with ptrace now, permit it.
214 */
215 if (task->parent == current && (task->ptrace & PT_PTRACED) &&
216 task_is_stopped_or_traced(task) &&
217 ptrace_may_attach(task))
218 return 0;
219
220 /*
221 * Noone else is allowed.
222 */
223 return -EPERM;
224}
204 225
205struct mm_struct *mm_for_maps(struct task_struct *task) 226struct mm_struct *mm_for_maps(struct task_struct *task)
206{ 227{
@@ -722,7 +743,7 @@ static ssize_t mem_read(struct file * file, char __user * buf,
722 if (!task) 743 if (!task)
723 goto out_no_task; 744 goto out_no_task;
724 745
725 if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) 746 if (check_mem_permission(task))
726 goto out; 747 goto out;
727 748
728 ret = -ENOMEM; 749 ret = -ENOMEM;
@@ -748,7 +769,7 @@ static ssize_t mem_read(struct file * file, char __user * buf,
748 769
749 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; 770 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
750 retval = access_process_vm(task, src, page, this_len, 0); 771 retval = access_process_vm(task, src, page, this_len, 0);
751 if (!retval || !MAY_PTRACE(task) || !ptrace_may_attach(task)) { 772 if (!retval || check_mem_permission(task)) {
752 if (!ret) 773 if (!ret)
753 ret = -EIO; 774 ret = -EIO;
754 break; 775 break;
@@ -792,7 +813,7 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
792 if (!task) 813 if (!task)
793 goto out_no_task; 814 goto out_no_task;
794 815
795 if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) 816 if (check_mem_permission(task))
796 goto out; 817 goto out;
797 818
798 copied = -ENOMEM; 819 copied = -ENOMEM;
@@ -1181,6 +1202,81 @@ static const struct file_operations proc_pid_sched_operations = {
1181 1202
1182#endif 1203#endif
1183 1204
1205/*
1206 * We added or removed a vma mapping the executable. The vmas are only mapped
1207 * during exec and are not mapped with the mmap system call.
1208 * Callers must hold down_write() on the mm's mmap_sem for these
1209 */
1210void added_exe_file_vma(struct mm_struct *mm)
1211{
1212 mm->num_exe_file_vmas++;
1213}
1214
1215void removed_exe_file_vma(struct mm_struct *mm)
1216{
1217 mm->num_exe_file_vmas--;
1218 if ((mm->num_exe_file_vmas == 0) && mm->exe_file){
1219 fput(mm->exe_file);
1220 mm->exe_file = NULL;
1221 }
1222
1223}
1224
1225void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
1226{
1227 if (new_exe_file)
1228 get_file(new_exe_file);
1229 if (mm->exe_file)
1230 fput(mm->exe_file);
1231 mm->exe_file = new_exe_file;
1232 mm->num_exe_file_vmas = 0;
1233}
1234
1235struct file *get_mm_exe_file(struct mm_struct *mm)
1236{
1237 struct file *exe_file;
1238
1239 /* We need mmap_sem to protect against races with removal of
1240 * VM_EXECUTABLE vmas */
1241 down_read(&mm->mmap_sem);
1242 exe_file = mm->exe_file;
1243 if (exe_file)
1244 get_file(exe_file);
1245 up_read(&mm->mmap_sem);
1246 return exe_file;
1247}
1248
1249void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm)
1250{
1251 /* It's safe to write the exe_file pointer without exe_file_lock because
1252 * this is called during fork when the task is not yet in /proc */
1253 newmm->exe_file = get_mm_exe_file(oldmm);
1254}
1255
1256static int proc_exe_link(struct inode *inode, struct path *exe_path)
1257{
1258 struct task_struct *task;
1259 struct mm_struct *mm;
1260 struct file *exe_file;
1261
1262 task = get_proc_task(inode);
1263 if (!task)
1264 return -ENOENT;
1265 mm = get_task_mm(task);
1266 put_task_struct(task);
1267 if (!mm)
1268 return -ENOENT;
1269 exe_file = get_mm_exe_file(mm);
1270 mmput(mm);
1271 if (exe_file) {
1272 *exe_path = exe_file->f_path;
1273 path_get(&exe_file->f_path);
1274 fput(exe_file);
1275 return 0;
1276 } else
1277 return -ENOENT;
1278}
1279
1184static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) 1280static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
1185{ 1281{
1186 struct inode *inode = dentry->d_inode; 1282 struct inode *inode = dentry->d_inode;
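
Editor's note: the fs/proc/base.c hunks above do two things: check_mem_permission() replaces the MAY_PTRACE() macro so /proc/<pid>/mem is only readable by the task itself or by an active, ptrace-capable tracer, and the mm->exe_file helpers let proc_exe_link() resolve /proc/<pid>/exe from the mm instead of scanning executable vmas. The visible userspace interface is the exe symlink; a hedged example of reading it is below (nothing here is kernel code).

#include <stdio.h>
#include <unistd.h>

/* /proc/self/exe is the symlink that proc_exe_link() above resolves,
 * now backed by mm->exe_file rather than a walk of the task's vmas. */
int main(void)
{
    char path[4096];
    ssize_t n;

    n = readlink("/proc/self/exe", path, sizeof(path) - 1);
    if (n < 0) {
        perror("readlink");
        return 1;
    }
    path[n] = '\0';
    printf("running image: %s\n", path);
    return 0;
}
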
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index a36ad3c75cf4..43e54e86cefd 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -69,12 +69,7 @@ proc_file_read(struct file *file, char __user *buf, size_t nbytes,
69 count = min_t(size_t, PROC_BLOCK_SIZE, nbytes); 69 count = min_t(size_t, PROC_BLOCK_SIZE, nbytes);
70 70
71 start = NULL; 71 start = NULL;
72 if (dp->get_info) { 72 if (dp->read_proc) {
73 /* Handle old net routines */
74 n = dp->get_info(page, &start, *ppos, count);
75 if (n < count)
76 eof = 1;
77 } else if (dp->read_proc) {
78 /* 73 /*
79 * How to be a proc read function 74 * How to be a proc read function
80 * ------------------------------ 75 * ------------------------------
@@ -277,8 +272,11 @@ static int xlate_proc_name(const char *name,
277 int len; 272 int len;
278 int rtn = 0; 273 int rtn = 0;
279 274
275 de = *ret;
276 if (!de)
277 de = &proc_root;
278
280 spin_lock(&proc_subdir_lock); 279 spin_lock(&proc_subdir_lock);
281 de = &proc_root;
282 while (1) { 280 while (1) {
283 next = strchr(cp, '/'); 281 next = strchr(cp, '/');
284 if (!next) 282 if (!next)
@@ -385,20 +383,18 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
385 383
386 lock_kernel(); 384 lock_kernel();
387 spin_lock(&proc_subdir_lock); 385 spin_lock(&proc_subdir_lock);
388 if (de) { 386 for (de = de->subdir; de ; de = de->next) {
389 for (de = de->subdir; de ; de = de->next) { 387 if (de->namelen != dentry->d_name.len)
390 if (de->namelen != dentry->d_name.len) 388 continue;
391 continue; 389 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
392 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { 390 unsigned int ino;
393 unsigned int ino;
394 391
395 ino = de->low_ino; 392 ino = de->low_ino;
396 de_get(de); 393 de_get(de);
397 spin_unlock(&proc_subdir_lock); 394 spin_unlock(&proc_subdir_lock);
398 error = -EINVAL; 395 error = -EINVAL;
399 inode = proc_get_inode(dir->i_sb, ino, de); 396 inode = proc_get_inode(dir->i_sb, ino, de);
400 goto out_unlock; 397 goto out_unlock;
401 }
402 } 398 }
403 } 399 }
404 spin_unlock(&proc_subdir_lock); 400 spin_unlock(&proc_subdir_lock);
@@ -410,7 +406,8 @@ out_unlock:
410 d_add(dentry, inode); 406 d_add(dentry, inode);
411 return NULL; 407 return NULL;
412 } 408 }
413 de_put(de); 409 if (de)
410 de_put(de);
414 return ERR_PTR(error); 411 return ERR_PTR(error);
415} 412}
416 413
@@ -440,10 +437,6 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
440 lock_kernel(); 437 lock_kernel();
441 438
442 ino = inode->i_ino; 439 ino = inode->i_ino;
443 if (!de) {
444 ret = -EINVAL;
445 goto out;
446 }
447 i = filp->f_pos; 440 i = filp->f_pos;
448 switch (i) { 441 switch (i) {
449 case 0: 442 case 0:
@@ -582,7 +575,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
582 /* make sure name is valid */ 575 /* make sure name is valid */
583 if (!name || !strlen(name)) goto out; 576 if (!name || !strlen(name)) goto out;
584 577
585 if (!(*parent) && xlate_proc_name(name, parent, &fn) != 0) 578 if (xlate_proc_name(name, parent, &fn) != 0)
586 goto out; 579 goto out;
587 580
588 /* At this point there must not be any '/' characters beyond *fn */ 581 /* At this point there must not be any '/' characters beyond *fn */
@@ -648,6 +641,23 @@ struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode,
648 return ent; 641 return ent;
649} 642}
650 643
644struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
645 struct proc_dir_entry *parent)
646{
647 struct proc_dir_entry *ent;
648
649 ent = __proc_create(&parent, name, S_IFDIR | S_IRUGO | S_IXUGO, 2);
650 if (ent) {
651 ent->data = net;
652 if (proc_register(parent, ent) < 0) {
653 kfree(ent);
654 ent = NULL;
655 }
656 }
657 return ent;
658}
659EXPORT_SYMBOL_GPL(proc_net_mkdir);
660
651struct proc_dir_entry *proc_mkdir(const char *name, 661struct proc_dir_entry *proc_mkdir(const char *name,
652 struct proc_dir_entry *parent) 662 struct proc_dir_entry *parent)
653{ 663{
@@ -682,9 +692,10 @@ struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
682 return ent; 692 return ent;
683} 693}
684 694
685struct proc_dir_entry *proc_create(const char *name, mode_t mode, 695struct proc_dir_entry *proc_create_data(const char *name, mode_t mode,
686 struct proc_dir_entry *parent, 696 struct proc_dir_entry *parent,
687 const struct file_operations *proc_fops) 697 const struct file_operations *proc_fops,
698 void *data)
688{ 699{
689 struct proc_dir_entry *pde; 700 struct proc_dir_entry *pde;
690 nlink_t nlink; 701 nlink_t nlink;
@@ -705,6 +716,7 @@ struct proc_dir_entry *proc_create(const char *name, mode_t mode,
705 if (!pde) 716 if (!pde)
706 goto out; 717 goto out;
707 pde->proc_fops = proc_fops; 718 pde->proc_fops = proc_fops;
719 pde->data = data;
708 if (proc_register(parent, pde) < 0) 720 if (proc_register(parent, pde) < 0)
709 goto out_free; 721 goto out_free;
710 return pde; 722 return pde;
@@ -734,55 +746,58 @@ void free_proc_entry(struct proc_dir_entry *de)
734void remove_proc_entry(const char *name, struct proc_dir_entry *parent) 746void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
735{ 747{
736 struct proc_dir_entry **p; 748 struct proc_dir_entry **p;
737 struct proc_dir_entry *de; 749 struct proc_dir_entry *de = NULL;
738 const char *fn = name; 750 const char *fn = name;
739 int len; 751 int len;
740 752
741 if (!parent && xlate_proc_name(name, &parent, &fn) != 0) 753 if (xlate_proc_name(name, &parent, &fn) != 0)
742 goto out; 754 return;
743 len = strlen(fn); 755 len = strlen(fn);
744 756
745 spin_lock(&proc_subdir_lock); 757 spin_lock(&proc_subdir_lock);
746 for (p = &parent->subdir; *p; p=&(*p)->next ) { 758 for (p = &parent->subdir; *p; p=&(*p)->next ) {
747 if (!proc_match(len, fn, *p)) 759 if (proc_match(len, fn, *p)) {
748 continue; 760 de = *p;
749 de = *p; 761 *p = de->next;
750 *p = de->next; 762 de->next = NULL;
751 de->next = NULL; 763 break;
752 764 }
753 spin_lock(&de->pde_unload_lock); 765 }
754 /* 766 spin_unlock(&proc_subdir_lock);
755 * Stop accepting new callers into module. If you're 767 if (!de)
756 * dynamically allocating ->proc_fops, save a pointer somewhere. 768 return;
757 */
758 de->proc_fops = NULL;
759 /* Wait until all existing callers into module are done. */
760 if (de->pde_users > 0) {
761 DECLARE_COMPLETION_ONSTACK(c);
762
763 if (!de->pde_unload_completion)
764 de->pde_unload_completion = &c;
765
766 spin_unlock(&de->pde_unload_lock);
767 spin_unlock(&proc_subdir_lock);
768 769
769 wait_for_completion(de->pde_unload_completion); 770 spin_lock(&de->pde_unload_lock);
771 /*
772 * Stop accepting new callers into module. If you're
773 * dynamically allocating ->proc_fops, save a pointer somewhere.
774 */
775 de->proc_fops = NULL;
776 /* Wait until all existing callers into module are done. */
777 if (de->pde_users > 0) {
778 DECLARE_COMPLETION_ONSTACK(c);
779
780 if (!de->pde_unload_completion)
781 de->pde_unload_completion = &c;
770 782
771 spin_lock(&proc_subdir_lock);
772 goto continue_removing;
773 }
774 spin_unlock(&de->pde_unload_lock); 783 spin_unlock(&de->pde_unload_lock);
775 784
785 wait_for_completion(de->pde_unload_completion);
786
787 goto continue_removing;
788 }
789 spin_unlock(&de->pde_unload_lock);
790
776continue_removing: 791continue_removing:
777 if (S_ISDIR(de->mode)) 792 if (S_ISDIR(de->mode))
778 parent->nlink--; 793 parent->nlink--;
779 de->nlink = 0; 794 de->nlink = 0;
780 WARN_ON(de->subdir); 795 if (de->subdir) {
781 if (atomic_dec_and_test(&de->count)) 796 printk(KERN_WARNING "%s: removing non-empty directory "
782 free_proc_entry(de); 797 "'%s/%s', leaking at least '%s'\n", __func__,
783 break; 798 de->parent->name, de->name, de->subdir->name);
799 WARN_ON(1);
784 } 800 }
785 spin_unlock(&proc_subdir_lock); 801 if (atomic_dec_and_test(&de->count))
786out: 802 free_proc_entry(de);
787 return;
788} 803}
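
The new proc_create_data() above stores the caller's pointer in ->data before the entry is registered, so consumers no longer have to poke the field in after create_proc_entry(). What follows is a minimal sketch of how a module might use it against a 2.6.26-era tree; the names example_* and the "example" path are invented for illustration, and the stored pointer is assumed to be read back through the usual PDE() accessor in the open routine.

/* Sketch only: hypothetical module built against a 2.6.26-era tree. */
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int example_show(struct seq_file *m, void *v)
{
	/* m->private is the pointer handed to single_open() below. */
	seq_printf(m, "%s\n", (char *)m->private);
	return 0;
}

static int example_open(struct inode *inode, struct file *file)
{
	/* PDE(inode)->data is what was passed to proc_create_data(). */
	return single_open(file, example_show, PDE(inode)->data);
}

static const struct file_operations example_proc_fops = {
	.owner   = THIS_MODULE,
	.open    = example_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};

static char example_payload[] = "hello from proc_create_data";

static int __init example_init(void)
{
	if (!proc_create_data("example", 0444, NULL,
			      &example_proc_fops, example_payload))
		return -ENOMEM;
	return 0;
}

static void __exit example_exit(void)
{
	remove_proc_entry("example", NULL);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
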
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 82b3a1b5a70b..6f4e8dc97da1 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -25,8 +25,7 @@
25 25
26struct proc_dir_entry *de_get(struct proc_dir_entry *de) 26struct proc_dir_entry *de_get(struct proc_dir_entry *de)
27{ 27{
28 if (de) 28 atomic_inc(&de->count);
29 atomic_inc(&de->count);
30 return de; 29 return de;
31} 30}
32 31
@@ -35,18 +34,16 @@ struct proc_dir_entry *de_get(struct proc_dir_entry *de)
35 */ 34 */
36void de_put(struct proc_dir_entry *de) 35void de_put(struct proc_dir_entry *de)
37{ 36{
38 if (de) { 37 lock_kernel();
39 lock_kernel(); 38 if (!atomic_read(&de->count)) {
40 if (!atomic_read(&de->count)) { 39 printk("de_put: entry %s already free!\n", de->name);
41 printk("de_put: entry %s already free!\n", de->name);
42 unlock_kernel();
43 return;
44 }
45
46 if (atomic_dec_and_test(&de->count))
47 free_proc_entry(de);
48 unlock_kernel(); 40 unlock_kernel();
41 return;
49 } 42 }
43
44 if (atomic_dec_and_test(&de->count))
45 free_proc_entry(de);
46 unlock_kernel();
50} 47}
51 48
52/* 49/*
@@ -392,7 +389,7 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
392{ 389{
393 struct inode * inode; 390 struct inode * inode;
394 391
395 if (de != NULL && !try_module_get(de->owner)) 392 if (!try_module_get(de->owner))
396 goto out_mod; 393 goto out_mod;
397 394
398 inode = iget_locked(sb, ino); 395 inode = iget_locked(sb, ino);
@@ -402,30 +399,29 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
402 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 399 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
403 PROC_I(inode)->fd = 0; 400 PROC_I(inode)->fd = 0;
404 PROC_I(inode)->pde = de; 401 PROC_I(inode)->pde = de;
405 if (de) { 402
406 if (de->mode) { 403 if (de->mode) {
407 inode->i_mode = de->mode; 404 inode->i_mode = de->mode;
408 inode->i_uid = de->uid; 405 inode->i_uid = de->uid;
409 inode->i_gid = de->gid; 406 inode->i_gid = de->gid;
410 } 407 }
411 if (de->size) 408 if (de->size)
412 inode->i_size = de->size; 409 inode->i_size = de->size;
413 if (de->nlink) 410 if (de->nlink)
414 inode->i_nlink = de->nlink; 411 inode->i_nlink = de->nlink;
415 if (de->proc_iops) 412 if (de->proc_iops)
416 inode->i_op = de->proc_iops; 413 inode->i_op = de->proc_iops;
417 if (de->proc_fops) { 414 if (de->proc_fops) {
418 if (S_ISREG(inode->i_mode)) { 415 if (S_ISREG(inode->i_mode)) {
419#ifdef CONFIG_COMPAT 416#ifdef CONFIG_COMPAT
420 if (!de->proc_fops->compat_ioctl) 417 if (!de->proc_fops->compat_ioctl)
421 inode->i_fop = 418 inode->i_fop =
422 &proc_reg_file_ops_no_compat; 419 &proc_reg_file_ops_no_compat;
423 else 420 else
424#endif 421#endif
425 inode->i_fop = &proc_reg_file_ops; 422 inode->i_fop = &proc_reg_file_ops;
426 } else { 423 } else {
427 inode->i_fop = de->proc_fops; 424 inode->i_fop = de->proc_fops;
428 }
429 } 425 }
430 } 426 }
431 unlock_new_inode(inode); 427 unlock_new_inode(inode);
@@ -433,8 +429,7 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
433 return inode; 429 return inode;
434 430
435out_ino: 431out_ino:
436 if (de != NULL) 432 module_put(de->owner);
437 module_put(de->owner);
438out_mod: 433out_mod:
439 return NULL; 434 return NULL;
440} 435}
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index bc72f5c8c47d..28cbca805905 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -11,6 +11,7 @@
11 11
12#include <linux/proc_fs.h> 12#include <linux/proc_fs.h>
13 13
14extern struct proc_dir_entry proc_root;
14#ifdef CONFIG_PROC_SYSCTL 15#ifdef CONFIG_PROC_SYSCTL
15extern int proc_sys_init(void); 16extern int proc_sys_init(void);
16#else 17#else
@@ -46,9 +47,6 @@ extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *);
46 47
47extern int maps_protect; 48extern int maps_protect;
48 49
49extern void create_seq_entry(char *name, mode_t mode,
50 const struct file_operations *f);
51extern int proc_exe_link(struct inode *, struct path *);
52extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, 50extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns,
53 struct pid *pid, struct task_struct *task); 51 struct pid *pid, struct task_struct *task);
54extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns, 52extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns,
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 941e95114b5a..79ecd281d2cb 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -137,7 +137,7 @@ static const struct file_operations proc_nommu_vma_list_operations = {
137 137
138static int __init proc_nommu_init(void) 138static int __init proc_nommu_init(void)
139{ 139{
140 create_seq_entry("maps", S_IRUGO, &proc_nommu_vma_list_operations); 140 proc_create("maps", S_IRUGO, NULL, &proc_nommu_vma_list_operations);
141 return 0; 141 return 0;
142} 142}
143 143
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 441a32f0e5f2..74a323d2b850 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -179,6 +179,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
179 "PageTables: %8lu kB\n" 179 "PageTables: %8lu kB\n"
180 "NFS_Unstable: %8lu kB\n" 180 "NFS_Unstable: %8lu kB\n"
181 "Bounce: %8lu kB\n" 181 "Bounce: %8lu kB\n"
182 "WritebackTmp: %8lu kB\n"
182 "CommitLimit: %8lu kB\n" 183 "CommitLimit: %8lu kB\n"
183 "Committed_AS: %8lu kB\n" 184 "Committed_AS: %8lu kB\n"
184 "VmallocTotal: %8lu kB\n" 185 "VmallocTotal: %8lu kB\n"
@@ -210,6 +211,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
210 K(global_page_state(NR_PAGETABLE)), 211 K(global_page_state(NR_PAGETABLE)),
211 K(global_page_state(NR_UNSTABLE_NFS)), 212 K(global_page_state(NR_UNSTABLE_NFS)),
212 K(global_page_state(NR_BOUNCE)), 213 K(global_page_state(NR_BOUNCE)),
214 K(global_page_state(NR_WRITEBACK_TEMP)),
213 K(allowed), 215 K(allowed),
214 K(committed), 216 K(committed),
215 (unsigned long)VMALLOC_TOTAL >> 10, 217 (unsigned long)VMALLOC_TOTAL >> 10,
@@ -826,14 +828,6 @@ static struct file_operations proc_kpageflags_operations = {
826 828
827struct proc_dir_entry *proc_root_kcore; 829struct proc_dir_entry *proc_root_kcore;
828 830
829void create_seq_entry(char *name, mode_t mode, const struct file_operations *f)
830{
831 struct proc_dir_entry *entry;
832 entry = create_proc_entry(name, mode, NULL);
833 if (entry)
834 entry->proc_fops = f;
835}
836
837void __init proc_misc_init(void) 831void __init proc_misc_init(void)
838{ 832{
839 static struct { 833 static struct {
@@ -862,66 +856,52 @@ void __init proc_misc_init(void)
862 856
863 /* And now for trickier ones */ 857 /* And now for trickier ones */
864#ifdef CONFIG_PRINTK 858#ifdef CONFIG_PRINTK
865 { 859 proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations);
866 struct proc_dir_entry *entry;
867 entry = create_proc_entry("kmsg", S_IRUSR, &proc_root);
868 if (entry)
869 entry->proc_fops = &proc_kmsg_operations;
870 }
871#endif 860#endif
872 create_seq_entry("locks", 0, &proc_locks_operations); 861 proc_create("locks", 0, NULL, &proc_locks_operations);
873 create_seq_entry("devices", 0, &proc_devinfo_operations); 862 proc_create("devices", 0, NULL, &proc_devinfo_operations);
874 create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations); 863 proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations);
875#ifdef CONFIG_BLOCK 864#ifdef CONFIG_BLOCK
876 create_seq_entry("partitions", 0, &proc_partitions_operations); 865 proc_create("partitions", 0, NULL, &proc_partitions_operations);
877#endif 866#endif
878 create_seq_entry("stat", 0, &proc_stat_operations); 867 proc_create("stat", 0, NULL, &proc_stat_operations);
879 create_seq_entry("interrupts", 0, &proc_interrupts_operations); 868 proc_create("interrupts", 0, NULL, &proc_interrupts_operations);
880#ifdef CONFIG_SLABINFO 869#ifdef CONFIG_SLABINFO
881 create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations); 870 proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations);
882#ifdef CONFIG_DEBUG_SLAB_LEAK 871#ifdef CONFIG_DEBUG_SLAB_LEAK
883 create_seq_entry("slab_allocators", 0 ,&proc_slabstats_operations); 872 proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations);
884#endif 873#endif
885#endif 874#endif
886#ifdef CONFIG_MMU 875#ifdef CONFIG_MMU
887 proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations); 876 proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations);
888#endif 877#endif
889 create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations); 878 proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
890 create_seq_entry("pagetypeinfo", S_IRUGO, &pagetypeinfo_file_ops); 879 proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops);
891 create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations); 880 proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations);
892 create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations); 881 proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations);
893#ifdef CONFIG_BLOCK 882#ifdef CONFIG_BLOCK
894 create_seq_entry("diskstats", 0, &proc_diskstats_operations); 883 proc_create("diskstats", 0, NULL, &proc_diskstats_operations);
895#endif 884#endif
896#ifdef CONFIG_MODULES 885#ifdef CONFIG_MODULES
897 create_seq_entry("modules", 0, &proc_modules_operations); 886 proc_create("modules", 0, NULL, &proc_modules_operations);
898#endif 887#endif
899#ifdef CONFIG_SCHEDSTATS 888#ifdef CONFIG_SCHEDSTATS
900 create_seq_entry("schedstat", 0, &proc_schedstat_operations); 889 proc_create("schedstat", 0, NULL, &proc_schedstat_operations);
901#endif 890#endif
902#ifdef CONFIG_PROC_KCORE 891#ifdef CONFIG_PROC_KCORE
903 proc_root_kcore = create_proc_entry("kcore", S_IRUSR, NULL); 892 proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &proc_kcore_operations);
904 if (proc_root_kcore) { 893 if (proc_root_kcore)
905 proc_root_kcore->proc_fops = &proc_kcore_operations;
906 proc_root_kcore->size = 894 proc_root_kcore->size =
907 (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE; 895 (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE;
908 }
909#endif 896#endif
910#ifdef CONFIG_PROC_PAGE_MONITOR 897#ifdef CONFIG_PROC_PAGE_MONITOR
911 create_seq_entry("kpagecount", S_IRUSR, &proc_kpagecount_operations); 898 proc_create("kpagecount", S_IRUSR, NULL, &proc_kpagecount_operations);
912 create_seq_entry("kpageflags", S_IRUSR, &proc_kpageflags_operations); 899 proc_create("kpageflags", S_IRUSR, NULL, &proc_kpageflags_operations);
913#endif 900#endif
914#ifdef CONFIG_PROC_VMCORE 901#ifdef CONFIG_PROC_VMCORE
915 proc_vmcore = create_proc_entry("vmcore", S_IRUSR, NULL); 902 proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations);
916 if (proc_vmcore)
917 proc_vmcore->proc_fops = &proc_vmcore_operations;
918#endif 903#endif
919#ifdef CONFIG_MAGIC_SYSRQ 904#ifdef CONFIG_MAGIC_SYSRQ
920 { 905 proc_create("sysrq-trigger", S_IWUSR, NULL, &proc_sysrq_trigger_operations);
921 struct proc_dir_entry *entry;
922 entry = create_proc_entry("sysrq-trigger", S_IWUSR, NULL);
923 if (entry)
924 entry->proc_fops = &proc_sysrq_trigger_operations;
925 }
926#endif 906#endif
927} 907}
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 13cd7835d0df..83f357b30d71 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -159,17 +159,6 @@ struct net *get_proc_net(const struct inode *inode)
159} 159}
160EXPORT_SYMBOL_GPL(get_proc_net); 160EXPORT_SYMBOL_GPL(get_proc_net);
161 161
162struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
163 struct proc_dir_entry *parent)
164{
165 struct proc_dir_entry *pde;
166 pde = proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent);
167 if (pde != NULL)
168 pde->data = net;
169 return pde;
170}
171EXPORT_SYMBOL_GPL(proc_net_mkdir);
172
173static __net_init int proc_net_ns_init(struct net *net) 162static __net_init int proc_net_ns_init(struct net *net)
174{ 163{
175 struct proc_dir_entry *netd, *net_statd; 164 struct proc_dir_entry *netd, *net_statd;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 614c34b6d1c2..5acc001d49f6 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -165,8 +165,8 @@ out:
165 return err; 165 return err;
166} 166}
167 167
168static ssize_t proc_sys_read(struct file *filp, char __user *buf, 168static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
169 size_t count, loff_t *ppos) 169 size_t count, loff_t *ppos, int write)
170{ 170{
171 struct dentry *dentry = filp->f_dentry; 171 struct dentry *dentry = filp->f_dentry;
172 struct ctl_table_header *head; 172 struct ctl_table_header *head;
@@ -190,12 +190,12 @@ static ssize_t proc_sys_read(struct file *filp, char __user *buf,
190 * and won't be until we finish. 190 * and won't be until we finish.
191 */ 191 */
192 error = -EPERM; 192 error = -EPERM;
193 if (sysctl_perm(table, MAY_READ)) 193 if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ))
194 goto out; 194 goto out;
195 195
196 /* careful: calling conventions are nasty here */ 196 /* careful: calling conventions are nasty here */
197 res = count; 197 res = count;
198 error = table->proc_handler(table, 0, filp, buf, &res, ppos); 198 error = table->proc_handler(table, write, filp, buf, &res, ppos);
199 if (!error) 199 if (!error)
200 error = res; 200 error = res;
201out: 201out:
@@ -204,44 +204,16 @@ out:
204 return error; 204 return error;
205} 205}
206 206
207static ssize_t proc_sys_write(struct file *filp, const char __user *buf, 207static ssize_t proc_sys_read(struct file *filp, char __user *buf,
208 size_t count, loff_t *ppos) 208 size_t count, loff_t *ppos)
209{ 209{
210 struct dentry *dentry = filp->f_dentry; 210 return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 0);
211 struct ctl_table_header *head; 211}
212 struct ctl_table *table;
213 ssize_t error;
214 size_t res;
215
216 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
217 /* Has the sysctl entry disappeared on us? */
218 error = -ENOENT;
219 if (!table)
220 goto out;
221
222 /* Has the sysctl entry been replaced by a directory? */
223 error = -EISDIR;
224 if (!table->proc_handler)
225 goto out;
226
227 /*
228 * At this point we know that the sysctl was not unregistered
229 * and won't be until we finish.
230 */
231 error = -EPERM;
232 if (sysctl_perm(table, MAY_WRITE))
233 goto out;
234
235 /* careful: calling conventions are nasty here */
236 res = count;
237 error = table->proc_handler(table, 1, filp, (char __user *)buf,
238 &res, ppos);
239 if (!error)
240 error = res;
241out:
242 sysctl_head_finish(head);
243 212
244 return error; 213static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
214 size_t count, loff_t *ppos)
215{
216 return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1);
245} 217}
246 218
247 219
@@ -416,7 +388,7 @@ static int proc_sys_permission(struct inode *inode, int mask, struct nameidata *
416 goto out; 388 goto out;
417 389
418 /* Use the permissions on the sysctl table entry */ 390 /* Use the permissions on the sysctl table entry */
419 error = sysctl_perm(table, mask); 391 error = sysctl_perm(head->root, table, mask);
420out: 392out:
421 sysctl_head_finish(head); 393 sysctl_head_finish(head);
422 return error; 394 return error;
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index 49816e00b51a..21f490f5d65c 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -5,7 +5,7 @@
5 */ 5 */
6 6
7#include <asm/uaccess.h> 7#include <asm/uaccess.h>
8 8#include <linux/module.h>
9#include <linux/init.h> 9#include <linux/init.h>
10#include <linux/errno.h> 10#include <linux/errno.h>
11#include <linux/time.h> 11#include <linux/time.h>
@@ -136,39 +136,54 @@ static const struct file_operations proc_tty_drivers_operations = {
136 .release = seq_release, 136 .release = seq_release,
137}; 137};
138 138
139/* 139static void * tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos)
140 * This is the handler for /proc/tty/ldiscs
141 */
142static int tty_ldiscs_read_proc(char *page, char **start, off_t off,
143 int count, int *eof, void *data)
144{ 140{
145 int i; 141 return (*pos < NR_LDISCS) ? pos : NULL;
146 int len = 0; 142}
147 off_t begin = 0; 143
144static void * tty_ldiscs_seq_next(struct seq_file *m, void *v, loff_t *pos)
145{
146 (*pos)++;
147 return (*pos < NR_LDISCS) ? pos : NULL;
148}
149
150static void tty_ldiscs_seq_stop(struct seq_file *m, void *v)
151{
152}
153
154static int tty_ldiscs_seq_show(struct seq_file *m, void *v)
155{
156 int i = *(loff_t *)v;
148 struct tty_ldisc *ld; 157 struct tty_ldisc *ld;
149 158
150 for (i=0; i < NR_LDISCS; i++) { 159 ld = tty_ldisc_get(i);
151 ld = tty_ldisc_get(i); 160 if (ld == NULL)
152 if (ld == NULL)
153 continue;
154 len += sprintf(page+len, "%-10s %2d\n",
155 ld->name ? ld->name : "???", i);
156 tty_ldisc_put(i);
157 if (len+begin > off+count)
158 break;
159 if (len+begin < off) {
160 begin += len;
161 len = 0;
162 }
163 }
164 if (i >= NR_LDISCS)
165 *eof = 1;
166 if (off >= len+begin)
167 return 0; 161 return 0;
168 *start = page + (off-begin); 162 seq_printf(m, "%-10s %2d\n", ld->name ? ld->name : "???", i);
169 return ((count < begin+len-off) ? count : begin+len-off); 163 tty_ldisc_put(i);
164 return 0;
165}
166
167static const struct seq_operations tty_ldiscs_seq_ops = {
168 .start = tty_ldiscs_seq_start,
169 .next = tty_ldiscs_seq_next,
170 .stop = tty_ldiscs_seq_stop,
171 .show = tty_ldiscs_seq_show,
172};
173
174static int proc_tty_ldiscs_open(struct inode *inode, struct file *file)
175{
176 return seq_open(file, &tty_ldiscs_seq_ops);
170} 177}
171 178
179static const struct file_operations tty_ldiscs_proc_fops = {
180 .owner = THIS_MODULE,
181 .open = proc_tty_ldiscs_open,
182 .read = seq_read,
183 .llseek = seq_lseek,
184 .release = seq_release,
185};
186
172/* 187/*
173 * This function is called by tty_register_driver() to handle 188 * This function is called by tty_register_driver() to handle
174 * registering the driver's /proc handler into /proc/tty/driver/<foo> 189 * registering the driver's /proc handler into /proc/tty/driver/<foo>
@@ -177,16 +192,14 @@ void proc_tty_register_driver(struct tty_driver *driver)
177{ 192{
178 struct proc_dir_entry *ent; 193 struct proc_dir_entry *ent;
179 194
180 if ((!driver->read_proc && !driver->write_proc) || 195 if (!driver->ops->read_proc || !driver->driver_name ||
181 !driver->driver_name ||
182 driver->proc_entry) 196 driver->proc_entry)
183 return; 197 return;
184 198
185 ent = create_proc_entry(driver->driver_name, 0, proc_tty_driver); 199 ent = create_proc_entry(driver->driver_name, 0, proc_tty_driver);
186 if (!ent) 200 if (!ent)
187 return; 201 return;
188 ent->read_proc = driver->read_proc; 202 ent->read_proc = driver->ops->read_proc;
189 ent->write_proc = driver->write_proc;
190 ent->owner = driver->owner; 203 ent->owner = driver->owner;
191 ent->data = driver; 204 ent->data = driver;
192 205
@@ -214,7 +227,6 @@ void proc_tty_unregister_driver(struct tty_driver *driver)
214 */ 227 */
215void __init proc_tty_init(void) 228void __init proc_tty_init(void)
216{ 229{
217 struct proc_dir_entry *entry;
218 if (!proc_mkdir("tty", NULL)) 230 if (!proc_mkdir("tty", NULL))
219 return; 231 return;
220 proc_tty_ldisc = proc_mkdir("tty/ldisc", NULL); 232 proc_tty_ldisc = proc_mkdir("tty/ldisc", NULL);
@@ -224,10 +236,7 @@ void __init proc_tty_init(void)
224 * password lengths and inter-keystroke timings during password 236 * password lengths and inter-keystroke timings during password
225 * entry. 237 * entry.
226 */ 238 */
227 proc_tty_driver = proc_mkdir_mode("tty/driver", S_IRUSR | S_IXUSR, NULL); 239 proc_tty_driver = proc_mkdir_mode("tty/driver", S_IRUSR|S_IXUSR, NULL);
228 240 proc_create("tty/ldiscs", 0, NULL, &tty_ldiscs_proc_fops);
229 create_proc_read_entry("tty/ldiscs", 0, NULL, tty_ldiscs_read_proc, NULL); 241 proc_create("tty/drivers", 0, NULL, &proc_tty_drivers_operations);
230 entry = create_proc_entry("tty/drivers", 0, NULL);
231 if (entry)
232 entry->proc_fops = &proc_tty_drivers_operations;
233} 242}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index ef0fb57fc9ef..95117538a4f6 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -22,8 +22,6 @@
22 22
23#include "internal.h" 23#include "internal.h"
24 24
25struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver;
26
27static int proc_test_super(struct super_block *sb, void *data) 25static int proc_test_super(struct super_block *sb, void *data)
28{ 26{
29 return sb->s_fs_info == data; 27 return sb->s_fs_info == data;
@@ -126,8 +124,8 @@ void __init proc_root_init(void)
126#ifdef CONFIG_SYSVIPC 124#ifdef CONFIG_SYSVIPC
127 proc_mkdir("sysvipc", NULL); 125 proc_mkdir("sysvipc", NULL);
128#endif 126#endif
129 proc_root_fs = proc_mkdir("fs", NULL); 127 proc_mkdir("fs", NULL);
130 proc_root_driver = proc_mkdir("driver", NULL); 128 proc_mkdir("driver", NULL);
131 proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */ 129 proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */
132#if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE) 130#if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE)
133 /* just give it a mountpoint */ 131 /* just give it a mountpoint */
@@ -137,7 +135,7 @@ void __init proc_root_init(void)
137#ifdef CONFIG_PROC_DEVICETREE 135#ifdef CONFIG_PROC_DEVICETREE
138 proc_device_tree_init(); 136 proc_device_tree_init();
139#endif 137#endif
140 proc_bus = proc_mkdir("bus", NULL); 138 proc_mkdir("bus", NULL);
141 proc_sys_init(); 139 proc_sys_init();
142} 140}
143 141
@@ -232,9 +230,5 @@ void pid_ns_release_proc(struct pid_namespace *ns)
232EXPORT_SYMBOL(proc_symlink); 230EXPORT_SYMBOL(proc_symlink);
233EXPORT_SYMBOL(proc_mkdir); 231EXPORT_SYMBOL(proc_mkdir);
234EXPORT_SYMBOL(create_proc_entry); 232EXPORT_SYMBOL(create_proc_entry);
235EXPORT_SYMBOL(proc_create); 233EXPORT_SYMBOL(proc_create_data);
236EXPORT_SYMBOL(remove_proc_entry); 234EXPORT_SYMBOL(remove_proc_entry);
237EXPORT_SYMBOL(proc_root);
238EXPORT_SYMBOL(proc_root_fs);
239EXPORT_SYMBOL(proc_bus);
240EXPORT_SYMBOL(proc_root_driver);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 7415eeb7cc3a..88717c0f941b 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -5,11 +5,9 @@
5#include <linux/highmem.h> 5#include <linux/highmem.h>
6#include <linux/ptrace.h> 6#include <linux/ptrace.h>
7#include <linux/pagemap.h> 7#include <linux/pagemap.h>
8#include <linux/ptrace.h>
9#include <linux/mempolicy.h> 8#include <linux/mempolicy.h>
10#include <linux/swap.h> 9#include <linux/swap.h>
11#include <linux/swapops.h> 10#include <linux/swapops.h>
12#include <linux/seq_file.h>
13 11
14#include <asm/elf.h> 12#include <asm/elf.h>
15#include <asm/uaccess.h> 13#include <asm/uaccess.h>
@@ -75,40 +73,6 @@ int task_statm(struct mm_struct *mm, int *shared, int *text,
75 return mm->total_vm; 73 return mm->total_vm;
76} 74}
77 75
78int proc_exe_link(struct inode *inode, struct path *path)
79{
80 struct vm_area_struct * vma;
81 int result = -ENOENT;
82 struct task_struct *task = get_proc_task(inode);
83 struct mm_struct * mm = NULL;
84
85 if (task) {
86 mm = get_task_mm(task);
87 put_task_struct(task);
88 }
89 if (!mm)
90 goto out;
91 down_read(&mm->mmap_sem);
92
93 vma = mm->mmap;
94 while (vma) {
95 if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file)
96 break;
97 vma = vma->vm_next;
98 }
99
100 if (vma) {
101 *path = vma->vm_file->f_path;
102 path_get(&vma->vm_file->f_path);
103 result = 0;
104 }
105
106 up_read(&mm->mmap_sem);
107 mmput(mm);
108out:
109 return result;
110}
111
112static void pad_len_spaces(struct seq_file *m, int len) 76static void pad_len_spaces(struct seq_file *m, int len)
113{ 77{
114 len = 25 + sizeof(void*) * 6 - len; 78 len = 25 + sizeof(void*) * 6 - len;
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 8011528518bd..4b4f9cc2f186 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -1,6 +1,7 @@
1 1
2#include <linux/mm.h> 2#include <linux/mm.h>
3#include <linux/file.h> 3#include <linux/file.h>
4#include <linux/fdtable.h>
4#include <linux/mount.h> 5#include <linux/mount.h>
5#include <linux/ptrace.h> 6#include <linux/ptrace.h>
6#include <linux/seq_file.h> 7#include <linux/seq_file.h>
@@ -103,40 +104,6 @@ int task_statm(struct mm_struct *mm, int *shared, int *text,
103 return size; 104 return size;
104} 105}
105 106
106int proc_exe_link(struct inode *inode, struct path *path)
107{
108 struct vm_list_struct *vml;
109 struct vm_area_struct *vma;
110 struct task_struct *task = get_proc_task(inode);
111 struct mm_struct *mm = get_task_mm(task);
112 int result = -ENOENT;
113
114 if (!mm)
115 goto out;
116 down_read(&mm->mmap_sem);
117
118 vml = mm->context.vmlist;
119 vma = NULL;
120 while (vml) {
121 if ((vml->vma->vm_flags & VM_EXECUTABLE) && vml->vma->vm_file) {
122 vma = vml->vma;
123 break;
124 }
125 vml = vml->next;
126 }
127
128 if (vma) {
129 *path = vma->vm_file->f_path;
130 path_get(&vma->vm_file->f_path);
131 result = 0;
132 }
133
134 up_read(&mm->mmap_sem);
135 mmput(mm);
136out:
137 return result;
138}
139
140/* 107/*
141 * display mapping lines for a particular process's /proc/pid/maps 108 * display mapping lines for a particular process's /proc/pid/maps
142 */ 109 */
diff --git a/fs/quota_v2.c b/fs/quota_v2.c
index 23b647f25d08..234ada903633 100644
--- a/fs/quota_v2.c
+++ b/fs/quota_v2.c
@@ -306,7 +306,7 @@ static uint find_free_dqentry(struct dquot *dquot, int *err)
306 printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk); 306 printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk);
307 goto out_buf; 307 goto out_buf;
308 } 308 }
309 dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)+1); 309 le16_add_cpu(&dh->dqdh_entries, 1);
310 memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk)); 310 memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
311 /* Find free structure in block */ 311 /* Find free structure in block */
312 for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++); 312 for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++);
@@ -448,7 +448,7 @@ static int free_dqentry(struct dquot *dquot, uint blk)
448 goto out_buf; 448 goto out_buf;
449 } 449 }
450 dh = (struct v2_disk_dqdbheader *)buf; 450 dh = (struct v2_disk_dqdbheader *)buf;
451 dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)-1); 451 le16_add_cpu(&dh->dqdh_entries, -1);
452 if (!le16_to_cpu(dh->dqdh_entries)) { /* Block got free? */ 452 if (!le16_to_cpu(dh->dqdh_entries)) { /* Block got free? */
453 if ((ret = remove_free_dqentry(sb, type, buf, blk)) < 0 || 453 if ((ret = remove_free_dqentry(sb, type, buf, blk)) < 0 ||
454 (ret = put_free_dqblk(sb, type, buf, blk)) < 0) { 454 (ret = put_free_dqblk(sb, type, buf, blk)) < 0) {
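
The two quota hunks are pure conversions to the le16_add_cpu() helper, which folds the cpu_to_le16(le16_to_cpu(x) + d) round trip into a single call. Below is a small stand-alone demonstration of the equivalence with the byte order handled explicitly so it runs anywhere; le16_add_cpu_demo is an invented name that only mimics the kernel helper's semantics.

/* Stand-alone demo of what le16_add_cpu(&x, d) means on disk. */
#include <stdint.h>
#include <stdio.h>

/* A little-endian 16-bit value kept as raw bytes, as it sits on disk. */
static uint16_t le16_load(const uint8_t b[2]) { return (uint16_t)(b[0] | (b[1] << 8)); }
static void le16_store(uint8_t b[2], uint16_t v) { b[0] = v & 0xff; b[1] = v >> 8; }

/* Equivalent of le16_add_cpu(): convert to CPU order, add, convert back. */
static void le16_add_cpu_demo(uint8_t b[2], int d)
{
	le16_store(b, (uint16_t)(le16_load(b) + d));
}

int main(void)
{
	uint8_t dqdh_entries[2] = { 0xff, 0x00 };	/* 255 stored little-endian */

	le16_add_cpu_demo(dqdh_entries, 1);
	printf("bytes: %02x %02x  value: %u\n",
	       dqdh_entries[0], dqdh_entries[1], le16_load(dqdh_entries));
	return 0;
}
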
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index b41a514b0976..9590b9024300 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -26,6 +26,9 @@
26 26
27#include <linux/fs.h> 27#include <linux/fs.h>
28#include <linux/mm.h> 28#include <linux/mm.h>
29#include <linux/ramfs.h>
30
31#include "internal.h"
29 32
30const struct address_space_operations ramfs_aops = { 33const struct address_space_operations ramfs_aops = {
31 .readpage = simple_readpage, 34 .readpage = simple_readpage,
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 8428d5b2711d..b13123424e49 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -44,7 +44,7 @@ static const struct inode_operations ramfs_dir_inode_operations;
44 44
45static struct backing_dev_info ramfs_backing_dev_info = { 45static struct backing_dev_info ramfs_backing_dev_info = {
46 .ra_pages = 0, /* No readahead */ 46 .ra_pages = 0, /* No readahead */
47 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK | 47 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK |
48 BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY | 48 BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY |
49 BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP, 49 BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP,
50}; 50};
diff --git a/fs/ramfs/internal.h b/fs/ramfs/internal.h
index af7cc074a476..6b330639b51d 100644
--- a/fs/ramfs/internal.h
+++ b/fs/ramfs/internal.h
@@ -11,5 +11,4 @@
11 11
12 12
13extern const struct address_space_operations ramfs_aops; 13extern const struct address_space_operations ramfs_aops;
14extern const struct file_operations ramfs_file_operations;
15extern const struct inode_operations ramfs_file_inode_operations; 14extern const struct inode_operations ramfs_file_inode_operations;
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index da86042b3e03..e396b2fa4743 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2574,11 +2574,9 @@ static int release_journal_dev(struct super_block *super,
2574 2574
2575 result = 0; 2575 result = 0;
2576 2576
2577 if (journal->j_dev_file != NULL) { 2577 if (journal->j_dev_bd != NULL) {
2578 result = filp_close(journal->j_dev_file, NULL); 2578 if (journal->j_dev_bd->bd_dev != super->s_dev)
2579 journal->j_dev_file = NULL; 2579 bd_release(journal->j_dev_bd);
2580 journal->j_dev_bd = NULL;
2581 } else if (journal->j_dev_bd != NULL) {
2582 result = blkdev_put(journal->j_dev_bd); 2580 result = blkdev_put(journal->j_dev_bd);
2583 journal->j_dev_bd = NULL; 2581 journal->j_dev_bd = NULL;
2584 } 2582 }
@@ -2603,7 +2601,6 @@ static int journal_init_dev(struct super_block *super,
2603 result = 0; 2601 result = 0;
2604 2602
2605 journal->j_dev_bd = NULL; 2603 journal->j_dev_bd = NULL;
2606 journal->j_dev_file = NULL;
2607 jdev = SB_ONDISK_JOURNAL_DEVICE(super) ? 2604 jdev = SB_ONDISK_JOURNAL_DEVICE(super) ?
2608 new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev; 2605 new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev;
2609 2606
@@ -2620,35 +2617,34 @@ static int journal_init_dev(struct super_block *super,
2620 "cannot init journal device '%s': %i", 2617 "cannot init journal device '%s': %i",
2621 __bdevname(jdev, b), result); 2618 __bdevname(jdev, b), result);
2622 return result; 2619 return result;
2623 } else if (jdev != super->s_dev) 2620 } else if (jdev != super->s_dev) {
2621 result = bd_claim(journal->j_dev_bd, journal);
2622 if (result) {
2623 blkdev_put(journal->j_dev_bd);
2624 return result;
2625 }
2626
2624 set_blocksize(journal->j_dev_bd, super->s_blocksize); 2627 set_blocksize(journal->j_dev_bd, super->s_blocksize);
2628 }
2629
2625 return 0; 2630 return 0;
2626 } 2631 }
2627 2632
2628 journal->j_dev_file = filp_open(jdev_name, 0, 0); 2633 journal->j_dev_bd = open_bdev_excl(jdev_name, 0, journal);
2629 if (!IS_ERR(journal->j_dev_file)) { 2634 if (IS_ERR(journal->j_dev_bd)) {
2630 struct inode *jdev_inode = journal->j_dev_file->f_mapping->host; 2635 result = PTR_ERR(journal->j_dev_bd);
2631 if (!S_ISBLK(jdev_inode->i_mode)) { 2636 journal->j_dev_bd = NULL;
2632 reiserfs_warning(super, "journal_init_dev: '%s' is "
2633 "not a block device", jdev_name);
2634 result = -ENOTBLK;
2635 release_journal_dev(super, journal);
2636 } else {
2637 /* ok */
2638 journal->j_dev_bd = I_BDEV(jdev_inode);
2639 set_blocksize(journal->j_dev_bd, super->s_blocksize);
2640 reiserfs_info(super,
2641 "journal_init_dev: journal device: %s\n",
2642 bdevname(journal->j_dev_bd, b));
2643 }
2644 } else {
2645 result = PTR_ERR(journal->j_dev_file);
2646 journal->j_dev_file = NULL;
2647 reiserfs_warning(super, 2637 reiserfs_warning(super,
2648 "journal_init_dev: Cannot open '%s': %i", 2638 "journal_init_dev: Cannot open '%s': %i",
2649 jdev_name, result); 2639 jdev_name, result);
2640 return result;
2650 } 2641 }
2651 return result; 2642
2643 set_blocksize(journal->j_dev_bd, super->s_blocksize);
2644 reiserfs_info(super,
2645 "journal_init_dev: journal device: %s\n",
2646 bdevname(journal->j_dev_bd, b));
2647 return 0;
2652} 2648}
2653 2649
2654/** 2650/**
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 8f86c52b30d8..b9dbeeca7049 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -467,6 +467,7 @@ static const struct file_operations r_file_operations = {
467 .read = seq_read, 467 .read = seq_read,
468 .llseek = seq_lseek, 468 .llseek = seq_lseek,
469 .release = seq_release, 469 .release = seq_release,
470 .owner = THIS_MODULE,
470}; 471};
471 472
472static struct proc_dir_entry *proc_info_root = NULL; 473static struct proc_dir_entry *proc_info_root = NULL;
@@ -475,12 +476,8 @@ static const char proc_info_root_name[] = "fs/reiserfs";
475static void add_file(struct super_block *sb, char *name, 476static void add_file(struct super_block *sb, char *name,
476 int (*func) (struct seq_file *, struct super_block *)) 477 int (*func) (struct seq_file *, struct super_block *))
477{ 478{
478 struct proc_dir_entry *de; 479 proc_create_data(name, 0, REISERFS_SB(sb)->procdir,
479 de = create_proc_entry(name, 0, REISERFS_SB(sb)->procdir); 480 &r_file_operations, func);
480 if (de) {
481 de->data = func;
482 de->proc_fops = &r_file_operations;
483 }
484} 481}
485 482
486int reiserfs_proc_info_init(struct super_block *sb) 483int reiserfs_proc_info_init(struct super_block *sb)
diff --git a/fs/select.c b/fs/select.c
index 00f58c5c7e05..8dda969614a9 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -21,6 +21,7 @@
21#include <linux/poll.h> 21#include <linux/poll.h>
22#include <linux/personality.h> /* for STICKY_TIMEOUTS */ 22#include <linux/personality.h> /* for STICKY_TIMEOUTS */
23#include <linux/file.h> 23#include <linux/file.h>
24#include <linux/fdtable.h>
24#include <linux/fs.h> 25#include <linux/fs.h>
25#include <linux/rcupdate.h> 26#include <linux/rcupdate.h>
26 27
@@ -298,7 +299,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
298#define MAX_SELECT_SECONDS \ 299#define MAX_SELECT_SECONDS \
299 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) 300 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
300 301
301static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, 302int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
302 fd_set __user *exp, s64 *timeout) 303 fd_set __user *exp, s64 *timeout)
303{ 304{
304 fd_set_bits fds; 305 fd_set_bits fds;
@@ -425,7 +426,7 @@ sticky:
425 return ret; 426 return ret;
426} 427}
427 428
428#ifdef TIF_RESTORE_SIGMASK 429#ifdef HAVE_SET_RESTORE_SIGMASK
429asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp, 430asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp,
430 fd_set __user *exp, struct timespec __user *tsp, 431 fd_set __user *exp, struct timespec __user *tsp,
431 const sigset_t __user *sigmask, size_t sigsetsize) 432 const sigset_t __user *sigmask, size_t sigsetsize)
@@ -498,7 +499,7 @@ sticky:
498 if (sigmask) { 499 if (sigmask) {
499 memcpy(&current->saved_sigmask, &sigsaved, 500 memcpy(&current->saved_sigmask, &sigsaved,
500 sizeof(sigsaved)); 501 sizeof(sigsaved));
501 set_thread_flag(TIF_RESTORE_SIGMASK); 502 set_restore_sigmask();
502 } 503 }
503 } else if (sigmask) 504 } else if (sigmask)
504 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 505 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
@@ -528,7 +529,7 @@ asmlinkage long sys_pselect6(int n, fd_set __user *inp, fd_set __user *outp,
528 529
529 return sys_pselect7(n, inp, outp, exp, tsp, up, sigsetsize); 530 return sys_pselect7(n, inp, outp, exp, tsp, up, sigsetsize);
530} 531}
531#endif /* TIF_RESTORE_SIGMASK */ 532#endif /* HAVE_SET_RESTORE_SIGMASK */
532 533
533struct poll_list { 534struct poll_list {
534 struct poll_list *next; 535 struct poll_list *next;
@@ -759,7 +760,7 @@ asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
759 return ret; 760 return ret;
760} 761}
761 762
762#ifdef TIF_RESTORE_SIGMASK 763#ifdef HAVE_SET_RESTORE_SIGMASK
763asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, 764asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
764 struct timespec __user *tsp, const sigset_t __user *sigmask, 765 struct timespec __user *tsp, const sigset_t __user *sigmask,
765 size_t sigsetsize) 766 size_t sigsetsize)
@@ -805,7 +806,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
805 if (sigmask) { 806 if (sigmask) {
806 memcpy(&current->saved_sigmask, &sigsaved, 807 memcpy(&current->saved_sigmask, &sigsaved,
807 sizeof(sigsaved)); 808 sizeof(sigsaved));
808 set_thread_flag(TIF_RESTORE_SIGMASK); 809 set_restore_sigmask();
809 } 810 }
810 ret = -ERESTARTNOHAND; 811 ret = -ERESTARTNOHAND;
811 } else if (sigmask) 812 } else if (sigmask)
@@ -839,4 +840,4 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
839 840
840 return ret; 841 return ret;
841} 842}
842#endif /* TIF_RESTORE_SIGMASK */ 843#endif /* HAVE_SET_RESTORE_SIGMASK */
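
The sigmask juggling these hunks adjust exists so that pselect/ppoll can swap a signal mask in atomically for the duration of the call and have the old mask restored on the way back to user space; set_restore_sigmask() simply replaces the open-coded TIF_RESTORE_SIGMASK flag setting. From user space that machinery is invisible. A short example of the ppoll() side, which is what ultimately exercises this path (the descriptor and signal choice are arbitrary):

/* Block SIGINT except while ppoll() is sleeping, so the signal can
 * only be delivered at a well-defined point. */
#define _GNU_SOURCE
#include <poll.h>
#include <signal.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

int main(void)
{
	sigset_t blocked, during_poll;
	struct pollfd pfd = { .fd = STDIN_FILENO, .events = POLLIN };
	struct timespec timeout = { .tv_sec = 5, .tv_nsec = 0 };

	sigemptyset(&blocked);
	sigaddset(&blocked, SIGINT);
	sigprocmask(SIG_BLOCK, &blocked, NULL);

	/* Empty mask applied only for the duration of the call; the
	 * kernel restores the old mask before returning to user space. */
	sigemptyset(&during_poll);

	int ready = ppoll(&pfd, 1, &timeout, &during_poll);
	if (ready < 0)
		perror("ppoll");
	else
		printf("%d descriptor(s) ready\n", ready);
	return 0;
}
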
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 8ead0db35933..619725644c75 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -207,11 +207,8 @@ static const struct file_operations signalfd_fops = {
207 207
208asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask) 208asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask)
209{ 209{
210 int error;
211 sigset_t sigmask; 210 sigset_t sigmask;
212 struct signalfd_ctx *ctx; 211 struct signalfd_ctx *ctx;
213 struct file *file;
214 struct inode *inode;
215 212
216 if (sizemask != sizeof(sigset_t) || 213 if (sizemask != sizeof(sigset_t) ||
217 copy_from_user(&sigmask, user_mask, sizeof(sigmask))) 214 copy_from_user(&sigmask, user_mask, sizeof(sigmask)))
@@ -230,12 +227,11 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
230 * When we call this, the initialization must be complete, since 227 * When we call this, the initialization must be complete, since
231 * anon_inode_getfd() will install the fd. 228 * anon_inode_getfd() will install the fd.
232 */ 229 */
233 error = anon_inode_getfd(&ufd, &inode, &file, "[signalfd]", 230 ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx);
234 &signalfd_fops, ctx); 231 if (ufd < 0)
235 if (error) 232 kfree(ctx);
236 goto err_fdalloc;
237 } else { 233 } else {
238 file = fget(ufd); 234 struct file *file = fget(ufd);
239 if (!file) 235 if (!file)
240 return -EBADF; 236 return -EBADF;
241 ctx = file->private_data; 237 ctx = file->private_data;
@@ -252,9 +248,4 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
252 } 248 }
253 249
254 return ufd; 250 return ufd;
255
256err_fdalloc:
257 kfree(ctx);
258 return error;
259} 251}
260
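
Both this hunk and the timerfd one further down switch to the reworked anon_inode_getfd(), which now installs the file itself and returns the descriptor (or a negative errno) instead of filling in three out-parameters. A condensed sketch of the new calling convention follows; struct example_ctx, example_fops and the "[examplefd]" name are all invented for illustration, and only the error-path kfree() is left to the caller.

/* Sketch of the 2.6.26-era anon_inode_getfd() convention. */
#include <linux/anon_inodes.h>
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/slab.h>

struct example_ctx {
	int whatever;
};

static const struct file_operations example_fops = {
	.owner = THIS_MODULE,
	/* real .read/.poll/.release hooks assumed elsewhere */
};

static long example_create_fd(void)
{
	struct example_ctx *ctx;
	int ufd;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	/*
	 * On success the descriptor is already installed and the file
	 * owns ctx via file->private_data; on failure nothing was
	 * installed, so the caller still owns (and must free) ctx.
	 */
	ufd = anon_inode_getfd("[examplefd]", &example_fops, ctx);
	if (ufd < 0)
		kfree(ctx);

	return ufd;
}
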
diff --git a/fs/smbfs/smb_debug.h b/fs/smbfs/smb_debug.h
index 734972b92694..fc4b1a5dd755 100644
--- a/fs/smbfs/smb_debug.h
+++ b/fs/smbfs/smb_debug.h
@@ -11,14 +11,14 @@
11 * these are normally enabled. 11 * these are normally enabled.
12 */ 12 */
13#ifdef SMBFS_PARANOIA 13#ifdef SMBFS_PARANOIA
14# define PARANOIA(f, a...) printk(KERN_NOTICE "%s: " f, __FUNCTION__ , ## a) 14# define PARANOIA(f, a...) printk(KERN_NOTICE "%s: " f, __func__ , ## a)
15#else 15#else
16# define PARANOIA(f, a...) do { ; } while(0) 16# define PARANOIA(f, a...) do { ; } while(0)
17#endif 17#endif
18 18
19/* lots of debug messages */ 19/* lots of debug messages */
20#ifdef SMBFS_DEBUG_VERBOSE 20#ifdef SMBFS_DEBUG_VERBOSE
21# define VERBOSE(f, a...) printk(KERN_DEBUG "%s: " f, __FUNCTION__ , ## a) 21# define VERBOSE(f, a...) printk(KERN_DEBUG "%s: " f, __func__ , ## a)
22#else 22#else
23# define VERBOSE(f, a...) do { ; } while(0) 23# define VERBOSE(f, a...) do { ; } while(0)
24#endif 24#endif
@@ -28,7 +28,7 @@
28 * too common name. 28 * too common name.
29 */ 29 */
30#ifdef SMBFS_DEBUG 30#ifdef SMBFS_DEBUG
31#define DEBUG1(f, a...) printk(KERN_DEBUG "%s: " f, __FUNCTION__ , ## a) 31#define DEBUG1(f, a...) printk(KERN_DEBUG "%s: " f, __func__ , ## a)
32#else 32#else
33#define DEBUG1(f, a...) do { ; } while(0) 33#define DEBUG1(f, a...) do { ; } while(0)
34#endif 34#endif
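
These are mechanical replacements of the GCC-specific __FUNCTION__ spelling with the standard C99 __func__; both yield the enclosing function's name. A two-line illustration, with the DEBUG_EX macro name made up for the example:

#include <stdio.h>

/* __func__ is defined by C99 inside every function body, so it can be
 * pasted into debug macros exactly as __FUNCTION__ was. */
#define DEBUG_EX(fmt, ...) printf("%s: " fmt, __func__, ##__VA_ARGS__)

int main(void)
{
	DEBUG_EX("answer=%d\n", 42);	/* prints "main: answer=42" */
	return 0;
}
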
diff --git a/fs/splice.c b/fs/splice.c
index eeb1a86a7014..78150038b584 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -811,24 +811,19 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
811{ 811{
812 struct address_space *mapping = out->f_mapping; 812 struct address_space *mapping = out->f_mapping;
813 struct inode *inode = mapping->host; 813 struct inode *inode = mapping->host;
814 int killsuid, killpriv; 814 struct splice_desc sd = {
815 .total_len = len,
816 .flags = flags,
817 .pos = *ppos,
818 .u.file = out,
819 };
815 ssize_t ret; 820 ssize_t ret;
816 int err = 0;
817
818 killpriv = security_inode_need_killpriv(out->f_path.dentry);
819 killsuid = should_remove_suid(out->f_path.dentry);
820 if (unlikely(killsuid || killpriv)) {
821 mutex_lock(&inode->i_mutex);
822 if (killpriv)
823 err = security_inode_killpriv(out->f_path.dentry);
824 if (!err && killsuid)
825 err = __remove_suid(out->f_path.dentry, killsuid);
826 mutex_unlock(&inode->i_mutex);
827 if (err)
828 return err;
829 }
830 821
831 ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); 822 inode_double_lock(inode, pipe->inode);
823 ret = remove_suid(out->f_path.dentry);
824 if (likely(!ret))
825 ret = __splice_from_pipe(pipe, &sd, pipe_to_file);
826 inode_double_unlock(inode, pipe->inode);
832 if (ret > 0) { 827 if (ret > 0) {
833 unsigned long nr_pages; 828 unsigned long nr_pages;
834 829
@@ -840,6 +835,8 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
840 * sync it. 835 * sync it.
841 */ 836 */
842 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { 837 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
838 int err;
839
843 mutex_lock(&inode->i_mutex); 840 mutex_lock(&inode->i_mutex);
844 err = generic_osync_inode(inode, mapping, 841 err = generic_osync_inode(inode, mapping,
845 OSYNC_METADATA|OSYNC_DATA); 842 OSYNC_METADATA|OSYNC_DATA);
diff --git a/fs/super.c b/fs/super.c
index a5a4aca7e22f..453877c5697b 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -117,7 +117,7 @@ static inline void destroy_super(struct super_block *s)
117 * Drop a superblock's refcount. Returns non-zero if the superblock was 117 * Drop a superblock's refcount. Returns non-zero if the superblock was
118 * destroyed. The caller must hold sb_lock. 118 * destroyed. The caller must hold sb_lock.
119 */ 119 */
120int __put_super(struct super_block *sb) 120static int __put_super(struct super_block *sb)
121{ 121{
122 int ret = 0; 122 int ret = 0;
123 123
diff --git a/fs/sync.c b/fs/sync.c
index 7cd005ea7639..228e17b5e9ee 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -64,7 +64,7 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
64 /* sync the superblock to buffers */ 64 /* sync the superblock to buffers */
65 sb = inode->i_sb; 65 sb = inode->i_sb;
66 lock_super(sb); 66 lock_super(sb);
67 if (sb->s_op->write_super) 67 if (sb->s_dirt && sb->s_op->write_super)
68 sb->s_op->write_super(sb); 68 sb->s_op->write_super(sb);
69 unlock_super(sb); 69 unlock_super(sb);
70 70
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index dbdfabbfd609..e7735f643cd1 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -135,7 +135,7 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos)
135 goto out; 135 goto out;
136 } 136 }
137 pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", 137 pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n",
138 __FUNCTION__, count, *ppos, buffer->page); 138 __func__, count, *ppos, buffer->page);
139 retval = simple_read_from_buffer(buf, count, ppos, buffer->page, 139 retval = simple_read_from_buffer(buf, count, ppos, buffer->page,
140 buffer->count); 140 buffer->count);
141out: 141out:
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index d9262f74f94e..eb53c632f856 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -30,7 +30,7 @@ static const struct address_space_operations sysfs_aops = {
30 30
31static struct backing_dev_info sysfs_backing_dev_info = { 31static struct backing_dev_info sysfs_backing_dev_info = {
32 .ra_pages = 0, /* No readahead */ 32 .ra_pages = 0, /* No readahead */
33 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, 33 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
34}; 34};
35 35
36static const struct inode_operations sysfs_inode_operations ={ 36static const struct inode_operations sysfs_inode_operations ={
@@ -59,6 +59,8 @@ int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
59 if (error) 59 if (error)
60 return error; 60 return error;
61 61
62 iattr->ia_valid &= ~ATTR_SIZE; /* ignore size changes */
63
62 error = inode_setattr(inode, iattr); 64 error = inode_setattr(inode, iattr);
63 if (error) 65 if (error)
64 return error; 66 return error;
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 74168266cd59..14f0023984d7 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -61,7 +61,7 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
61 /* instantiate and link root dentry */ 61 /* instantiate and link root dentry */
62 root = d_alloc_root(inode); 62 root = d_alloc_root(inode);
63 if (!root) { 63 if (!root) {
64 pr_debug("%s: could not get root dentry!\n",__FUNCTION__); 64 pr_debug("%s: could not get root dentry!\n",__func__);
65 iput(inode); 65 iput(inode);
66 return -ENOMEM; 66 return -ENOMEM;
67 } 67 }
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 42d51d1c05cd..38ebe3f85b3d 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -217,9 +217,9 @@ static inline __fs32 fs32_add(struct sysv_sb_info *sbi, __fs32 *n, int d)
217 if (sbi->s_bytesex == BYTESEX_PDP) 217 if (sbi->s_bytesex == BYTESEX_PDP)
218 *(__u32*)n = PDP_swab(PDP_swab(*(__u32*)n)+d); 218 *(__u32*)n = PDP_swab(PDP_swab(*(__u32*)n)+d);
219 else if (sbi->s_bytesex == BYTESEX_LE) 219 else if (sbi->s_bytesex == BYTESEX_LE)
220 *(__le32*)n = cpu_to_le32(le32_to_cpu(*(__le32*)n)+d); 220 le32_add_cpu((__le32 *)n, d);
221 else 221 else
222 *(__be32*)n = cpu_to_be32(be32_to_cpu(*(__be32*)n)+d); 222 be32_add_cpu((__be32 *)n, d);
223 return *n; 223 return *n;
224} 224}
225 225
@@ -242,9 +242,9 @@ static inline __fs16 cpu_to_fs16(struct sysv_sb_info *sbi, __u16 n)
242static inline __fs16 fs16_add(struct sysv_sb_info *sbi, __fs16 *n, int d) 242static inline __fs16 fs16_add(struct sysv_sb_info *sbi, __fs16 *n, int d)
243{ 243{
244 if (sbi->s_bytesex != BYTESEX_BE) 244 if (sbi->s_bytesex != BYTESEX_BE)
245 *(__le16*)n = cpu_to_le16(le16_to_cpu(*(__le16 *)n)+d); 245 le16_add_cpu((__le16 *)n, d);
246 else 246 else
247 *(__be16*)n = cpu_to_be16(be16_to_cpu(*(__be16 *)n)+d); 247 be16_add_cpu((__be16 *)n, d);
248 return *n; 248 return *n;
249} 249}
250 250
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 10c80b59ec4b..d87d354ec424 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -20,6 +20,7 @@
20#include <linux/hrtimer.h> 20#include <linux/hrtimer.h>
21#include <linux/anon_inodes.h> 21#include <linux/anon_inodes.h>
22#include <linux/timerfd.h> 22#include <linux/timerfd.h>
23#include <linux/syscalls.h>
23 24
24struct timerfd_ctx { 25struct timerfd_ctx {
25 struct hrtimer tmr; 26 struct hrtimer tmr;
@@ -180,10 +181,8 @@ static struct file *timerfd_fget(int fd)
180 181
181asmlinkage long sys_timerfd_create(int clockid, int flags) 182asmlinkage long sys_timerfd_create(int clockid, int flags)
182{ 183{
183 int error, ufd; 184 int ufd;
184 struct timerfd_ctx *ctx; 185 struct timerfd_ctx *ctx;
185 struct file *file;
186 struct inode *inode;
187 186
188 if (flags) 187 if (flags)
189 return -EINVAL; 188 return -EINVAL;
@@ -199,12 +198,9 @@ asmlinkage long sys_timerfd_create(int clockid, int flags)
199 ctx->clockid = clockid; 198 ctx->clockid = clockid;
200 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); 199 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
201 200
202 error = anon_inode_getfd(&ufd, &inode, &file, "[timerfd]", 201 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx);
203 &timerfd_fops, ctx); 202 if (ufd < 0)
204 if (error) {
205 kfree(ctx); 203 kfree(ctx);
206 return error;
207 }
208 204
209 return ufd; 205 return ufd;
210} 206}
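
The timerfd conversion mirrors the signalfd one above and should be invisible from user space. For reference, a minimal consumer of the interface this file implements (glibc wrappers for timerfd_create/timerfd_settime assumed):

/* Wait once for a 1-second timerfd expiration. */
#include <sys/timerfd.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	struct itimerspec its = {
		.it_value    = { .tv_sec = 1, .tv_nsec = 0 },	/* first expiry */
		.it_interval = { 0 },				/* one-shot */
	};
	uint64_t expirations;
	int fd;

	fd = timerfd_create(CLOCK_MONOTONIC, 0);
	if (fd < 0) {
		perror("timerfd_create");
		return 1;
	}
	if (timerfd_settime(fd, 0, &its, NULL) < 0) {
		perror("timerfd_settime");
		return 1;
	}
	/* read() blocks until the timer fires, then returns the count. */
	if (read(fd, &expirations, sizeof(expirations)) == sizeof(expirations))
		printf("timer expired %llu time(s)\n",
		       (unsigned long long)expirations);
	close(fd);
	return 0;
}
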
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 2b34c8ca6c83..d3231947db19 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -32,6 +32,7 @@
32#include <linux/buffer_head.h> 32#include <linux/buffer_head.h>
33#include <linux/sched.h> 33#include <linux/sched.h>
34#include <linux/crc-itu-t.h> 34#include <linux/crc-itu-t.h>
35#include <linux/exportfs.h>
35 36
36static inline int udf_match(int len1, const char *name1, int len2, 37static inline int udf_match(int len1, const char *name1, int len2,
37 const char *name2) 38 const char *name2)
@@ -158,6 +159,8 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
158 sector_t offset; 159 sector_t offset;
159 struct extent_position epos = {}; 160 struct extent_position epos = {};
160 struct udf_inode_info *dinfo = UDF_I(dir); 161 struct udf_inode_info *dinfo = UDF_I(dir);
162 int isdotdot = dentry->d_name.len == 2 &&
163 dentry->d_name.name[0] == '.' && dentry->d_name.name[1] == '.';
161 164
162 size = udf_ext0_offset(dir) + dir->i_size; 165 size = udf_ext0_offset(dir) + dir->i_size;
163 f_pos = udf_ext0_offset(dir); 166 f_pos = udf_ext0_offset(dir);
@@ -225,6 +228,12 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
225 continue; 228 continue;
226 } 229 }
227 230
231 if ((cfi->fileCharacteristics & FID_FILE_CHAR_PARENT) &&
232 isdotdot) {
233 brelse(epos.bh);
234 return fi;
235 }
236
228 if (!lfi) 237 if (!lfi)
229 continue; 238 continue;
230 239
@@ -286,9 +295,8 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
286 } 295 }
287 } 296 }
288 unlock_kernel(); 297 unlock_kernel();
289 d_add(dentry, inode);
290 298
291 return NULL; 299 return d_splice_alias(inode, dentry);
292} 300}
293 301
294static struct fileIdentDesc *udf_add_entry(struct inode *dir, 302static struct fileIdentDesc *udf_add_entry(struct inode *dir,
@@ -307,7 +315,7 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
307 uint16_t liu; 315 uint16_t liu;
308 int block; 316 int block;
309 kernel_lb_addr eloc; 317 kernel_lb_addr eloc;
310 uint32_t elen; 318 uint32_t elen = 0;
311 sector_t offset; 319 sector_t offset;
312 struct extent_position epos = {}; 320 struct extent_position epos = {};
313 struct udf_inode_info *dinfo; 321 struct udf_inode_info *dinfo;
@@ -398,7 +406,8 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
398 } 406 }
399 407
400add: 408add:
401 if (dinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { 409 /* Is there any extent whose size we need to round up? */
410 if (dinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB && elen) {
402 elen = (elen + sb->s_blocksize - 1) & ~(sb->s_blocksize - 1); 411 elen = (elen + sb->s_blocksize - 1) & ~(sb->s_blocksize - 1);
403 if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) 412 if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
404 epos.offset -= sizeof(short_ad); 413 epos.offset -= sizeof(short_ad);
@@ -1232,6 +1241,134 @@ end_rename:
1232 return retval; 1241 return retval;
1233} 1242}
1234 1243
1244static struct dentry *udf_get_parent(struct dentry *child)
1245{
1246 struct dentry *parent;
1247 struct inode *inode = NULL;
1248 struct dentry dotdot;
1249 struct fileIdentDesc cfi;
1250 struct udf_fileident_bh fibh;
1251
1252 dotdot.d_name.name = "..";
1253 dotdot.d_name.len = 2;
1254
1255 lock_kernel();
1256 if (!udf_find_entry(child->d_inode, &dotdot, &fibh, &cfi))
1257 goto out_unlock;
1258
1259 if (fibh.sbh != fibh.ebh)
1260 brelse(fibh.ebh);
1261 brelse(fibh.sbh);
1262
1263 inode = udf_iget(child->d_inode->i_sb,
1264 lelb_to_cpu(cfi.icb.extLocation));
1265 if (!inode)
1266 goto out_unlock;
1267 unlock_kernel();
1268
1269 parent = d_alloc_anon(inode);
1270 if (!parent) {
1271 iput(inode);
1272 parent = ERR_PTR(-ENOMEM);
1273 }
1274
1275 return parent;
1276out_unlock:
1277 unlock_kernel();
1278 return ERR_PTR(-EACCES);
1279}
1280
1281
1282static struct dentry *udf_nfs_get_inode(struct super_block *sb, u32 block,
1283 u16 partref, __u32 generation)
1284{
1285 struct inode *inode;
1286 struct dentry *result;
1287 kernel_lb_addr loc;
1288
1289 if (block == 0)
1290 return ERR_PTR(-ESTALE);
1291
1292 loc.logicalBlockNum = block;
1293 loc.partitionReferenceNum = partref;
1294 inode = udf_iget(sb, loc);
1295
1296 if (inode == NULL)
1297 return ERR_PTR(-ENOMEM);
1298
1299 if (generation && inode->i_generation != generation) {
1300 iput(inode);
1301 return ERR_PTR(-ESTALE);
1302 }
1303 result = d_alloc_anon(inode);
1304 if (!result) {
1305 iput(inode);
1306 return ERR_PTR(-ENOMEM);
1307 }
1308 return result;
1309}
1310
1311static struct dentry *udf_fh_to_dentry(struct super_block *sb,
1312 struct fid *fid, int fh_len, int fh_type)
1313{
1314 if ((fh_len != 3 && fh_len != 5) ||
1315 (fh_type != FILEID_UDF_WITH_PARENT &&
1316 fh_type != FILEID_UDF_WITHOUT_PARENT))
1317 return NULL;
1318
1319 return udf_nfs_get_inode(sb, fid->udf.block, fid->udf.partref,
1320 fid->udf.generation);
1321}
1322
1323static struct dentry *udf_fh_to_parent(struct super_block *sb,
1324 struct fid *fid, int fh_len, int fh_type)
1325{
1326 if (fh_len != 5 || fh_type != FILEID_UDF_WITH_PARENT)
1327 return NULL;
1328
1329 return udf_nfs_get_inode(sb, fid->udf.parent_block,
1330 fid->udf.parent_partref,
1331 fid->udf.parent_generation);
1332}
1333static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp,
1334 int connectable)
1335{
1336 int len = *lenp;
1337 struct inode *inode = de->d_inode;
1338 kernel_lb_addr location = UDF_I(inode)->i_location;
1339 struct fid *fid = (struct fid *)fh;
1340 int type = FILEID_UDF_WITHOUT_PARENT;
1341
1342 if (len < 3 || (connectable && len < 5))
1343 return 255;
1344
1345 *lenp = 3;
1346 fid->udf.block = location.logicalBlockNum;
1347 fid->udf.partref = location.partitionReferenceNum;
1348 fid->udf.generation = inode->i_generation;
1349
1350 if (connectable && !S_ISDIR(inode->i_mode)) {
1351 spin_lock(&de->d_lock);
1352 inode = de->d_parent->d_inode;
1353 location = UDF_I(inode)->i_location;
1354 fid->udf.parent_block = location.logicalBlockNum;
1355 fid->udf.parent_partref = location.partitionReferenceNum;
1356 fid->udf.parent_generation = inode->i_generation;
1357 spin_unlock(&de->d_lock);
1358 *lenp = 5;
1359 type = FILEID_UDF_WITH_PARENT;
1360 }
1361
1362 return type;
1363}
1364
1365const struct export_operations udf_export_ops = {
1366 .encode_fh = udf_encode_fh,
1367 .fh_to_dentry = udf_fh_to_dentry,
1368 .fh_to_parent = udf_fh_to_parent,
1369 .get_parent = udf_get_parent,
1370};
1371
1235const struct inode_operations udf_dir_inode_operations = { 1372const struct inode_operations udf_dir_inode_operations = {
1236 .lookup = udf_lookup, 1373 .lookup = udf_lookup,
1237 .create = udf_create, 1374 .create = udf_create,
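
Two related things happen in the namei changes: export_operations are added for NFS file handles, and udf_lookup() now returns d_splice_alias() so that dentries obtained through the new fh_to_dentry/get_parent callbacks can be reconnected to the tree. A compressed sketch of the lookup idiom for a hypothetical filesystem is shown below; myfs_iget() is a placeholder for the filesystem's own inode lookup.

/* Sketch: the d_splice_alias() lookup pattern used above. */
static struct dentry *myfs_lookup(struct inode *dir, struct dentry *dentry,
				  struct nameidata *nd)
{
	/* myfs_iget() is hypothetical; it returns NULL when the name
	 * does not exist in 'dir'. */
	struct inode *inode = myfs_iget(dir->i_sb, dir, &dentry->d_name);

	/*
	 * d_splice_alias() attaches the inode to this dentry or, if an
	 * anonymous (NFS file-handle) dentry already exists for it,
	 * splices that one in and returns it.  A NULL inode just makes
	 * a negative dentry, so it also replaces the old
	 * d_add(dentry, inode); return NULL; pair.
	 */
	return d_splice_alias(inode, dentry);
}
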
diff --git a/fs/udf/partition.c b/fs/udf/partition.c
index 63610f026ae1..96dfd207c3d6 100644
--- a/fs/udf/partition.c
+++ b/fs/udf/partition.c
@@ -27,8 +27,8 @@
27#include <linux/slab.h> 27#include <linux/slab.h>
28#include <linux/buffer_head.h> 28#include <linux/buffer_head.h>
29 29
30inline uint32_t udf_get_pblock(struct super_block *sb, uint32_t block, 30uint32_t udf_get_pblock(struct super_block *sb, uint32_t block,
31 uint16_t partition, uint32_t offset) 31 uint16_t partition, uint32_t offset)
32{ 32{
33 struct udf_sb_info *sbi = UDF_SB(sb); 33 struct udf_sb_info *sbi = UDF_SB(sb);
34 struct udf_part_map *map; 34 struct udf_part_map *map;
diff --git a/fs/udf/super.c b/fs/udf/super.c
index b564fc140fe4..7a5f69be6ac2 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -240,7 +240,7 @@ static int udf_sb_alloc_partition_maps(struct super_block *sb, u32 count)
240 sbi->s_partmaps = kcalloc(count, sizeof(struct udf_part_map), 240 sbi->s_partmaps = kcalloc(count, sizeof(struct udf_part_map),
241 GFP_KERNEL); 241 GFP_KERNEL);
242 if (!sbi->s_partmaps) { 242 if (!sbi->s_partmaps) {
243 udf_error(sb, __FUNCTION__, 243 udf_error(sb, __func__,
244 "Unable to allocate space for %d partition maps", 244 "Unable to allocate space for %d partition maps",
245 count); 245 count);
246 sbi->s_partitions = 0; 246 sbi->s_partitions = 0;
@@ -1086,7 +1086,7 @@ static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index)
 	bitmap = vmalloc(size); /* TODO: get rid of vmalloc */
 
 	if (bitmap == NULL) {
-		udf_error(sb, __FUNCTION__,
+		udf_error(sb, __func__,
 			  "Unable to allocate space for bitmap "
 			  "and %d buffer_head pointers", nr_groups);
 		return NULL;
@@ -1933,6 +1933,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 
 	/* Fill in the rest of the superblock */
 	sb->s_op = &udf_sb_ops;
+	sb->s_export_op = &udf_export_ops;
 	sb->dq_op = NULL;
 	sb->s_dirt = 0;
 	sb->s_magic = UDF_SUPER_MAGIC;
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index f3f45d029277..8fa9c2d70911 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -73,6 +73,7 @@ struct task_struct;
 struct buffer_head;
 struct super_block;
 
+extern const struct export_operations udf_export_ops;
 extern const struct inode_operations udf_dir_inode_operations;
 extern const struct file_operations udf_dir_operations;
 extern const struct inode_operations udf_file_inode_operations;
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index 244a1aaa940e..11c035168ea6 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -107,7 +107,6 @@ extern struct inode * ufs_new_inode (struct inode *, int);
 
 /* inode.c */
 extern struct inode *ufs_iget(struct super_block *, unsigned long);
-extern void ufs_put_inode (struct inode *);
 extern int ufs_write_inode (struct inode *, int);
 extern int ufs_sync_inode (struct inode *);
 extern void ufs_delete_inode (struct inode *);
diff --git a/fs/utimes.c b/fs/utimes.c
index a2bef77dc9c9..af059d5cb485 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -40,9 +40,14 @@ asmlinkage long sys_utime(char __user *filename, struct utimbuf __user *times)
 
 #endif
 
+static bool nsec_special(long nsec)
+{
+	return nsec == UTIME_OMIT || nsec == UTIME_NOW;
+}
+
 static bool nsec_valid(long nsec)
 {
-	if (nsec == UTIME_OMIT || nsec == UTIME_NOW)
+	if (nsec_special(nsec))
 		return true;
 
 	return nsec >= 0 && nsec <= 999999999;
@@ -119,7 +124,15 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
 			newattrs.ia_mtime.tv_nsec = times[1].tv_nsec;
 			newattrs.ia_valid |= ATTR_MTIME_SET;
 		}
-	} else {
+	}
+
+	/*
+	 * If times is NULL or both times are either UTIME_OMIT or
+	 * UTIME_NOW, then need to check permissions, because
+	 * inode_change_ok() won't do it.
+	 */
+	if (!times || (nsec_special(times[0].tv_nsec) &&
+		       nsec_special(times[1].tv_nsec))) {
 		error = -EACCES;
 		if (IS_IMMUTABLE(inode))
 			goto mnt_drop_write_and_out;
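Illustration (not part of this diff): the timestamp combinations recognized by the new nsec_special() helper are exactly the ones that now take the explicit permission check in do_utimes() rather than relying on inode_change_ok() alone. A minimal userspace sketch using utimensat(2); "testfile" is just a placeholder path:

#define _GNU_SOURCE
#include <fcntl.h>	/* AT_FDCWD */
#include <stdio.h>
#include <sys/stat.h>	/* utimensat, UTIME_NOW, UTIME_OMIT */

int main(void)
{
	struct timespec both_now[2] = {
		{ .tv_nsec = UTIME_NOW },	/* atime := current time */
		{ .tv_nsec = UTIME_NOW },	/* mtime := current time */
	};
	struct timespec only_mtime[2] = {
		{ .tv_nsec = UTIME_OMIT },	/* leave atime untouched */
		{ .tv_nsec = UTIME_NOW },	/* mtime := current time */
	};

	/* Equivalent to passing a NULL times array: plain "touch" semantics. */
	if (utimensat(AT_FDCWD, "testfile", both_now, 0) == -1)
		perror("utimensat(UTIME_NOW, UTIME_NOW)");

	/* Both nsec values are still "special", so the same check applies. */
	if (utimensat(AT_FDCWD, "testfile", only_mtime, 0) == -1)
		perror("utimensat(UTIME_OMIT, UTIME_NOW)");

	return 0;
}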
diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c
index 5b66162d0747..a3522727ea5b 100644
--- a/fs/vfat/namei.c
+++ b/fs/vfat/namei.c
@@ -986,7 +986,7 @@ error_inode:
 	if (corrupt < 0) {
 		fat_fs_panic(new_dir->i_sb,
 			     "%s: Filesystem corrupted (i_pos %lld)",
-			     __FUNCTION__, sinfo.i_pos);
+			     __func__, sinfo.i_pos);
 	}
 	goto out;
 }
diff --git a/fs/xattr.c b/fs/xattr.c
index 89a942f07e1b..4706a8b1f495 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -67,7 +67,7 @@ xattr_permission(struct inode *inode, const char *name, int mask)
 }
 
 int
-vfs_setxattr(struct dentry *dentry, char *name, void *value,
+vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 	     size_t size, int flags)
 {
 	struct inode *inode = dentry->d_inode;
@@ -131,7 +131,7 @@ out_noalloc:
 EXPORT_SYMBOL_GPL(xattr_getsecurity);
 
 ssize_t
-vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size)
+vfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size)
 {
 	struct inode *inode = dentry->d_inode;
 	int error;
@@ -187,7 +187,7 @@ vfs_listxattr(struct dentry *d, char *list, size_t size)
 EXPORT_SYMBOL_GPL(vfs_listxattr);
 
 int
-vfs_removexattr(struct dentry *dentry, char *name)
+vfs_removexattr(struct dentry *dentry, const char *name)
 {
 	struct inode *inode = dentry->d_inode;
 	int error;
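Illustration (not part of this diff): the remaining hunks propagate the same const-qualification down to the syscall entry points. The userspace wrappers in <sys/xattr.h> have always taken const name/value pointers, which is what the kernel-side prototypes are being aligned with. A minimal sketch; the path and attribute name are placeholders:

#include <stdio.h>
#include <sys/types.h>
#include <sys/xattr.h>

int main(void)
{
	const char *path = "testfile";		/* placeholder path */
	const char *name = "user.comment";	/* "user." namespace attribute */
	const char value[] = "hello";
	char buf[64];
	ssize_t len;

	/* setxattr(const char *path, const char *name, const void *value, size, flags) */
	if (setxattr(path, name, value, sizeof(value) - 1, 0) == -1)
		perror("setxattr");

	/* getxattr(const char *path, const char *name, void *value, size_t size) */
	len = getxattr(path, name, buf, sizeof(buf));
	if (len == -1)
		perror("getxattr");
	else
		printf("%s = %.*s\n", name, (int)len, buf);

	/* removexattr(const char *path, const char *name) */
	if (removexattr(path, name) == -1)
		perror("removexattr");

	return 0;
}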
@@ -218,7 +218,7 @@ EXPORT_SYMBOL_GPL(vfs_removexattr);
  * Extended attribute SET operations
  */
 static long
-setxattr(struct dentry *d, char __user *name, void __user *value,
+setxattr(struct dentry *d, const char __user *name, const void __user *value,
 	 size_t size, int flags)
 {
 	int error;
@@ -252,8 +252,8 @@ setxattr(struct dentry *d, char __user *name, void __user *value,
 }
 
 asmlinkage long
-sys_setxattr(char __user *path, char __user *name, void __user *value,
-	     size_t size, int flags)
+sys_setxattr(const char __user *path, const char __user *name,
+	     const void __user *value, size_t size, int flags)
 {
 	struct nameidata nd;
 	int error;
@@ -271,8 +271,8 @@ sys_setxattr(char __user *path, char __user *name, void __user *value,
 }
 
 asmlinkage long
-sys_lsetxattr(char __user *path, char __user *name, void __user *value,
-	      size_t size, int flags)
+sys_lsetxattr(const char __user *path, const char __user *name,
+	      const void __user *value, size_t size, int flags)
 {
 	struct nameidata nd;
 	int error;
@@ -290,7 +290,7 @@ sys_lsetxattr(char __user *path, char __user *name, void __user *value,
 }
 
 asmlinkage long
-sys_fsetxattr(int fd, char __user *name, void __user *value,
+sys_fsetxattr(int fd, const char __user *name, const void __user *value,
 	      size_t size, int flags)
 {
 	struct file *f;
@@ -315,7 +315,8 @@ sys_fsetxattr(int fd, char __user *name, void __user *value,
  * Extended attribute GET operations
  */
 static ssize_t
-getxattr(struct dentry *d, char __user *name, void __user *value, size_t size)
+getxattr(struct dentry *d, const char __user *name, void __user *value,
+	 size_t size)
 {
 	ssize_t error;
 	void *kvalue = NULL;
@@ -349,8 +350,8 @@ getxattr(struct dentry *d, char __user *name, void __user *value, size_t size)
 }
 
 asmlinkage ssize_t
-sys_getxattr(char __user *path, char __user *name, void __user *value,
-	     size_t size)
+sys_getxattr(const char __user *path, const char __user *name,
+	     void __user *value, size_t size)
 {
 	struct nameidata nd;
 	ssize_t error;
@@ -364,7 +365,7 @@ sys_getxattr(char __user *path, char __user *name, void __user *value,
 }
 
 asmlinkage ssize_t
-sys_lgetxattr(char __user *path, char __user *name, void __user *value,
+sys_lgetxattr(const char __user *path, const char __user *name, void __user *value,
 	      size_t size)
 {
 	struct nameidata nd;
@@ -379,7 +380,7 @@ sys_lgetxattr(char __user *path, char __user *name, void __user *value,
 }
 
 asmlinkage ssize_t
-sys_fgetxattr(int fd, char __user *name, void __user *value, size_t size)
+sys_fgetxattr(int fd, const char __user *name, void __user *value, size_t size)
 {
 	struct file *f;
 	ssize_t error = -EBADF;
@@ -424,7 +425,7 @@ listxattr(struct dentry *d, char __user *list, size_t size)
 }
 
 asmlinkage ssize_t
-sys_listxattr(char __user *path, char __user *list, size_t size)
+sys_listxattr(const char __user *path, char __user *list, size_t size)
 {
 	struct nameidata nd;
 	ssize_t error;
@@ -438,7 +439,7 @@ sys_listxattr(char __user *path, char __user *list, size_t size)
 }
 
 asmlinkage ssize_t
-sys_llistxattr(char __user *path, char __user *list, size_t size)
+sys_llistxattr(const char __user *path, char __user *list, size_t size)
 {
 	struct nameidata nd;
 	ssize_t error;
@@ -470,7 +471,7 @@ sys_flistxattr(int fd, char __user *list, size_t size)
  * Extended attribute REMOVE operations
  */
 static long
-removexattr(struct dentry *d, char __user *name)
+removexattr(struct dentry *d, const char __user *name)
 {
 	int error;
 	char kname[XATTR_NAME_MAX + 1];
@@ -485,7 +486,7 @@ removexattr(struct dentry *d, char __user *name)
 }
 
 asmlinkage long
-sys_removexattr(char __user *path, char __user *name)
+sys_removexattr(const char __user *path, const char __user *name)
 {
 	struct nameidata nd;
 	int error;
@@ -503,7 +504,7 @@ sys_removexattr(char __user *path, char __user *name)
 }
 
 asmlinkage long
-sys_lremovexattr(char __user *path, char __user *name)
+sys_lremovexattr(const char __user *path, const char __user *name)
 {
 	struct nameidata nd;
 	int error;
@@ -521,7 +522,7 @@ sys_lremovexattr(char __user *path, char __user *name)
 }
 
 asmlinkage long
-sys_fremovexattr(int fd, char __user *name)
+sys_fremovexattr(int fd, const char __user *name)
 {
 	struct file *f;
 	struct dentry *dentry;